Fix typing across codebase
This commit is contained in:
parent
c76d740481
commit
3eb1bb6053
|
@ -21,7 +21,7 @@ env:
|
||||||
jobs:
|
jobs:
|
||||||
cancel-prior-workflows:
|
cancel-prior-workflows:
|
||||||
name: cancel-prior-workflows
|
name: cancel-prior-workflows
|
||||||
runs-on: ubuntu-18.04
|
runs-on: ubuntu-20.04
|
||||||
steps:
|
steps:
|
||||||
- name: Cancel previous workflow runs
|
- name: Cancel previous workflow runs
|
||||||
uses: styfle/cancel-workflow-action@0.9.1
|
uses: styfle/cancel-workflow-action@0.9.1
|
||||||
|
@ -33,7 +33,7 @@ jobs:
|
||||||
# example here: https://github.com/dorny/paths-filter#examples
|
# example here: https://github.com/dorny/paths-filter#examples
|
||||||
change-filters:
|
change-filters:
|
||||||
name: filter-jobs-on-changes
|
name: filter-jobs-on-changes
|
||||||
runs-on: ubuntu-18.04
|
runs-on: ubuntu-20.04
|
||||||
# Queried by downstream jobs to determine if they should run.
|
# Queried by downstream jobs to determine if they should run.
|
||||||
outputs:
|
outputs:
|
||||||
needs-docs: ${{ steps.filter.outputs.docs }}
|
needs-docs: ${{ steps.filter.outputs.docs }}
|
||||||
|
@ -73,7 +73,7 @@ jobs:
|
||||||
name: setup-self-hosted-manager
|
name: setup-self-hosted-manager
|
||||||
needs: change-filters
|
needs: change-filters
|
||||||
if: needs.change-filters.outputs.needs-manager == 'true'
|
if: needs.change-filters.outputs.needs-manager == 'true'
|
||||||
runs-on: ubuntu-18.04
|
runs-on: ubuntu-20.04
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v2
|
- uses: actions/checkout@v2
|
||||||
- name: Install Python CI requirements
|
- name: Install Python CI requirements
|
||||||
|
@ -199,7 +199,7 @@ jobs:
|
||||||
name: documentation-check
|
name: documentation-check
|
||||||
needs: change-filters
|
needs: change-filters
|
||||||
if: needs.change-filters.outputs.needs-docs == 'true'
|
if: needs.change-filters.outputs.needs-docs == 'true'
|
||||||
runs-on: ubuntu-18.04
|
runs-on: ubuntu-20.04
|
||||||
container:
|
container:
|
||||||
image: firesim/firesim-ci:v1.3
|
image: firesim/firesim-ci:v1.3
|
||||||
options: --entrypoint /bin/bash
|
options: --entrypoint /bin/bash
|
||||||
|
|
|
@ -1,5 +1,7 @@
|
||||||
""" Tools to help manage afis. """
|
""" Tools to help manage afis. """
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
import boto3
|
import boto3
|
||||||
from awstools.awstools import depaginated_boto_query
|
from awstools.awstools import depaginated_boto_query
|
||||||
|
|
|
@ -1,5 +1,7 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
""" This script configures your AWS account to run FireSim. """
|
""" This script configures your AWS account to run FireSim. """
|
||||||
|
|
||||||
import boto3
|
import boto3
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
from __future__ import print_function
|
from __future__ import annotations
|
||||||
|
|
||||||
import random
|
import random
|
||||||
import logging
|
import logging
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
from __future__ import with_statement
|
from __future__ import with_statement, annotations
|
||||||
|
|
||||||
import json
|
import json
|
||||||
import time
|
import time
|
||||||
import random
|
import random
|
||||||
|
@ -9,14 +10,15 @@ from fabric.api import prefix, local, run, env, lcd, parallel # type: ignore
|
||||||
from fabric.contrib.console import confirm # type: ignore
|
from fabric.contrib.console import confirm # type: ignore
|
||||||
from fabric.contrib.project import rsync_project # type: ignore
|
from fabric.contrib.project import rsync_project # type: ignore
|
||||||
|
|
||||||
from awstools.afitools import *
|
from awstools.afitools import firesim_tags_to_description, copy_afi_to_all_regions
|
||||||
from awstools.awstools import send_firesim_notification
|
from awstools.awstools import send_firesim_notification
|
||||||
from util.streamlogger import StreamLogger, InfoStreamLogger
|
from util.streamlogger import StreamLogger, InfoStreamLogger
|
||||||
|
|
||||||
# imports needed for python type checking
|
# imports needed for python type checking
|
||||||
from typing import Optional
|
from typing import Optional, TYPE_CHECKING
|
||||||
from buildtools.buildconfig import BuildConfig
|
if TYPE_CHECKING:
|
||||||
from buildtools.buildconfigfile import BuildConfigFile
|
from buildtools.buildconfig import BuildConfig
|
||||||
|
from buildtools.buildconfigfile import BuildConfigFile
|
||||||
|
|
||||||
rootLogger = logging.getLogger()
|
rootLogger = logging.getLogger()
|
||||||
|
|
||||||
|
|
|
@ -1,18 +1,14 @@
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
from time import strftime, gmtime
|
from time import strftime, gmtime
|
||||||
import pprint
|
import pprint
|
||||||
from importlib import import_module
|
|
||||||
|
|
||||||
from awstools.awstools import *
|
from awstools.awstools import valid_aws_configure_creds, aws_resource_names
|
||||||
|
|
||||||
# imports needed for python type checking
|
# imports needed for python type checking
|
||||||
from typing import Set, Any, Optional, Dict, TYPE_CHECKING
|
from typing import Set, Any, Optional, Dict, TYPE_CHECKING
|
||||||
# needed to avoid type-hint circular dependencies
|
|
||||||
# TODO: Solved in 3.7.+ by "from __future__ import annotations" (see https://stackoverflow.com/questions/33837918/type-hints-solve-circular-dependency)
|
|
||||||
# and normal "import <module> as ..." syntax (see https://www.reddit.com/r/Python/comments/cug90e/how_to_not_create_circular_dependencies_when/)
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from buildtools.buildconfigfile import BuildConfigFile
|
from buildtools.buildconfigfile import BuildConfigFile
|
||||||
else:
|
|
||||||
BuildConfigFile = object
|
|
||||||
|
|
||||||
class BuildConfig:
|
class BuildConfig:
|
||||||
"""Represents a single build configuration used to build RTL, drivers, and bitstreams.
|
"""Represents a single build configuration used to build RTL, drivers, and bitstreams.
|
||||||
|
|
|
@ -1,10 +1,10 @@
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
from time import strftime, gmtime
|
from time import strftime, gmtime
|
||||||
import pprint
|
import pprint
|
||||||
import logging
|
import logging
|
||||||
import sys
|
import sys
|
||||||
import yaml
|
import yaml
|
||||||
from collections import defaultdict
|
|
||||||
from importlib import import_module
|
|
||||||
|
|
||||||
from runtools.runtime_config import RuntimeHWDB
|
from runtools.runtime_config import RuntimeHWDB
|
||||||
from buildtools.buildconfig import BuildConfig
|
from buildtools.buildconfig import BuildConfig
|
||||||
|
@ -13,7 +13,7 @@ from buildtools.buildfarm import BuildFarm
|
||||||
|
|
||||||
# imports needed for python type checking
|
# imports needed for python type checking
|
||||||
from typing import Dict, Optional, List, Set, Type, Any, TYPE_CHECKING
|
from typing import Dict, Optional, List, Set, Type, Any, TYPE_CHECKING
|
||||||
from argparse import Namespace
|
import argparse # this is not within an if TYPE_CHECKING: scope so the `register_task` in FireSim can evaluate its annotation
|
||||||
|
|
||||||
rootLogger = logging.getLogger()
|
rootLogger = logging.getLogger()
|
||||||
|
|
||||||
|
@ -50,7 +50,7 @@ class BuildConfigFile:
|
||||||
num_builds: Number of builds to run.
|
num_builds: Number of builds to run.
|
||||||
build_farm: Build farm used to host builds.
|
build_farm: Build farm used to host builds.
|
||||||
"""
|
"""
|
||||||
args: Namespace
|
args: argparse.Namespace
|
||||||
agfistoshare: List[str]
|
agfistoshare: List[str]
|
||||||
acctids_to_sharewith: List[str]
|
acctids_to_sharewith: List[str]
|
||||||
hwdb: RuntimeHWDB
|
hwdb: RuntimeHWDB
|
||||||
|
@ -59,7 +59,7 @@ class BuildConfigFile:
|
||||||
num_builds: int
|
num_builds: int
|
||||||
build_farm: BuildFarm
|
build_farm: BuildFarm
|
||||||
|
|
||||||
def __init__(self, args: Namespace) -> None:
|
def __init__(self, args: argparse.Namespace) -> None:
|
||||||
"""
|
"""
|
||||||
Args:
|
Args:
|
||||||
args: Object holding arg attributes.
|
args: Object holding arg attributes.
|
||||||
|
|
|
@ -1,20 +1,16 @@
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
import sys
|
|
||||||
import abc
|
import abc
|
||||||
import pprint
|
import pprint
|
||||||
|
|
||||||
from awstools.awstools import *
|
from awstools.awstools import aws_resource_names, launch_instances, wait_on_instance_launches, get_instance_ids_for_instances, terminate_instances
|
||||||
|
|
||||||
# imports needed for python type checking
|
# imports needed for python type checking
|
||||||
from typing import cast, Any, Dict, Optional, Sequence, List, TYPE_CHECKING
|
from typing import cast, Any, Dict, Optional, Sequence, List, TYPE_CHECKING
|
||||||
from mypy_boto3_ec2.service_resource import Instance as EC2InstanceResource
|
|
||||||
# needed to avoid type-hint circular dependencies
|
|
||||||
# TODO: Solved in 3.7.+ by "from __future__ import annotations" (see https://stackoverflow.com/questions/33837918/type-hints-solve-circular-dependency)
|
|
||||||
# and normal "import <module> as ..." syntax (see https://www.reddit.com/r/Python/comments/cug90e/how_to_not_create_circular_dependencies_when/)
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from buildtools.buildconfig import BuildConfig
|
from buildtools.buildconfig import BuildConfig
|
||||||
else:
|
from mypy_boto3_ec2.service_resource import Instance as EC2InstanceResource
|
||||||
BuildConfig = object
|
|
||||||
|
|
||||||
rootLogger = logging.getLogger()
|
rootLogger = logging.getLogger()
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# PYTHON_ARGCOMPLETE_OK
|
# PYTHON_ARGCOMPLETE_OK
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
import sys
|
import sys
|
||||||
import os
|
import os
|
||||||
|
@ -11,18 +12,12 @@ import random
|
||||||
import argcomplete # type: ignore
|
import argcomplete # type: ignore
|
||||||
from fabric.api import local, hide, warn_only, env, execute # type: ignore
|
from fabric.api import local, hide, warn_only, env, execute # type: ignore
|
||||||
import string
|
import string
|
||||||
from typing import Dict, Callable, Type, Optional
|
|
||||||
try:
|
|
||||||
# added in 3.8
|
|
||||||
from typing import TypedDict
|
|
||||||
except ImportError:
|
|
||||||
TypedDict = dict
|
|
||||||
from inspect import signature
|
from inspect import signature
|
||||||
|
|
||||||
from runtools.runtime_config import RuntimeConfig
|
from runtools.runtime_config import RuntimeConfig
|
||||||
|
|
||||||
from awstools.awstools import valid_aws_configure_creds, get_aws_userid, subscribe_to_firesim_topic
|
from awstools.awstools import valid_aws_configure_creds, get_aws_userid, subscribe_to_firesim_topic
|
||||||
from awstools.afitools import *
|
from awstools.afitools import share_agfi_in_all_regions
|
||||||
|
|
||||||
from buildtools.buildafi import replace_rtl, build_driver, aws_build, aws_create_afi
|
from buildtools.buildafi import replace_rtl, build_driver, aws_build, aws_create_afi
|
||||||
from buildtools.buildconfigfile import BuildConfigFile
|
from buildtools.buildconfigfile import BuildConfigFile
|
||||||
|
@ -30,6 +25,8 @@ from buildtools.buildconfig import BuildConfig
|
||||||
|
|
||||||
from util.streamlogger import StreamLogger
|
from util.streamlogger import StreamLogger
|
||||||
|
|
||||||
|
from typing import Dict, Callable, Type, Optional, TypedDict, get_type_hints
|
||||||
|
|
||||||
class Task(TypedDict):
|
class Task(TypedDict):
|
||||||
task: Callable
|
task: Callable
|
||||||
config: Optional[Callable]
|
config: Optional[Callable]
|
||||||
|
@ -59,6 +56,9 @@ def register_task(task: Callable) -> Callable:
|
||||||
|
|
||||||
config_class = None
|
config_class = None
|
||||||
|
|
||||||
|
# resolve str type hints
|
||||||
|
task.__annotations__ = get_type_hints(task)
|
||||||
|
|
||||||
# introspect the type of config that this task takes (its first param)
|
# introspect the type of config that this task takes (its first param)
|
||||||
sig = signature(task)
|
sig = signature(task)
|
||||||
if sig.parameters:
|
if sig.parameters:
|
||||||
|
@ -68,6 +68,9 @@ def register_task(task: Callable) -> Callable:
|
||||||
else:
|
else:
|
||||||
config_class = first.annotation
|
config_class = first.annotation
|
||||||
|
|
||||||
|
# resolve str type hints
|
||||||
|
config_class.__init__.__annotations__ = get_type_hints(config_class.__init__)
|
||||||
|
|
||||||
# check that the first parameter takes a Namespace passed to its constructor
|
# check that the first parameter takes a Namespace passed to its constructor
|
||||||
csig = signature(config_class)
|
csig = signature(config_class)
|
||||||
if csig.parameters:
|
if csig.parameters:
|
||||||
|
|
|
@ -1,8 +1,14 @@
|
||||||
""" These are the base components that make up a FireSim simulation target
|
""" These are the base components that make up a FireSim simulation target
|
||||||
topology. """
|
topology. """
|
||||||
|
|
||||||
from runtools.firesim_topology_elements import *
|
from __future__ import annotations
|
||||||
|
|
||||||
from runtools.user_topology import UserTopologies
|
from runtools.user_topology import UserTopologies
|
||||||
|
from runtools.firesim_topology_elements import FireSimSwitchNode, FireSimServerNode
|
||||||
|
|
||||||
|
from typing import List, Callable, Optional, Union, TYPE_CHECKING
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from runtools.firesim_topology_elements import FireSimNode
|
||||||
|
|
||||||
class FireSimTopology(UserTopologies):
|
class FireSimTopology(UserTopologies):
|
||||||
""" A FireSim Topology consists of a list of root FireSimNodes, which
|
""" A FireSim Topology consists of a list of root FireSimNodes, which
|
||||||
|
@ -10,10 +16,21 @@ class FireSimTopology(UserTopologies):
|
||||||
|
|
||||||
This is designed to model tree-like topologies."""
|
This is designed to model tree-like topologies."""
|
||||||
|
|
||||||
def get_dfs_order(self):
|
def __init__(self, user_topology_name: str, no_net_num_nodes: int) -> None:
|
||||||
|
# This just constructs the user topology. an upper level pass manager
|
||||||
|
# will apply passes to it.
|
||||||
|
|
||||||
|
# a topology can specify a custom target -> host mapping. if left as None,
|
||||||
|
# the default mapper is used, which handles no network and simple networked cases.
|
||||||
|
super().__init__(no_net_num_nodes)
|
||||||
|
|
||||||
|
config_func = getattr(self, user_topology_name)
|
||||||
|
config_func()
|
||||||
|
|
||||||
|
def get_dfs_order(self) -> List[FireSimNode]:
|
||||||
""" Return all nodes in the topology in dfs order, as a list. """
|
""" Return all nodes in the topology in dfs order, as a list. """
|
||||||
stack = list(self.roots)
|
stack = list(self.roots)
|
||||||
retlist = []
|
retlist: List[FireSimNode] = []
|
||||||
visitedonce = set()
|
visitedonce = set()
|
||||||
while stack:
|
while stack:
|
||||||
nextup = stack[0]
|
nextup = stack[0]
|
||||||
|
@ -27,26 +44,16 @@ class FireSimTopology(UserTopologies):
|
||||||
stack = list(map(lambda x: x.get_downlink_side(), nextup.downlinks)) + stack
|
stack = list(map(lambda x: x.get_downlink_side(), nextup.downlinks)) + stack
|
||||||
return retlist
|
return retlist
|
||||||
|
|
||||||
def get_dfs_order_switches(self):
|
def get_dfs_order_switches(self) -> List[FireSimSwitchNode]:
|
||||||
""" Utility function that returns only switches, in dfs order. """
|
""" Utility function that returns only switches, in dfs order. """
|
||||||
return [x for x in self.get_dfs_order() if isinstance(x, FireSimSwitchNode)]
|
return [x for x in self.get_dfs_order() if isinstance(x, FireSimSwitchNode)]
|
||||||
|
|
||||||
def get_dfs_order_servers(self):
|
def get_dfs_order_servers(self) -> List[FireSimServerNode]:
|
||||||
""" Utility function that returns only servers, in dfs order. """
|
""" Utility function that returns only servers, in dfs order. """
|
||||||
return [x for x in self.get_dfs_order() if isinstance(x, FireSimServerNode)]
|
return [x for x in self.get_dfs_order() if isinstance(x, FireSimServerNode)]
|
||||||
|
|
||||||
def get_bfs_order(self):
|
def get_bfs_order(self) -> None:
|
||||||
""" return the nodes in the topology in bfs order """
|
""" return the nodes in the topology in bfs order """
|
||||||
# don't forget to eliminate dups
|
# don't forget to eliminate dups
|
||||||
assert False, "TODO"
|
assert False, "TODO"
|
||||||
|
|
||||||
def __init__(self, user_topology_name, no_net_num_nodes):
|
|
||||||
# This just constructs the user topology. an upper level pass manager
|
|
||||||
# will apply passes to it.
|
|
||||||
|
|
||||||
# a topology can specify a custom target -> host mapping. if left as None,
|
|
||||||
# the default mapper is used, which handles no network and simple networked cases.
|
|
||||||
self.custom_mapper = None
|
|
||||||
self.no_net_num_nodes = no_net_num_nodes
|
|
||||||
config_func = getattr(self, user_topology_name)
|
|
||||||
config_func()
|
|
||||||
|
|
|
@ -1,17 +1,26 @@
|
||||||
""" Node types necessary to construct a FireSimTopology. """
|
""" Node types necessary to construct a FireSimTopology. """
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
|
import abc
|
||||||
|
from fabric.contrib.project import rsync_project # type: ignore
|
||||||
|
from fabric.api import run, local, warn_only, get # type: ignore
|
||||||
|
|
||||||
from runtools.switch_model_config import AbstractSwitchToSwitchConfig
|
from runtools.switch_model_config import AbstractSwitchToSwitchConfig
|
||||||
from runtools.utils import get_local_shared_libraries
|
from runtools.utils import get_local_shared_libraries
|
||||||
from util.streamlogger import StreamLogger
|
from util.streamlogger import StreamLogger
|
||||||
from fabric.api import * # type: ignore
|
|
||||||
from fabric.contrib.project import rsync_project # type: ignore
|
from typing import Optional, List, Tuple, Sequence, Union, TYPE_CHECKING
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from runtools.workload import JobConfig
|
||||||
|
from runtools.run_farm import EC2Inst
|
||||||
|
from runtools.runtime_config import RuntimeHWConfig
|
||||||
|
from runtools.utils import MacAddress
|
||||||
|
|
||||||
rootLogger = logging.getLogger()
|
rootLogger = logging.getLogger()
|
||||||
|
|
||||||
|
class FireSimLink:
|
||||||
class FireSimLink(object):
|
|
||||||
""" This represents a link that connects different FireSimNodes.
|
""" This represents a link that connects different FireSimNodes.
|
||||||
|
|
||||||
Terms:
|
Terms:
|
||||||
|
@ -29,12 +38,16 @@ class FireSimLink(object):
|
||||||
RootSwitch has a downlink to Sim X.
|
RootSwitch has a downlink to Sim X.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# links have a globally unique identifier, currently used for naming
|
# links have a globally unique identifier, currently used for naming
|
||||||
# shmem regions for Shmem Links
|
# shmem regions for Shmem Links
|
||||||
next_unique_link_identifier = 0
|
next_unique_link_identifier: int = 0
|
||||||
|
id: int
|
||||||
|
id_as_str: str
|
||||||
|
uplink_side: Optional[FireSimNode]
|
||||||
|
downlink_side: Optional[FireSimNode]
|
||||||
|
port: Optional[int]
|
||||||
|
|
||||||
def __init__(self, uplink_side, downlink_side):
|
def __init__(self, uplink_side: FireSimNode, downlink_side: FireSimNode) -> None:
|
||||||
self.id = FireSimLink.next_unique_link_identifier
|
self.id = FireSimLink.next_unique_link_identifier
|
||||||
FireSimLink.next_unique_link_identifier += 1
|
FireSimLink.next_unique_link_identifier += 1
|
||||||
# format as 100 char hex string padded with zeroes
|
# format as 100 char hex string padded with zeroes
|
||||||
|
@ -45,45 +58,46 @@ class FireSimLink(object):
|
||||||
self.set_uplink_side(uplink_side)
|
self.set_uplink_side(uplink_side)
|
||||||
self.set_downlink_side(downlink_side)
|
self.set_downlink_side(downlink_side)
|
||||||
|
|
||||||
def set_uplink_side(self, fsimnode):
|
def set_uplink_side(self, fsimnode: FireSimNode) -> None:
|
||||||
self.uplink_side = fsimnode
|
self.uplink_side = fsimnode
|
||||||
|
|
||||||
def set_downlink_side(self, fsimnode):
|
def set_downlink_side(self, fsimnode: FireSimNode) -> None:
|
||||||
self.downlink_side = fsimnode
|
self.downlink_side = fsimnode
|
||||||
|
|
||||||
def get_uplink_side(self):
|
def get_uplink_side(self) -> FireSimNode:
|
||||||
|
assert self.uplink_side is not None
|
||||||
return self.uplink_side
|
return self.uplink_side
|
||||||
|
|
||||||
def get_downlink_side(self):
|
def get_downlink_side(self) -> FireSimNode:
|
||||||
|
assert self.downlink_side is not None
|
||||||
return self.downlink_side
|
return self.downlink_side
|
||||||
|
|
||||||
def link_hostserver_port(self):
|
def link_hostserver_port(self) -> int:
|
||||||
""" Get the port used for this Link. This should only be called for
|
""" Get the port used for this Link. This should only be called for
|
||||||
links implemented with SocketPorts. """
|
links implemented with SocketPorts. """
|
||||||
if self.port is None:
|
if self.port is None:
|
||||||
self.port = self.get_uplink_side().host_instance.allocate_host_port()
|
self.port = self.get_uplink_side().get_host_instance().allocate_host_port()
|
||||||
return self.port
|
return self.port
|
||||||
|
|
||||||
def link_hostserver_ip(self):
|
def link_hostserver_ip(self) -> str:
|
||||||
""" Get the IP address used for this Link. This should only be called for
|
""" Get the IP address used for this Link. This should only be called for
|
||||||
links implemented with SocketPorts. """
|
links implemented with SocketPorts. """
|
||||||
assert self.get_uplink_side().host_instance.is_bound_to_real_instance(), "Instances must be bound to private IP to emit switches with uplinks. i.e. you must have a running Run Farm."
|
return self.get_uplink_side().get_host_instance().get_private_ip()
|
||||||
return self.get_uplink_side().host_instance.get_private_ip()
|
|
||||||
|
|
||||||
def link_crosses_hosts(self):
|
def link_crosses_hosts(self) -> bool:
|
||||||
""" Return True if the user has mapped the two endpoints of this link to
|
""" Return True if the user has mapped the two endpoints of this link to
|
||||||
separate hosts. This implies a SocketServerPort / SocketClientPort will be used
|
separate hosts. This implies a SocketServerPort / SocketClientPort will be used
|
||||||
to implement the Link. If False, use a sharedmem port to implement the link. """
|
to implement the Link. If False, use a sharedmem port to implement the link. """
|
||||||
if type(self.get_downlink_side()) == FireSimDummyServerNode:
|
if isinstance(self.get_downlink_side(), FireSimDummyServerNode):
|
||||||
return False
|
return False
|
||||||
return self.get_uplink_side().host_instance != self.get_downlink_side().host_instance
|
return self.get_uplink_side().get_host_instance() != self.get_downlink_side().get_host_instance()
|
||||||
|
|
||||||
def get_global_link_id(self):
|
def get_global_link_id(self) -> str:
|
||||||
""" Return the globally unique link id, used for naming shmem ports. """
|
""" Return the globally unique link id, used for naming shmem ports. """
|
||||||
return self.id_as_str
|
return self.id_as_str
|
||||||
|
|
||||||
|
|
||||||
class FireSimNode(object):
|
class FireSimNode(metaclass=abc.ABCMeta):
|
||||||
""" This represents a node in the high-level FireSim Simulation Topology
|
""" This represents a node in the high-level FireSim Simulation Topology
|
||||||
Graph. These nodes are either
|
Graph. These nodes are either
|
||||||
|
|
||||||
|
@ -100,15 +114,20 @@ class FireSimNode(object):
|
||||||
3) Assigning workloads to run to simulators
|
3) Assigning workloads to run to simulators
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
downlinks: List[FireSimLink]
|
||||||
|
downlinkmacs: List[MacAddress]
|
||||||
|
uplinks: List[FireSimLink]
|
||||||
|
host_instance: Optional[EC2Inst]
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self) -> None:
|
||||||
self.downlinks = []
|
self.downlinks = []
|
||||||
|
self.downlinkmacs = []
|
||||||
# used when there are multiple links between switches to disambiguate
|
# used when there are multiple links between switches to disambiguate
|
||||||
#self.downlinks_consumed = []
|
#self.downlinks_consumed = []
|
||||||
self.uplinks = []
|
self.uplinks = []
|
||||||
self.host_instance = None
|
self.host_instance = None
|
||||||
|
|
||||||
def add_downlink(self, firesimnode):
|
def add_downlink(self, firesimnode: FireSimNode) -> None:
|
||||||
""" A "downlink" is a link that will take you further from the root
|
""" A "downlink" is a link that will take you further from the root
|
||||||
of the tree. Users define a tree topology by specifying "downlinks".
|
of the tree. Users define a tree topology by specifying "downlinks".
|
||||||
Uplinks are automatically inferred. """
|
Uplinks are automatically inferred. """
|
||||||
|
@ -117,12 +136,13 @@ class FireSimNode(object):
|
||||||
self.downlinks.append(linkobj)
|
self.downlinks.append(linkobj)
|
||||||
#self.downlinks_consumed.append(False)
|
#self.downlinks_consumed.append(False)
|
||||||
|
|
||||||
def add_downlinks(self, firesimnodes):
|
def add_downlinks(self, firesimnodes: Sequence[FireSimNode]) -> None:
|
||||||
""" Just a convenience function to add multiple downlinks at once.
|
""" Just a convenience function to add multiple downlinks at once.
|
||||||
Assumes downlinks in the supplied list are ordered. """
|
Assumes downlinks in the supplied list are ordered. """
|
||||||
[self.add_downlink(node) for node in firesimnodes]
|
for node in firesimnodes:
|
||||||
|
self.add_downlink(node)
|
||||||
|
|
||||||
def add_uplink(self, firesimlink):
|
def add_uplink(self, firesimlink: FireSimLink) -> None:
|
||||||
""" This is only for internal use - uplinks are automatically populated
|
""" This is only for internal use - uplinks are automatically populated
|
||||||
when a node is specified as the downlink of another.
|
when a node is specified as the downlink of another.
|
||||||
|
|
||||||
|
@ -130,40 +150,53 @@ class FireSimNode(object):
|
||||||
tree."""
|
tree."""
|
||||||
self.uplinks.append(firesimlink)
|
self.uplinks.append(firesimlink)
|
||||||
|
|
||||||
def num_links(self):
|
def num_links(self) -> int:
|
||||||
""" Return the total number of links (downlinks plus uplinks). """
|
""" Return the total number of links (downlinks plus uplinks). """
|
||||||
return len(self.downlinks) + len(self.uplinks)
|
return len(self.downlinks) + len(self.uplinks)
|
||||||
|
|
||||||
def run_node_simulation(self):
|
def has_assigned_host_instance(self) -> bool:
|
||||||
""" Override this to provide the ability to launch your simulation. """
|
return self.host_instance is not None
|
||||||
pass
|
|
||||||
|
|
||||||
def terminate_node_simulation(self):
|
def assign_host_instance(self, host_instance_run_farm_object: EC2Inst) -> None:
|
||||||
""" Override this to provide the ability to terminate your simulation. """
|
|
||||||
pass
|
|
||||||
|
|
||||||
def has_assigned_host_instance(self):
|
|
||||||
if self.host_instance is None:
|
|
||||||
return False
|
|
||||||
return True
|
|
||||||
|
|
||||||
def assign_host_instance(self, host_instance_run_farm_object):
|
|
||||||
self.host_instance = host_instance_run_farm_object
|
self.host_instance = host_instance_run_farm_object
|
||||||
|
|
||||||
def get_host_instance(self):
|
def get_host_instance(self) -> EC2Inst:
|
||||||
|
assert self.host_instance is not None
|
||||||
return self.host_instance
|
return self.host_instance
|
||||||
|
|
||||||
|
@abc.abstractmethod
|
||||||
|
def diagramstr(self) -> str:
|
||||||
|
raise NotImplementedError
|
||||||
|
|
||||||
|
|
||||||
class FireSimServerNode(FireSimNode):
|
class FireSimServerNode(FireSimNode):
|
||||||
""" This is a simulated server instance in FireSim. """
|
""" This is a simulated server instance in FireSim. """
|
||||||
SERVERS_CREATED = 0
|
SERVERS_CREATED: int = 0
|
||||||
|
server_hardware_config: Optional[Union[RuntimeHWConfig, str]]
|
||||||
|
server_link_latency: Optional[int]
|
||||||
|
server_bw_max: Optional[int]
|
||||||
|
server_profile_interval: Optional[int]
|
||||||
|
trace_enable: Optional[bool]
|
||||||
|
trace_select: Optional[str]
|
||||||
|
trace_start: Optional[str]
|
||||||
|
trace_end: Optional[str]
|
||||||
|
trace_output_format: Optional[str]
|
||||||
|
autocounter_readrate: Optional[int]
|
||||||
|
zerooutdram: Optional[bool]
|
||||||
|
disable_asserts: Optional[bool]
|
||||||
|
print_start: Optional[str]
|
||||||
|
print_end: Optional[str]
|
||||||
|
print_cycle_prefix: Optional[bool]
|
||||||
|
job: Optional[JobConfig]
|
||||||
|
server_id_internal: int
|
||||||
|
mac_address: Optional[MacAddress]
|
||||||
|
|
||||||
def __init__(self, server_hardware_config=None, server_link_latency=None,
|
def __init__(self, server_hardware_config: Optional[Union[RuntimeHWConfig, str]] = None, server_link_latency: Optional[int] = None,
|
||||||
server_bw_max=None, server_profile_interval=None,
|
server_bw_max: Optional[int] = None, server_profile_interval: Optional[int] = None,
|
||||||
trace_enable=None, trace_select=None, trace_start=None, trace_end=None, trace_output_format=None, autocounter_readrate=None,
|
trace_enable: Optional[bool] = None, trace_select: Optional[str] = None, trace_start: Optional[str] = None, trace_end: Optional[str] = None, trace_output_format: Optional[str] = None, autocounter_readrate: Optional[int] = None,
|
||||||
zerooutdram=None, disable_asserts=None,
|
zerooutdram: Optional[bool] = None, disable_asserts: Optional[bool] = None,
|
||||||
print_start=None, print_end=None, print_cycle_prefix=None):
|
print_start: Optional[str] = None, print_end: Optional[str] = None, print_cycle_prefix: Optional[bool] = None):
|
||||||
super(FireSimServerNode, self).__init__()
|
super().__init__()
|
||||||
self.server_hardware_config = server_hardware_config
|
self.server_hardware_config = server_hardware_config
|
||||||
self.server_link_latency = server_link_latency
|
self.server_link_latency = server_link_latency
|
||||||
self.server_bw_max = server_bw_max
|
self.server_bw_max = server_bw_max
|
||||||
|
@ -181,21 +214,27 @@ class FireSimServerNode(FireSimNode):
|
||||||
self.print_cycle_prefix = print_cycle_prefix
|
self.print_cycle_prefix = print_cycle_prefix
|
||||||
self.job = None
|
self.job = None
|
||||||
self.server_id_internal = FireSimServerNode.SERVERS_CREATED
|
self.server_id_internal = FireSimServerNode.SERVERS_CREATED
|
||||||
|
self.mac_address = None
|
||||||
FireSimServerNode.SERVERS_CREATED += 1
|
FireSimServerNode.SERVERS_CREATED += 1
|
||||||
|
|
||||||
def set_server_hardware_config(self, server_hardware_config):
|
def set_server_hardware_config(self, server_hardware_config: RuntimeHWConfig) -> None:
|
||||||
self.server_hardware_config = server_hardware_config
|
self.server_hardware_config = server_hardware_config
|
||||||
|
|
||||||
def get_server_hardware_config(self):
|
def get_server_hardware_config(self) -> Optional[Union[RuntimeHWConfig, str]]:
|
||||||
return self.server_hardware_config
|
return self.server_hardware_config
|
||||||
|
|
||||||
def assign_mac_address(self, macaddr):
|
def get_resolved_server_hardware_config(self) -> RuntimeHWConfig:
|
||||||
|
assert self.server_hardware_config is not None and not isinstance(self.server_hardware_config, str)
|
||||||
|
return self.server_hardware_config
|
||||||
|
|
||||||
|
def assign_mac_address(self, macaddr: MacAddress) -> None:
|
||||||
self.mac_address = macaddr
|
self.mac_address = macaddr
|
||||||
|
|
||||||
def get_mac_address(self):
|
def get_mac_address(self) -> MacAddress:
|
||||||
|
assert self.mac_address is not None
|
||||||
return self.mac_address
|
return self.mac_address
|
||||||
|
|
||||||
def process_qcow2_rootfses(self, rootfses_list):
|
def process_qcow2_rootfses(self, rootfses_list: List[Optional[str]]) -> List[Optional[str]]:
|
||||||
""" Take in list of all rootfses on this node. For the qcow2 ones, find
|
""" Take in list of all rootfses on this node. For the qcow2 ones, find
|
||||||
the allocated devices, attach the device to the qcow2 image on the
|
the allocated devices, attach the device to the qcow2 image on the
|
||||||
remote node, and replace it in the list with that nbd device. Return
|
remote node, and replace it in the list with that nbd device. Return
|
||||||
|
@ -207,7 +246,7 @@ class FireSimServerNode(FireSimNode):
|
||||||
|
|
||||||
result_list = []
|
result_list = []
|
||||||
for rootfsname in rootfses_list:
|
for rootfsname in rootfses_list:
|
||||||
if rootfsname and rootfsname.endswith(".qcow2"):
|
if rootfsname is not None and rootfsname.endswith(".qcow2"):
|
||||||
allocd_device = self.get_host_instance().nbd_tracker.get_nbd_for_imagename(rootfsname)
|
allocd_device = self.get_host_instance().nbd_tracker.get_nbd_for_imagename(rootfsname)
|
||||||
|
|
||||||
# connect the /dev/nbdX device to the rootfs
|
# connect the /dev/nbdX device to the rootfs
|
||||||
|
@ -216,16 +255,16 @@ class FireSimServerNode(FireSimNode):
|
||||||
result_list.append(rootfsname)
|
result_list.append(rootfsname)
|
||||||
return result_list
|
return result_list
|
||||||
|
|
||||||
def allocate_nbds(self):
|
def allocate_nbds(self) -> None:
|
||||||
""" called by the allocate nbds pass to assign an nbd to a qcow2 image.
|
""" called by the allocate nbds pass to assign an nbd to a qcow2 image.
|
||||||
"""
|
"""
|
||||||
rootfses_list = [self.get_rootfs_name()]
|
rootfses_list = [self.get_rootfs_name()]
|
||||||
for rootfsname in rootfses_list:
|
for rootfsname in rootfses_list:
|
||||||
if rootfsname and rootfsname.endswith(".qcow2"):
|
if rootfsname is not None and rootfsname.endswith(".qcow2"):
|
||||||
allocd_device = self.get_host_instance().nbd_tracker.get_nbd_for_imagename(rootfsname)
|
allocd_device = self.get_host_instance().nbd_tracker.get_nbd_for_imagename(rootfsname)
|
||||||
|
|
||||||
|
|
||||||
def diagramstr(self):
|
def diagramstr(self) -> str:
|
||||||
msg = """{}:{}\n----------\nMAC: {}\n{}\n{}""".format("FireSimServerNode",
|
msg = """{}:{}\n----------\nMAC: {}\n{}\n{}""".format("FireSimServerNode",
|
||||||
str(self.server_id_internal),
|
str(self.server_id_internal),
|
||||||
str(self.mac_address),
|
str(self.mac_address),
|
||||||
|
@ -233,7 +272,7 @@ class FireSimServerNode(FireSimNode):
|
||||||
str(self.server_hardware_config))
|
str(self.server_hardware_config))
|
||||||
return msg
|
return msg
|
||||||
|
|
||||||
def run_sim_start_command(self, slotno):
|
def run_sim_start_command(self, slotno: int) -> None:
|
||||||
""" get/run the command to run a simulation. assumes it will be
|
""" get/run the command to run a simulation. assumes it will be
|
||||||
called in a directory where its required_files are already located.
|
called in a directory where its required_files are already located.
|
||||||
"""
|
"""
|
||||||
|
@ -248,7 +287,12 @@ class FireSimServerNode(FireSimNode):
|
||||||
all_bootbins = [self.get_bootbin_name()]
|
all_bootbins = [self.get_bootbin_name()]
|
||||||
all_shmemportnames = [shmemportname]
|
all_shmemportnames = [shmemportname]
|
||||||
|
|
||||||
runcommand = self.server_hardware_config.get_boot_simulation_command(
|
assert (self.server_profile_interval is not None and all_bootbins is not None and self.trace_enable is not None and
|
||||||
|
self.trace_select is not None and self.trace_start is not None and self.trace_end is not None and self.trace_output_format is not None and
|
||||||
|
self.autocounter_readrate is not None and all_shmemportnames is not None and self.zerooutdram is not None and self.disable_asserts is not None and
|
||||||
|
self.print_start is not None and self.print_end is not None and self.print_cycle_prefix is not None)
|
||||||
|
|
||||||
|
runcommand = self.get_resolved_server_hardware_config().get_boot_simulation_command(
|
||||||
slotno, all_macs, all_rootfses, all_linklatencies, all_maxbws,
|
slotno, all_macs, all_rootfses, all_linklatencies, all_maxbws,
|
||||||
self.server_profile_interval, all_bootbins, self.trace_enable,
|
self.server_profile_interval, all_bootbins, self.trace_enable,
|
||||||
self.trace_select, self.trace_start, self.trace_end, self.trace_output_format,
|
self.trace_select, self.trace_start, self.trace_end, self.trace_output_format,
|
||||||
|
@ -257,7 +301,7 @@ class FireSimServerNode(FireSimNode):
|
||||||
|
|
||||||
run(runcommand)
|
run(runcommand)
|
||||||
|
|
||||||
def copy_back_job_results_from_run(self, slotno):
|
def copy_back_job_results_from_run(self, slotno: int) -> None:
|
||||||
"""
|
"""
|
||||||
1) Make the local directory for this job's output
|
1) Make the local directory for this job's output
|
||||||
2) Copy back UART log
|
2) Copy back UART log
|
||||||
|
@ -336,25 +380,28 @@ class FireSimServerNode(FireSimNode):
|
||||||
rootLogger.debug(rsync_cap)
|
rootLogger.debug(rsync_cap)
|
||||||
rootLogger.debug(rsync_cap.stderr)
|
rootLogger.debug(rsync_cap.stderr)
|
||||||
|
|
||||||
def get_sim_kill_command(self, slotno):
|
def get_sim_kill_command(self, slotno: int) -> str:
|
||||||
""" return the command to kill the simulation. assumes it will be
|
""" return the command to kill the simulation. assumes it will be
|
||||||
called in a directory where its required_files are already located.
|
called in a directory where its required_files are already located.
|
||||||
"""
|
"""
|
||||||
return self.server_hardware_config.get_kill_simulation_command()
|
return self.get_resolved_server_hardware_config().get_kill_simulation_command()
|
||||||
|
|
||||||
def get_required_files_local_paths(self):
|
def get_required_files_local_paths(self) -> List[Tuple[str, str]]:
|
||||||
""" Return local paths of all stuff needed to run this simulation as
|
""" Return local paths of all stuff needed to run this simulation as
|
||||||
an array. """
|
an array. """
|
||||||
all_paths = []
|
all_paths = []
|
||||||
|
|
||||||
if self.get_job().rootfs_path() is not None:
|
job_rootfs_path = self.get_job().rootfs_path()
|
||||||
all_paths.append([self.get_job().rootfs_path(), self.get_rootfs_name()])
|
if job_rootfs_path is not None:
|
||||||
|
self_rootfs_name = self.get_rootfs_name()
|
||||||
|
assert self_rootfs_name is not None
|
||||||
|
all_paths.append((job_rootfs_path, self_rootfs_name))
|
||||||
|
|
||||||
all_paths.append([self.get_job().bootbinary_path(), self.get_bootbin_name()])
|
all_paths.append((self.get_job().bootbinary_path(), self.get_bootbin_name()))
|
||||||
|
|
||||||
driver_path = self.server_hardware_config.get_local_driver_path()
|
driver_path = self.get_resolved_server_hardware_config().get_local_driver_path()
|
||||||
all_paths.append([driver_path, ''])
|
all_paths.append((driver_path, ''))
|
||||||
all_paths.append([self.server_hardware_config.get_local_runtime_conf_path(), ''])
|
all_paths.append((self.get_resolved_server_hardware_config().get_local_runtime_conf_path(), ''))
|
||||||
|
|
||||||
# shared libraries
|
# shared libraries
|
||||||
all_paths += get_local_shared_libraries(driver_path)
|
all_paths += get_local_shared_libraries(driver_path)
|
||||||
|
@ -362,29 +409,33 @@ class FireSimServerNode(FireSimNode):
|
||||||
all_paths += self.get_job().get_siminputs()
|
all_paths += self.get_job().get_siminputs()
|
||||||
return all_paths
|
return all_paths
|
||||||
|
|
||||||
def get_agfi(self):
|
def get_agfi(self) -> str:
|
||||||
""" Return the AGFI that should be flashed. """
|
""" Return the AGFI that should be flashed. """
|
||||||
return self.server_hardware_config.agfi
|
return self.get_resolved_server_hardware_config().agfi
|
||||||
|
|
||||||
def assign_job(self, job):
|
def assign_job(self, job: JobConfig) -> None:
|
||||||
""" Assign a job to this node. """
|
""" Assign a job to this node. """
|
||||||
self.job = job
|
self.job = job
|
||||||
|
|
||||||
def get_job(self):
|
def get_job(self) -> JobConfig:
|
||||||
""" Get the job assigned to this node. """
|
""" Get the job assigned to this node. """
|
||||||
|
assert self.job is not None
|
||||||
return self.job
|
return self.job
|
||||||
|
|
||||||
def get_job_name(self):
|
def get_job_name(self) -> str:
|
||||||
|
assert self.job is not None
|
||||||
return self.job.jobname
|
return self.job.jobname
|
||||||
|
|
||||||
def get_rootfs_name(self):
|
def get_rootfs_name(self) -> Optional[str]:
|
||||||
if self.get_job().rootfs_path() is None:
|
rootfs_path = self.get_job().rootfs_path()
|
||||||
|
if rootfs_path is None:
|
||||||
return None
|
return None
|
||||||
# prefix rootfs name with the job name to disambiguate in supernode
|
else:
|
||||||
# cases
|
# prefix rootfs name with the job name to disambiguate in supernode
|
||||||
return self.get_job_name() + "-" + self.get_job().rootfs_path().split("/")[-1]
|
# cases
|
||||||
|
return self.get_job_name() + "-" + rootfs_path.split("/")[-1]
|
||||||
|
|
||||||
def get_bootbin_name(self):
|
def get_bootbin_name(self) -> str:
|
||||||
# prefix bootbin name with the job name to disambiguate in supernode
|
# prefix bootbin name with the job name to disambiguate in supernode
|
||||||
# cases
|
# cases
|
||||||
return self.get_job_name() + "-" + self.get_job().bootbinary_path().split("/")[-1]
|
return self.get_job_name() + "-" + self.get_job().bootbinary_path().split("/")[-1]
|
||||||
|
@ -395,10 +446,13 @@ class FireSimSuperNodeServerNode(FireSimServerNode):
|
||||||
call out to dummy server nodes to get all the info to launch the one
|
call out to dummy server nodes to get all the info to launch the one
|
||||||
command line to run the FPGA sim that has N > 1 sims on one fpga."""
|
command line to run the FPGA sim that has N > 1 sims on one fpga."""
|
||||||
|
|
||||||
def copy_back_job_results_from_run(self, slotno):
|
def __init__(self) -> None:
|
||||||
|
super().__init__()
|
||||||
|
|
||||||
|
def copy_back_job_results_from_run(self, slotno: int) -> None:
|
||||||
""" This override is to call copy back job results for all the dummy nodes too. """
|
""" This override is to call copy back job results for all the dummy nodes too. """
|
||||||
# first call the original
|
# first call the original
|
||||||
super(FireSimSuperNodeServerNode, self).copy_back_job_results_from_run(slotno)
|
super().copy_back_job_results_from_run(slotno)
|
||||||
|
|
||||||
# call on all siblings
|
# call on all siblings
|
||||||
num_siblings = self.supernode_get_num_siblings_plus_one()
|
num_siblings = self.supernode_get_num_siblings_plus_one()
|
||||||
|
@ -413,26 +467,26 @@ class FireSimSuperNodeServerNode(FireSimServerNode):
|
||||||
sib.copy_back_job_results_from_run(slotno)
|
sib.copy_back_job_results_from_run(slotno)
|
||||||
|
|
||||||
|
|
||||||
def allocate_nbds(self):
|
def allocate_nbds(self) -> None:
|
||||||
""" called by the allocate nbds pass to assign an nbd to a qcow2 image.
|
""" called by the allocate nbds pass to assign an nbd to a qcow2 image.
|
||||||
"""
|
"""
|
||||||
num_siblings = self.supernode_get_num_siblings_plus_one()
|
num_siblings = self.supernode_get_num_siblings_plus_one()
|
||||||
|
|
||||||
rootfses_list = [self.get_rootfs_name()] + [self.supernode_get_sibling_rootfs(x) for x in range(1, num_siblings)]
|
rootfses_list = [self.get_rootfs_name()] + [self.supernode_get_sibling(x).get_rootfs_name() for x in range(1, num_siblings)]
|
||||||
|
|
||||||
for rootfsname in rootfses_list:
|
for rootfsname in rootfses_list:
|
||||||
if rootfsname.endswith(".qcow2"):
|
if rootfsname is not None and rootfsname.endswith(".qcow2"):
|
||||||
allocd_device = self.get_host_instance().nbd_tracker.get_nbd_for_imagename(rootfsname)
|
allocd_device = self.get_host_instance().nbd_tracker.get_nbd_for_imagename(rootfsname)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def supernode_get_num_siblings_plus_one(self):
|
def supernode_get_num_siblings_plus_one(self) -> int:
|
||||||
""" This returns the number of siblings the supernodeservernode has,
|
""" This returns the number of siblings the supernodeservernode has,
|
||||||
plus one (because in most places, we use siblings + 1, not just siblings)
|
plus one (because in most places, we use siblings + 1, not just siblings)
|
||||||
"""
|
"""
|
||||||
siblings = 1
|
siblings = 1
|
||||||
count = False
|
count = False
|
||||||
for index, servernode in enumerate(map( lambda x : x.get_downlink_side(), self.uplinks[0].get_uplink_side().downlinks)):
|
for index, servernode in enumerate(map(lambda x : x.get_downlink_side(), self.uplinks[0].get_uplink_side().downlinks)):
|
||||||
if count:
|
if count:
|
||||||
if isinstance(servernode, FireSimDummyServerNode):
|
if isinstance(servernode, FireSimDummyServerNode):
|
||||||
siblings += 1
|
siblings += 1
|
||||||
|
@ -442,68 +496,47 @@ class FireSimSuperNodeServerNode(FireSimServerNode):
|
||||||
count = True
|
count = True
|
||||||
return siblings
|
return siblings
|
||||||
|
|
||||||
def supernode_get_sibling(self, siblingindex):
|
def supernode_get_sibling(self, siblingindex: int) -> FireSimDummyServerNode:
|
||||||
""" return the sibling for supernode mode.
|
""" return the sibling for supernode mode.
|
||||||
siblingindex = 1 -> next sibling, 2 = second, 3 = last one."""
|
siblingindex = 1 -> next sibling, 2 = second, 3 = last one."""
|
||||||
for index, servernode in enumerate(map( lambda x : x.get_downlink_side(), self.uplinks[0].get_uplink_side().downlinks)):
|
for index, servernode in enumerate(map(lambda x : x.get_downlink_side(), self.uplinks[0].get_uplink_side().downlinks)):
|
||||||
if self == servernode:
|
if self == servernode:
|
||||||
return self.uplinks[0].get_uplink_side().downlinks[index+siblingindex].get_downlink_side()
|
node = self.uplinks[0].get_uplink_side().downlinks[index+siblingindex].get_downlink_side()
|
||||||
|
assert isinstance(node, FireSimDummyServerNode)
|
||||||
|
return node
|
||||||
|
assert False, "Should return supernode sibling"
|
||||||
|
|
||||||
def supernode_get_sibling_mac_address(self, siblingindex):
|
def run_sim_start_command(self, slotno: int) -> None:
|
||||||
""" return the sibling's mac address for supernode mode.
|
|
||||||
siblingindex = 1 -> next sibling, 2 = second, 3 = last one."""
|
|
||||||
return self.supernode_get_sibling(siblingindex).get_mac_address()
|
|
||||||
|
|
||||||
def supernode_get_sibling_rootfs(self, siblingindex):
|
|
||||||
""" return the sibling's rootfs for supernode mode.
|
|
||||||
siblingindex = 1 -> next sibling, 2 = second, 3 = last one."""
|
|
||||||
return self.supernode_get_sibling(siblingindex).get_rootfs_name()
|
|
||||||
|
|
||||||
def supernode_get_sibling_bootbin(self, siblingindex):
|
|
||||||
""" return the sibling's rootfs for supernode mode.
|
|
||||||
siblingindex = 1 -> next sibling, 2 = second, 3 = last one."""
|
|
||||||
return self.supernode_get_sibling(siblingindex).get_bootbin_name()
|
|
||||||
|
|
||||||
def supernode_get_sibling_rootfs_path(self, siblingindex):
|
|
||||||
return self.supernode_get_sibling(siblingindex).get_job().rootfs_path()
|
|
||||||
|
|
||||||
def supernode_get_sibling_bootbinary_path(self, siblingindex):
|
|
||||||
return self.supernode_get_sibling(siblingindex).get_job().bootbinary_path()
|
|
||||||
|
|
||||||
def supernode_get_sibling_link_latency(self, siblingindex):
|
|
||||||
return self.supernode_get_sibling(siblingindex).server_link_latency
|
|
||||||
|
|
||||||
def supernode_get_sibling_bw_max(self, siblingindex):
|
|
||||||
return self.supernode_get_sibling(siblingindex).server_bw_max
|
|
||||||
|
|
||||||
def supernode_get_sibling_shmemportname(self, siblingindex):
|
|
||||||
return self.supernode_get_sibling(siblingindex).uplinks[0].get_global_link_id()
|
|
||||||
|
|
||||||
def run_sim_start_command(self, slotno):
|
|
||||||
""" get/run the command to run a simulation. assumes it will be
|
""" get/run the command to run a simulation. assumes it will be
|
||||||
called in a directory where its required_files are already located."""
|
called in a directory where its required_files are already located."""
|
||||||
|
|
||||||
num_siblings = self.supernode_get_num_siblings_plus_one()
|
num_siblings = self.supernode_get_num_siblings_plus_one()
|
||||||
|
|
||||||
all_macs = [self.get_mac_address()] + [self.supernode_get_sibling_mac_address(x) for x in range(1, num_siblings)]
|
all_macs = [self.get_mac_address()] + [self.supernode_get_sibling(x).get_mac_address() for x in range(1, num_siblings)]
|
||||||
all_rootfses = self.process_qcow2_rootfses([self.get_rootfs_name()] + [self.supernode_get_sibling_rootfs(x) for x in range(1, num_siblings)])
|
all_rootfses = self.process_qcow2_rootfses([self.get_rootfs_name()] + [self.supernode_get_sibling(x).get_rootfs_name() for x in range(1, num_siblings)])
|
||||||
all_bootbins = [self.get_bootbin_name()] + [self.supernode_get_sibling_bootbin(x) for x in range(1, num_siblings)]
|
all_bootbins = [self.get_bootbin_name()] + [self.supernode_get_sibling(x).get_bootbin_name() for x in range(1, num_siblings)]
|
||||||
all_linklatencies = [self.server_link_latency] + [self.supernode_get_sibling_link_latency(x) for x in range(1, num_siblings)]
|
all_linklatencies = [self.server_link_latency] + [self.supernode_get_sibling(x).server_link_latency for x in range(1, num_siblings)]
|
||||||
all_maxbws = [self.server_bw_max] + [self.supernode_get_sibling_bw_max(x) for x in range(1, num_siblings)]
|
all_maxbws = [self.server_bw_max] + [self.supernode_get_sibling(x).server_bw_max for x in range(1, num_siblings)]
|
||||||
|
|
||||||
all_shmemportnames = ["default" for x in range(num_siblings)]
|
all_shmemportnames = ["default" for x in range(num_siblings)]
|
||||||
if self.uplinks:
|
if self.uplinks:
|
||||||
all_shmemportnames = [self.uplinks[0].get_global_link_id()] + [self.supernode_get_sibling_shmemportname(x) for x in range(1, num_siblings)]
|
all_shmemportnames = [self.uplinks[0].get_global_link_id()] + [self.supernode_get_sibling(x).uplinks[0].get_global_link_id() for x in range(1, num_siblings)]
|
||||||
|
|
||||||
runcommand = self.server_hardware_config.get_boot_simulation_command(
|
assert (self.server_profile_interval is not None and all_bootbins is not None and self.trace_enable is not None and
|
||||||
|
self.trace_select is not None and self.trace_start is not None and self.trace_end is not None and self.trace_output_format is not None and
|
||||||
|
self.autocounter_readrate is not None and all_shmemportnames is not None and self.zerooutdram is not None and self.disable_asserts is not None and
|
||||||
|
self.print_start is not None and self.print_end is not None and self.print_cycle_prefix is not None)
|
||||||
|
|
||||||
|
runcommand = self.get_resolved_server_hardware_config().get_boot_simulation_command(
|
||||||
slotno, all_macs, all_rootfses, all_linklatencies, all_maxbws,
|
slotno, all_macs, all_rootfses, all_linklatencies, all_maxbws,
|
||||||
self.server_profile_interval, all_bootbins, self.trace_enable,
|
self.server_profile_interval, all_bootbins, self.trace_enable,
|
||||||
self.trace_select, self.trace_start, self.trace_end, self.trace_output_format,
|
self.trace_select, self.trace_start, self.trace_end, self.trace_output_format,
|
||||||
self.autocounter_readrate, all_shmemportnames, self.zerooutdram)
|
self.autocounter_readrate, all_shmemportnames, self.zerooutdram, self.disable_asserts,
|
||||||
|
self.print_start, self.print_end, self.print_cycle_prefix)
|
||||||
|
|
||||||
run(runcommand)
|
run(runcommand)
|
||||||
|
|
||||||
def get_required_files_local_paths(self):
|
def get_required_files_local_paths(self) -> List[Tuple[str, str]]:
|
||||||
""" Return local paths of all stuff needed to run this simulation as
|
""" Return local paths of all stuff needed to run this simulation as
|
||||||
an array. """
|
an array. """
|
||||||
|
|
||||||
|
@ -512,13 +545,17 @@ class FireSimSuperNodeServerNode(FireSimServerNode):
|
||||||
def local_and_remote(filepath, index):
|
def local_and_remote(filepath, index):
|
||||||
return [filepath, get_path_trailing(filepath) + str(index)]
|
return [filepath, get_path_trailing(filepath) + str(index)]
|
||||||
|
|
||||||
all_paths = []
|
hw_cfg = self.get_resolved_server_hardware_config()
|
||||||
if self.get_job().rootfs_path() is not None:
|
|
||||||
all_paths.append([self.get_job().rootfs_path(),
|
|
||||||
self.get_rootfs_name()])
|
|
||||||
|
|
||||||
driver_path = self.server_hardware_config.get_local_driver_path()
|
all_paths = []
|
||||||
all_paths.append([driver_path, ''])
|
job_rootfs_path = self.get_job().rootfs_path()
|
||||||
|
if job_rootfs_path is not None:
|
||||||
|
self_rootfs_name = self.get_rootfs_name()
|
||||||
|
assert self_rootfs_name is not None
|
||||||
|
all_paths.append((job_rootfs_path, self_rootfs_name))
|
||||||
|
|
||||||
|
driver_path = hw_cfg.get_local_driver_path()
|
||||||
|
all_paths.append((driver_path, ''))
|
||||||
|
|
||||||
# shared libraries
|
# shared libraries
|
||||||
all_paths += get_local_shared_libraries(driver_path)
|
all_paths += get_local_shared_libraries(driver_path)
|
||||||
|
@ -526,30 +563,30 @@ class FireSimSuperNodeServerNode(FireSimServerNode):
|
||||||
num_siblings = self.supernode_get_num_siblings_plus_one()
|
num_siblings = self.supernode_get_num_siblings_plus_one()
|
||||||
|
|
||||||
for x in range(1, num_siblings):
|
for x in range(1, num_siblings):
|
||||||
sibling_rootfs_path = self.supernode_get_sibling_rootfs_path(x)
|
sibling = self.supernode_get_sibling(x)
|
||||||
if sibling_rootfs_path is not None:
|
|
||||||
all_paths.append([sibling_rootfs_path,
|
|
||||||
self.supernode_get_sibling_rootfs(x)])
|
|
||||||
|
|
||||||
all_paths.append([self.get_job().bootbinary_path(),
|
sibling_job_rootfs_path = self.get_job().rootfs_path()
|
||||||
self.get_bootbin_name()])
|
if sibling_job_rootfs_path is not None:
|
||||||
|
sibling_rootfs_name = sibling.get_rootfs_name()
|
||||||
|
assert sibling_rootfs_name is not None
|
||||||
|
all_paths.append((sibling_job_rootfs_path, sibling_rootfs_name))
|
||||||
|
|
||||||
for x in range(1, num_siblings):
|
all_paths.append((sibling.get_job().bootbinary_path(),
|
||||||
all_paths.append([self.supernode_get_sibling_bootbinary_path(x),
|
sibling.get_bootbin_name()))
|
||||||
self.supernode_get_sibling_bootbin(x)])
|
|
||||||
|
|
||||||
all_paths.append([self.server_hardware_config.get_local_runtime_conf_path(), ''])
|
all_paths.append((self.get_job().bootbinary_path(),
|
||||||
|
self.get_bootbin_name()))
|
||||||
|
|
||||||
|
all_paths.append((hw_cfg.get_local_runtime_conf_path(), ''))
|
||||||
return all_paths
|
return all_paths
|
||||||
|
|
||||||
class FireSimDummyServerNode(FireSimServerNode):
|
class FireSimDummyServerNode(FireSimServerNode):
|
||||||
""" This is a dummy server node for supernode mode. """
|
""" This is a dummy server node for supernode mode. """
|
||||||
def __init__(self, server_hardware_config=None, server_link_latency=None,
|
def __init__(self, server_hardware_config: Optional[Union[RuntimeHWConfig, str]] = None, server_link_latency: Optional[int] = None,
|
||||||
server_bw_max=None):
|
server_bw_max: Optional[int] = None):
|
||||||
super(FireSimDummyServerNode, self).__init__(server_hardware_config,
|
super().__init__(server_hardware_config, server_link_latency, server_bw_max)
|
||||||
server_link_latency,
|
|
||||||
server_bw_max)
|
|
||||||
|
|
||||||
def allocate_nbds(self):
|
def allocate_nbds(self) -> None:
|
||||||
""" this is handled by the non-dummy node. override so it does nothing
|
""" this is handled by the non-dummy node. override so it does nothing
|
||||||
when called"""
|
when called"""
|
||||||
pass
|
pass
|
||||||
|
@ -562,13 +599,19 @@ class FireSimSwitchNode(FireSimNode):
|
||||||
much special configuration."""
|
much special configuration."""
|
||||||
|
|
||||||
# used to give switches a global ID
|
# used to give switches a global ID
|
||||||
SWITCHES_CREATED = 0
|
SWITCHES_CREATED: int = 0
|
||||||
|
switch_id_internal: int
|
||||||
|
switch_table: List[int]
|
||||||
|
switch_link_latency: Optional[int]
|
||||||
|
switch_switching_latency: Optional[int]
|
||||||
|
switch_bandwidth: Optional[int]
|
||||||
|
switch_builder: AbstractSwitchToSwitchConfig
|
||||||
|
|
||||||
def __init__(self, switching_latency=None, link_latency=None, bandwidth=None):
|
def __init__(self, switching_latency: Optional[int] = None, link_latency: Optional[int] = None, bandwidth: Optional[int] = None):
|
||||||
super(FireSimSwitchNode, self).__init__()
|
super().__init__()
|
||||||
self.switch_id_internal = FireSimSwitchNode.SWITCHES_CREATED
|
self.switch_id_internal = FireSimSwitchNode.SWITCHES_CREATED
|
||||||
FireSimSwitchNode.SWITCHES_CREATED += 1
|
FireSimSwitchNode.SWITCHES_CREATED += 1
|
||||||
self.switch_table = None
|
self.switch_table = []
|
||||||
self.switch_link_latency = link_latency
|
self.switch_link_latency = link_latency
|
||||||
self.switch_switching_latency = switching_latency
|
self.switch_switching_latency = switching_latency
|
||||||
self.switch_bandwidth = bandwidth
|
self.switch_bandwidth = bandwidth
|
||||||
|
@ -579,12 +622,12 @@ class FireSimSwitchNode(FireSimNode):
|
||||||
#self.switch_builder = None
|
#self.switch_builder = None
|
||||||
self.switch_builder = AbstractSwitchToSwitchConfig(self)
|
self.switch_builder = AbstractSwitchToSwitchConfig(self)
|
||||||
|
|
||||||
def build_switch_sim_binary(self):
|
def build_switch_sim_binary(self) -> None:
|
||||||
""" This actually emits a config and builds the switch binary that
|
""" This actually emits a config and builds the switch binary that
|
||||||
can be used to do the simulation. """
|
can be used to do the simulation. """
|
||||||
self.switch_builder.buildswitch()
|
self.switch_builder.buildswitch()
|
||||||
|
|
||||||
def get_required_files_local_paths(self):
|
def get_required_files_local_paths(self) -> List[Tuple[str, str]]:
|
||||||
""" Return local paths of all stuff needed to run this simulation as
|
""" Return local paths of all stuff needed to run this simulation as
|
||||||
array. """
|
array. """
|
||||||
all_paths = []
|
all_paths = []
|
||||||
|
@ -593,13 +636,13 @@ class FireSimSwitchNode(FireSimNode):
|
||||||
all_paths += get_local_shared_libraries(bin)
|
all_paths += get_local_shared_libraries(bin)
|
||||||
return all_paths
|
return all_paths
|
||||||
|
|
||||||
def get_switch_start_command(self):
|
def get_switch_start_command(self) -> str:
|
||||||
return self.switch_builder.run_switch_simulation_command()
|
return self.switch_builder.run_switch_simulation_command()
|
||||||
|
|
||||||
def get_switch_kill_command(self):
|
def get_switch_kill_command(self) -> str:
|
||||||
return self.switch_builder.kill_switch_simulation_command()
|
return self.switch_builder.kill_switch_simulation_command()
|
||||||
|
|
||||||
def copy_back_switchlog_from_run(self, job_results_dir, switch_slot_no):
|
def copy_back_switchlog_from_run(self, job_results_dir: str, switch_slot_no: int) -> None:
|
||||||
"""
|
"""
|
||||||
Copy back the switch log for this switch
|
Copy back the switch log for this switch
|
||||||
|
|
||||||
|
@ -620,8 +663,9 @@ class FireSimSwitchNode(FireSimNode):
|
||||||
get(remote_path=remote_sim_run_dir + simoutputfile, local_path=job_dir)
|
get(remote_path=remote_sim_run_dir + simoutputfile, local_path=job_dir)
|
||||||
|
|
||||||
|
|
||||||
def diagramstr(self):
|
def diagramstr(self) -> str:
|
||||||
msg = """{}:{}\n---------\ndownlinks: {}\nswitchingtable: {}""".format(
|
msg = f"FireSimSwitchNode:{self.switch_id_internal}\n"
|
||||||
"FireSimSwitchNode", str(self.switch_id_internal), ", ".join(map(str, self.downlinkmacs)),
|
msg += f"---------\n"
|
||||||
", ".join(map(str, self.switch_table)))
|
msg += f"""downlinks: {", ".join(map(str, self.downlinkmacs))}\n"""
|
||||||
|
msg += f"""switchingtable: {", ".join(map(str, self.switch_table))}"""
|
||||||
return msg
|
return msg
|
||||||
|
|
|
@ -1,25 +1,31 @@
|
||||||
""" This constructs a topology and performs a series of passes on it. """
|
""" This constructs a topology and performs a series of passes on it. """
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
import time
|
import time
|
||||||
import os
|
import os
|
||||||
import pprint
|
import pprint
|
||||||
import logging
|
import logging
|
||||||
import datetime
|
import datetime
|
||||||
|
from fabric.api import env, parallel, execute, run, local, warn_only # type: ignore
|
||||||
from runtools.switch_model_config import *
|
|
||||||
from runtools.firesim_topology_core import *
|
|
||||||
from runtools.utils import MacAddress
|
|
||||||
from fabric.api import * # type: ignore
|
|
||||||
from colorama import Fore, Style # type: ignore
|
from colorama import Fore, Style # type: ignore
|
||||||
import types
|
|
||||||
from functools import reduce
|
from functools import reduce
|
||||||
|
|
||||||
|
from runtools.firesim_topology_elements import FireSimServerNode, FireSimDummyServerNode, FireSimSwitchNode
|
||||||
|
from runtools.firesim_topology_core import FireSimTopology
|
||||||
|
from runtools.utils import MacAddress
|
||||||
from util.streamlogger import StreamLogger
|
from util.streamlogger import StreamLogger
|
||||||
|
|
||||||
|
from typing import Dict, Any, cast, List, TYPE_CHECKING, Callable
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from runtools.run_farm import RunFarm
|
||||||
|
from runtools.runtime_config import RuntimeHWDB
|
||||||
|
from runtools.workload import WorkloadConfig
|
||||||
|
|
||||||
rootLogger = logging.getLogger()
|
rootLogger = logging.getLogger()
|
||||||
|
|
||||||
@parallel # type: ignore
|
@parallel
|
||||||
def instance_liveness():
|
def instance_liveness() -> None:
|
||||||
""" Confirm that all instances are accessible (are running and can be ssh'ed into) first so that we don't run any
|
""" Confirm that all instances are accessible (are running and can be ssh'ed into) first so that we don't run any
|
||||||
actual firesim-related commands on only some of the run farm machines."""
|
actual firesim-related commands on only some of the run farm machines."""
|
||||||
rootLogger.info("""[{}] Checking if host instance is up...""".format(env.host_string))
|
rootLogger.info("""[{}] Checking if host instance is up...""".format(env.host_string))
|
||||||
|
@ -32,15 +38,39 @@ class FireSimTopologyWithPasses:
|
||||||
|
|
||||||
>>> tconf = FireSimTargetConfiguration("example_16config")
|
>>> tconf = FireSimTargetConfiguration("example_16config")
|
||||||
"""
|
"""
|
||||||
|
passes_used: List[str]
|
||||||
|
user_topology_name: str
|
||||||
|
no_net_num_nodes: int
|
||||||
|
run_farm: RunFarm
|
||||||
|
hwdb: RuntimeHWDB
|
||||||
|
workload: WorkloadConfig
|
||||||
|
firesimtopol: FireSimTopology
|
||||||
|
defaulthwconfig: str
|
||||||
|
defaultlinklatency: int
|
||||||
|
defaultswitchinglatency: int
|
||||||
|
defaultnetbandwidth: int
|
||||||
|
defaultprofileinterval: int
|
||||||
|
defaulttraceenable: bool
|
||||||
|
defaulttraceselect: str
|
||||||
|
defaulttracestart: str
|
||||||
|
defaulttraceend: str
|
||||||
|
defaulttraceoutputformat: str
|
||||||
|
defaultautocounterreadrate: int
|
||||||
|
defaultzerooutdram: bool
|
||||||
|
defaultdisableasserts: bool
|
||||||
|
defaultprintstart: str
|
||||||
|
defaultprintend: str
|
||||||
|
defaultprintcycleprefix: bool
|
||||||
|
terminateoncompletion: bool
|
||||||
|
|
||||||
def __init__(self, user_topology_name, no_net_num_nodes, run_farm, hwdb,
|
def __init__(self, user_topology_name: str, no_net_num_nodes: int, run_farm: RunFarm, hwdb: RuntimeHWDB,
|
||||||
defaulthwconfig, workload, defaultlinklatency, defaultswitchinglatency,
|
defaulthwconfig: str, workload: WorkloadConfig, defaultlinklatency: int, defaultswitchinglatency: int,
|
||||||
defaultnetbandwidth, defaultprofileinterval,
|
defaultnetbandwidth: int, defaultprofileinterval: int,
|
||||||
defaulttraceenable, defaulttraceselect, defaulttracestart, defaulttraceend,
|
defaulttraceenable: bool, defaulttraceselect: str, defaulttracestart: str, defaulttraceend: str,
|
||||||
defaulttraceoutputformat,
|
defaulttraceoutputformat: str,
|
||||||
defaultautocounterreadrate, terminateoncompletion,
|
defaultautocounterreadrate: int, terminateoncompletion: bool,
|
||||||
defaultzerooutdram, defaultdisableasserts,
|
defaultzerooutdram: bool, defaultdisableasserts: bool,
|
||||||
defaultprintstart, defaultprintend, defaultprintcycleprefix):
|
defaultprintstart: str, defaultprintend: str, defaultprintcycleprefix: bool) -> None:
|
||||||
self.passes_used = []
|
self.passes_used = []
|
||||||
self.user_topology_name = user_topology_name
|
self.user_topology_name = user_topology_name
|
||||||
self.no_net_num_nodes = no_net_num_nodes
|
self.no_net_num_nodes = no_net_num_nodes
|
||||||
|
@ -68,12 +98,8 @@ class FireSimTopologyWithPasses:
|
||||||
|
|
||||||
self.phase_one_passes()
|
self.phase_one_passes()
|
||||||
|
|
||||||
def pass_return_dfs(self):
|
|
||||||
""" Just return the nodes in DFS order """
|
|
||||||
return self.firesimtopol.get_dfs_order()
|
|
||||||
|
|
||||||
|
def pass_assign_mac_addresses(self) -> None:
|
||||||
def pass_assign_mac_addresses(self):
|
|
||||||
""" DFS through the topology to assign mac addresses """
|
""" DFS through the topology to assign mac addresses """
|
||||||
self.passes_used.append("pass_assign_mac_addresses")
|
self.passes_used.append("pass_assign_mac_addresses")
|
||||||
|
|
||||||
|
@ -84,7 +110,7 @@ class FireSimTopologyWithPasses:
|
||||||
node.assign_mac_address(MacAddress())
|
node.assign_mac_address(MacAddress())
|
||||||
|
|
||||||
|
|
||||||
def pass_compute_switching_tables(self):
|
def pass_compute_switching_tables(self) -> None:
|
||||||
""" This creates the MAC addr -> port lists for switch nodes.
|
""" This creates the MAC addr -> port lists for switch nodes.
|
||||||
|
|
||||||
a) First, a pass that computes "downlinkmacs" for each node, which
|
a) First, a pass that computes "downlinkmacs" for each node, which
|
||||||
|
@ -110,7 +136,11 @@ class FireSimTopologyWithPasses:
|
||||||
if isinstance(node, FireSimServerNode):
|
if isinstance(node, FireSimServerNode):
|
||||||
node.downlinkmacs = [node.get_mac_address()]
|
node.downlinkmacs = [node.get_mac_address()]
|
||||||
else:
|
else:
|
||||||
childdownlinkmacs = [x.get_downlink_side().downlinkmacs for x in node.downlinks]
|
childdownlinkmacs: List[List[MacAddress]] = []
|
||||||
|
for x in node.downlinks:
|
||||||
|
childdownlinkmacs.append(x.get_downlink_side().downlinkmacs)
|
||||||
|
|
||||||
|
# flatten
|
||||||
node.downlinkmacs = reduce(lambda x, y: x + y, childdownlinkmacs)
|
node.downlinkmacs = reduce(lambda x, y: x + y, childdownlinkmacs)
|
||||||
|
|
||||||
switches_dfs_order = self.firesimtopol.get_dfs_order_switches()
|
switches_dfs_order = self.firesimtopol.get_dfs_order_switches()
|
||||||
|
@ -127,7 +157,7 @@ class FireSimTopologyWithPasses:
|
||||||
|
|
||||||
switch.switch_table = switchtab
|
switch.switch_table = switchtab
|
||||||
|
|
||||||
def pass_create_topology_diagram(self):
|
def pass_create_topology_diagram(self) -> None:
|
||||||
""" Produce a PDF that shows a diagram of the network.
|
""" Produce a PDF that shows a diagram of the network.
|
||||||
Useful for debugging passes to see what has been done to particular
|
Useful for debugging passes to see what has been done to particular
|
||||||
nodes. """
|
nodes. """
|
||||||
|
@ -150,12 +180,12 @@ class FireSimTopologyWithPasses:
|
||||||
switches_dfs_order = self.firesimtopol.get_dfs_order_switches()
|
switches_dfs_order = self.firesimtopol.get_dfs_order_switches()
|
||||||
for node in switches_dfs_order:
|
for node in switches_dfs_order:
|
||||||
for downlink in node.downlinks:
|
for downlink in node.downlinks:
|
||||||
downlink = downlink.get_downlink_side()
|
downlink_side = downlink.get_downlink_side()
|
||||||
gviz_graph.edge(str(node), str(downlink))
|
gviz_graph.edge(str(node), str(downlink_side))
|
||||||
|
|
||||||
gviz_graph.render(view=False)
|
gviz_graph.render(view=False)
|
||||||
|
|
||||||
def pass_no_net_host_mapping(self):
|
def pass_no_net_host_mapping(self) -> None:
|
||||||
# only if we have no networks - pack simulations
|
# only if we have no networks - pack simulations
|
||||||
# assumes the user has provided enough or more slots
|
# assumes the user has provided enough or more slots
|
||||||
servers = self.firesimtopol.get_dfs_order_servers()
|
servers = self.firesimtopol.get_dfs_order_servers()
|
||||||
|
@ -181,7 +211,7 @@ class FireSimTopologyWithPasses:
|
||||||
return
|
return
|
||||||
assert serverind == len(servers), "ERR: all servers were not assigned to a host."
|
assert serverind == len(servers), "ERR: all servers were not assigned to a host."
|
||||||
|
|
||||||
def pass_simple_networked_host_node_mapping(self):
|
def pass_simple_networked_host_node_mapping(self) -> None:
|
||||||
""" A very simple host mapping strategy. """
|
""" A very simple host mapping strategy. """
|
||||||
switches = self.firesimtopol.get_dfs_order_switches()
|
switches = self.firesimtopol.get_dfs_order_switches()
|
||||||
f1_2s_used = 0
|
f1_2s_used = 0
|
||||||
|
@ -193,12 +223,13 @@ class FireSimTopologyWithPasses:
|
||||||
# Filter out FireSimDummyServerNodes for actually deploying.
|
# Filter out FireSimDummyServerNodes for actually deploying.
|
||||||
# Infrastructure after this point will automatically look at the
|
# Infrastructure after this point will automatically look at the
|
||||||
# FireSimDummyServerNodes if a FireSimSuperNodeServerNode is used
|
# FireSimDummyServerNodes if a FireSimSuperNodeServerNode is used
|
||||||
downlinknodes = list(map(lambda x: x.get_downlink_side(), [downlink for downlink in switch.downlinks if not isinstance(downlink.get_downlink_side(), FireSimDummyServerNode)]))
|
alldownlinknodes = list(map(lambda x: x.get_downlink_side(), [downlink for downlink in switch.downlinks if not isinstance(downlink.get_downlink_side(), FireSimDummyServerNode)]))
|
||||||
if all([isinstance(x, FireSimSwitchNode) for x in downlinknodes]):
|
if all([isinstance(x, FireSimSwitchNode) for x in alldownlinknodes]):
|
||||||
# all downlinks are switches
|
# all downlinks are switches
|
||||||
self.run_farm.m4_16s[m4_16s_used].add_switch(switch)
|
self.run_farm.m4_16s[m4_16s_used].add_switch(switch)
|
||||||
m4_16s_used += 1
|
m4_16s_used += 1
|
||||||
elif all([isinstance(x, FireSimServerNode) for x in downlinknodes]):
|
elif all([isinstance(x, FireSimServerNode) for x in alldownlinknodes]):
|
||||||
|
downlinknodes = cast(List[FireSimServerNode], alldownlinknodes)
|
||||||
# all downlinks are simulations
|
# all downlinks are simulations
|
||||||
if (len(downlinknodes) == 1) and (f1_2s_used < len(self.run_farm.f1_2s)):
|
if (len(downlinknodes) == 1) and (f1_2s_used < len(self.run_farm.f1_2s)):
|
||||||
self.run_farm.f1_2s[f1_2s_used].add_switch(switch)
|
self.run_farm.f1_2s[f1_2s_used].add_switch(switch)
|
||||||
|
@ -217,7 +248,7 @@ class FireSimTopologyWithPasses:
|
||||||
else:
|
else:
|
||||||
assert False, "Mixed downlinks currently not supported."""
|
assert False, "Mixed downlinks currently not supported."""
|
||||||
|
|
||||||
def mapping_use_one_f1_16xlarge(self):
|
def mapping_use_one_f1_16xlarge(self) -> None:
|
||||||
""" Just put everything on one f1.16xlarge """
|
""" Just put everything on one f1.16xlarge """
|
||||||
switches = self.firesimtopol.get_dfs_order_switches()
|
switches = self.firesimtopol.get_dfs_order_switches()
|
||||||
f1_2s_used = 0
|
f1_2s_used = 0
|
||||||
|
@ -226,15 +257,16 @@ class FireSimTopologyWithPasses:
|
||||||
|
|
||||||
for switch in switches:
|
for switch in switches:
|
||||||
self.run_farm.f1_16s[f1_16s_used].add_switch(switch)
|
self.run_farm.f1_16s[f1_16s_used].add_switch(switch)
|
||||||
downlinknodes = map(lambda x: x.get_downlink_side(), switch.downlinks)
|
alldownlinknodes = map(lambda x: x.get_downlink_side(), switch.downlinks)
|
||||||
if all([isinstance(x, FireSimServerNode) for x in downlinknodes]):
|
if all([isinstance(x, FireSimServerNode) for x in alldownlinknodes]):
|
||||||
|
downlinknodes = cast(List[FireSimServerNode], alldownlinknodes)
|
||||||
for server in downlinknodes:
|
for server in downlinknodes:
|
||||||
self.run_farm.f1_16s[f1_16s_used].add_simulation(server)
|
self.run_farm.f1_16s[f1_16s_used].add_simulation(server)
|
||||||
elif any([isinstance(x, FireSimServerNode) for x in downlinknodes]):
|
elif any([isinstance(x, FireSimServerNode) for x in downlinknodes]):
|
||||||
assert False, "MIXED DOWNLINKS NOT SUPPORTED."
|
assert False, "MIXED DOWNLINKS NOT SUPPORTED."
|
||||||
f1_16s_used += 1
|
f1_16s_used += 1
|
||||||
|
|
||||||
def pass_perform_host_node_mapping(self):
|
def pass_perform_host_node_mapping(self) -> None:
|
||||||
""" This pass assigns host nodes to nodes in the abstract FireSim
|
""" This pass assigns host nodes to nodes in the abstract FireSim
|
||||||
configuration tree.
|
configuration tree.
|
||||||
|
|
||||||
|
@ -252,15 +284,14 @@ class FireSimTopologyWithPasses:
|
||||||
# all roots are servers, so we're in no_net_config
|
# all roots are servers, so we're in no_net_config
|
||||||
# if the user has specified any 16xlarges, we assign to them first
|
# if the user has specified any 16xlarges, we assign to them first
|
||||||
self.pass_no_net_host_mapping()
|
self.pass_no_net_host_mapping()
|
||||||
return
|
|
||||||
else:
|
else:
|
||||||
# now, we're handling the cycle-accurate networked simulation case
|
# now, we're handling the cycle-accurate networked simulation case
|
||||||
# currently, we only handle the case where
|
# currently, we only handle the case where
|
||||||
self.pass_simple_networked_host_node_mapping()
|
self.pass_simple_networked_host_node_mapping()
|
||||||
elif type(self.firesimtopol.custom_mapper) == types.FunctionType:
|
elif callable(self.firesimtopol.custom_mapper):
|
||||||
""" call the mapper fn defined in the topology itself. """
|
""" call the mapper fn defined in the topology itself. """
|
||||||
self.firesimtopol.custom_mapper(self)
|
self.firesimtopol.custom_mapper(self)
|
||||||
elif type(self.firesimtopol.custom_mapper) == str:
|
elif isinstance(self.firesimtopol.custom_mapper, str):
|
||||||
""" assume that the mapping strategy is a custom pre-defined strategy
|
""" assume that the mapping strategy is a custom pre-defined strategy
|
||||||
given in this class, supplied as a string in the topology """
|
given in this class, supplied as a string in the topology """
|
||||||
mapperfunc = getattr(self, self.firesimtopol.custom_mapper)
|
mapperfunc = getattr(self, self.firesimtopol.custom_mapper)
|
||||||
|
@ -268,31 +299,35 @@ class FireSimTopologyWithPasses:
|
||||||
else:
|
else:
|
||||||
assert False, "IMPROPER MAPPING CONFIGURATION"
|
assert False, "IMPROPER MAPPING CONFIGURATION"
|
||||||
|
|
||||||
def pass_apply_default_hwconfig(self):
|
def pass_apply_default_hwconfig(self) -> None:
|
||||||
""" This is the default mapping pass for hardware configurations - it
|
""" This is the default mapping pass for hardware configurations - it
|
||||||
does 3 things:
|
does 3 things:
|
||||||
1) If a node has a hardware config assigned (as a string), replace
|
1) If a node has a hardware config assigned (as a string), replace
|
||||||
it with the appropriate RuntimeHWConfig object.
|
it with the appropriate RuntimeHWConfig object. If it is already a
|
||||||
|
RuntimeHWConfig object then keep it the same.
|
||||||
2) If a node's hardware config is none, give it the default
|
2) If a node's hardware config is none, give it the default
|
||||||
hardware config.
|
hardware config.
|
||||||
3) In either case, call get_deploytriplet_for_config() once to
|
3) In either case, call get_deploytriplet_for_config() once to
|
||||||
make the API call and cache the result for the deploytriplet.
|
make the API call and cache the result for the deploytriplet.
|
||||||
"""
|
"""
|
||||||
servers = self.firesimtopol.get_dfs_order_servers()
|
servers = self.firesimtopol.get_dfs_order_servers()
|
||||||
defaulthwconfig_obj = self.hwdb.get_runtimehwconfig_from_name(self.defaulthwconfig)
|
|
||||||
|
|
||||||
for server in servers:
|
for server in servers:
|
||||||
servhwconf = server.get_server_hardware_config()
|
hw_cfg = server.get_server_hardware_config()
|
||||||
if servhwconf is None:
|
if hw_cfg is None:
|
||||||
# 2)
|
# 2)
|
||||||
server.set_server_hardware_config(defaulthwconfig_obj)
|
defaulthwconfig_obj = self.hwdb.get_runtimehwconfig_from_name(self.defaulthwconfig)
|
||||||
|
hw_cfg = defaulthwconfig_obj
|
||||||
else:
|
else:
|
||||||
# 1)
|
if isinstance(hw_cfg, str):
|
||||||
server.set_server_hardware_config(self.hwdb.get_runtimehwconfig_from_name(servhwconf))
|
# 1) str
|
||||||
|
hw_cfg = self.hwdb.get_runtimehwconfig_from_name(hw_cfg)
|
||||||
|
# 1) hwcfg
|
||||||
# 3)
|
# 3)
|
||||||
server.get_server_hardware_config().get_deploytriplet_for_config()
|
hw_cfg.get_deploytriplet_for_config()
|
||||||
|
server.set_server_hardware_config(hw_cfg)
|
||||||
|
|
||||||
def pass_apply_default_network_params(self):
|
def pass_apply_default_network_params(self) -> None:
|
||||||
""" If the user has not set per-node network parameters in the topology,
|
""" If the user has not set per-node network parameters in the topology,
|
||||||
apply the defaults. """
|
apply the defaults. """
|
||||||
allnodes = self.firesimtopol.get_dfs_order()
|
allnodes = self.firesimtopol.get_dfs_order()
|
||||||
|
@ -338,7 +373,7 @@ class FireSimTopologyWithPasses:
|
||||||
node.print_cycle_prefix = self.defaultprintcycleprefix
|
node.print_cycle_prefix = self.defaultprintcycleprefix
|
||||||
|
|
||||||
|
|
||||||
def pass_allocate_nbd_devices(self):
|
def pass_allocate_nbd_devices(self) -> None:
|
||||||
""" allocate NBD devices. this must be done here to preserve the
|
""" allocate NBD devices. this must be done here to preserve the
|
||||||
data structure for use in runworkload teardown. """
|
data structure for use in runworkload teardown. """
|
||||||
servers = self.firesimtopol.get_dfs_order_servers()
|
servers = self.firesimtopol.get_dfs_order_servers()
|
||||||
|
@ -346,13 +381,14 @@ class FireSimTopologyWithPasses:
|
||||||
server.allocate_nbds()
|
server.allocate_nbds()
|
||||||
|
|
||||||
|
|
||||||
def pass_assign_jobs(self):
|
def pass_assign_jobs(self) -> None:
|
||||||
""" assign jobs to simulations. """
|
""" assign jobs to simulations. """
|
||||||
servers = self.firesimtopol.get_dfs_order_servers()
|
servers = self.firesimtopol.get_dfs_order_servers()
|
||||||
[servers[i].assign_job(self.workload.get_job(i)) for i in range(len(servers))]
|
for i in range(len(servers)):
|
||||||
|
servers[i].assign_job(self.workload.get_job(i))
|
||||||
|
|
||||||
|
|
||||||
def phase_one_passes(self):
|
def phase_one_passes(self) -> None:
|
||||||
""" These are passes that can run without requiring host-node binding.
|
""" These are passes that can run without requiring host-node binding.
|
||||||
i.e. can be run before you have run launchrunfarm. They're run
|
i.e. can be run before you have run launchrunfarm. They're run
|
||||||
automatically when creating this object. """
|
automatically when creating this object. """
|
||||||
|
@ -366,15 +402,15 @@ class FireSimTopologyWithPasses:
|
||||||
|
|
||||||
self.pass_create_topology_diagram()
|
self.pass_create_topology_diagram()
|
||||||
|
|
||||||
def pass_build_required_drivers(self):
|
def pass_build_required_drivers(self) -> None:
|
||||||
""" Build all FPGA drivers. The method we're calling here won't actually
|
""" Build all FPGA drivers. The method we're calling here won't actually
|
||||||
repeat the build process more than once per run of the manager. """
|
repeat the build process more than once per run of the manager. """
|
||||||
servers = self.firesimtopol.get_dfs_order_servers()
|
servers = self.firesimtopol.get_dfs_order_servers()
|
||||||
|
|
||||||
for server in servers:
|
for server in servers:
|
||||||
server.get_server_hardware_config().build_fpga_driver()
|
server.get_resolved_server_hardware_config().build_fpga_driver()
|
||||||
|
|
||||||
def pass_build_required_switches(self):
|
def pass_build_required_switches(self) -> None:
|
||||||
""" Build all the switches required for this simulation. """
|
""" Build all the switches required for this simulation. """
|
||||||
# the way the switch models are designed, this requires hosts to be
|
# the way the switch models are designed, this requires hosts to be
|
||||||
# bound to instances.
|
# bound to instances.
|
||||||
|
@ -383,7 +419,7 @@ class FireSimTopologyWithPasses:
|
||||||
switch.build_switch_sim_binary()
|
switch.build_switch_sim_binary()
|
||||||
|
|
||||||
|
|
||||||
def infrasetup_passes(self, use_mock_instances_for_testing):
|
def infrasetup_passes(self, use_mock_instances_for_testing: bool) -> None:
|
||||||
""" extra passes needed to do infrasetup """
|
""" extra passes needed to do infrasetup """
|
||||||
if use_mock_instances_for_testing:
|
if use_mock_instances_for_testing:
|
||||||
self.run_farm.bind_mock_instances_to_objects()
|
self.run_farm.bind_mock_instances_to_objects()
|
||||||
|
@ -393,7 +429,7 @@ class FireSimTopologyWithPasses:
|
||||||
self.pass_build_required_switches()
|
self.pass_build_required_switches()
|
||||||
|
|
||||||
@parallel
|
@parallel
|
||||||
def infrasetup_node_wrapper(runfarm):
|
def infrasetup_node_wrapper(runfarm: RunFarm) -> None:
|
||||||
my_node = runfarm.lookup_by_ip_addr(env.host_string)
|
my_node = runfarm.lookup_by_ip_addr(env.host_string)
|
||||||
my_node.instance_deploy_manager.infrasetup_instance()
|
my_node.instance_deploy_manager.infrasetup_instance()
|
||||||
|
|
||||||
|
@ -401,7 +437,7 @@ class FireSimTopologyWithPasses:
|
||||||
execute(instance_liveness, hosts=all_runfarm_ips)
|
execute(instance_liveness, hosts=all_runfarm_ips)
|
||||||
execute(infrasetup_node_wrapper, self.run_farm, hosts=all_runfarm_ips)
|
execute(infrasetup_node_wrapper, self.run_farm, hosts=all_runfarm_ips)
|
||||||
|
|
||||||
def boot_simulation_passes(self, use_mock_instances_for_testing, skip_instance_binding=False):
|
def boot_simulation_passes(self, use_mock_instances_for_testing: bool, skip_instance_binding: bool = False) -> None:
|
||||||
""" Passes that setup for boot and boot the simulation.
|
""" Passes that setup for boot and boot the simulation.
|
||||||
skip instance binding lets users not call the binding pass on the run_farm
|
skip instance binding lets users not call the binding pass on the run_farm
|
||||||
again, e.g. if this was called by runworkload (because runworkload calls
|
again, e.g. if this was called by runworkload (because runworkload calls
|
||||||
|
@ -417,7 +453,7 @@ class FireSimTopologyWithPasses:
|
||||||
self.run_farm.bind_real_instances_to_objects()
|
self.run_farm.bind_real_instances_to_objects()
|
||||||
|
|
||||||
@parallel
|
@parallel
|
||||||
def boot_switch_wrapper(runfarm):
|
def boot_switch_wrapper(runfarm: RunFarm) -> None:
|
||||||
my_node = runfarm.lookup_by_ip_addr(env.host_string)
|
my_node = runfarm.lookup_by_ip_addr(env.host_string)
|
||||||
my_node.instance_deploy_manager.start_switches_instance()
|
my_node.instance_deploy_manager.start_switches_instance()
|
||||||
|
|
||||||
|
@ -426,13 +462,13 @@ class FireSimTopologyWithPasses:
|
||||||
execute(boot_switch_wrapper, self.run_farm, hosts=all_runfarm_ips)
|
execute(boot_switch_wrapper, self.run_farm, hosts=all_runfarm_ips)
|
||||||
|
|
||||||
@parallel
|
@parallel
|
||||||
def boot_simulation_wrapper(runfarm):
|
def boot_simulation_wrapper(runfarm: RunFarm) -> None:
|
||||||
my_node = runfarm.lookup_by_ip_addr(env.host_string)
|
my_node = runfarm.lookup_by_ip_addr(env.host_string)
|
||||||
my_node.instance_deploy_manager.start_simulations_instance()
|
my_node.instance_deploy_manager.start_simulations_instance()
|
||||||
|
|
||||||
execute(boot_simulation_wrapper, self.run_farm, hosts=all_runfarm_ips)
|
execute(boot_simulation_wrapper, self.run_farm, hosts=all_runfarm_ips)
|
||||||
|
|
||||||
def kill_simulation_passes(self, use_mock_instances_for_testing, disconnect_all_nbds=True):
|
def kill_simulation_passes(self, use_mock_instances_for_testing: bool, disconnect_all_nbds: bool = True) -> None:
|
||||||
""" Passes that kill the simulator. """
|
""" Passes that kill the simulator. """
|
||||||
if use_mock_instances_for_testing:
|
if use_mock_instances_for_testing:
|
||||||
self.run_farm.bind_mock_instances_to_objects()
|
self.run_farm.bind_mock_instances_to_objects()
|
||||||
|
@ -442,19 +478,19 @@ class FireSimTopologyWithPasses:
|
||||||
all_runfarm_ips = [x.get_private_ip() for x in self.run_farm.get_all_host_nodes()]
|
all_runfarm_ips = [x.get_private_ip() for x in self.run_farm.get_all_host_nodes()]
|
||||||
|
|
||||||
@parallel
|
@parallel
|
||||||
def kill_switch_wrapper(runfarm):
|
def kill_switch_wrapper(runfarm: RunFarm) -> None:
|
||||||
my_node = runfarm.lookup_by_ip_addr(env.host_string)
|
my_node = runfarm.lookup_by_ip_addr(env.host_string)
|
||||||
my_node.instance_deploy_manager.kill_switches_instance()
|
my_node.instance_deploy_manager.kill_switches_instance()
|
||||||
|
|
||||||
@parallel
|
@parallel
|
||||||
def kill_simulation_wrapper(runfarm):
|
def kill_simulation_wrapper(runfarm: RunFarm) -> None:
|
||||||
my_node = runfarm.lookup_by_ip_addr(env.host_string)
|
my_node = runfarm.lookup_by_ip_addr(env.host_string)
|
||||||
my_node.instance_deploy_manager.kill_simulations_instance(disconnect_all_nbds=disconnect_all_nbds)
|
my_node.instance_deploy_manager.kill_simulations_instance(disconnect_all_nbds=disconnect_all_nbds)
|
||||||
|
|
||||||
execute(kill_switch_wrapper, self.run_farm, hosts=all_runfarm_ips)
|
execute(kill_switch_wrapper, self.run_farm, hosts=all_runfarm_ips)
|
||||||
execute(kill_simulation_wrapper, self.run_farm, hosts=all_runfarm_ips)
|
execute(kill_simulation_wrapper, self.run_farm, hosts=all_runfarm_ips)
|
||||||
|
|
||||||
def screens():
|
def screens() -> None:
|
||||||
""" poll on screens to make sure kill succeeded. """
|
""" poll on screens to make sure kill succeeded. """
|
||||||
with warn_only():
|
with warn_only():
|
||||||
rootLogger.info("Confirming exit...")
|
rootLogger.info("Confirming exit...")
|
||||||
|
@ -472,7 +508,7 @@ class FireSimTopologyWithPasses:
|
||||||
|
|
||||||
execute(screens, hosts=all_runfarm_ips)
|
execute(screens, hosts=all_runfarm_ips)
|
||||||
|
|
||||||
def run_workload_passes(self, use_mock_instances_for_testing):
|
def run_workload_passes(self, use_mock_instances_for_testing: bool) -> None:
|
||||||
""" extra passes needed to do runworkload. """
|
""" extra passes needed to do runworkload. """
|
||||||
if use_mock_instances_for_testing:
|
if use_mock_instances_for_testing:
|
||||||
self.run_farm.bind_mock_instances_to_objects()
|
self.run_farm.bind_mock_instances_to_objects()
|
||||||
|
@ -491,14 +527,14 @@ class FireSimTopologyWithPasses:
|
||||||
self.boot_simulation_passes(False, skip_instance_binding=True)
|
self.boot_simulation_passes(False, skip_instance_binding=True)
|
||||||
|
|
||||||
@parallel
|
@parallel
|
||||||
def monitor_jobs_wrapper(runfarm, completed_jobs, teardown, terminateoncompletion, job_results_dir):
|
def monitor_jobs_wrapper(runfarm, completed_jobs: List[str], teardown: bool, terminateoncompletion: bool, job_results_dir: str) -> Dict[str, Dict[str, bool]]:
|
||||||
""" on each instance, check over its switches and simulations
|
""" on each instance, check over its switches and simulations
|
||||||
to copy results off. """
|
to copy results off. """
|
||||||
my_node = runfarm.lookup_by_ip_addr(env.host_string)
|
my_node = runfarm.lookup_by_ip_addr(env.host_string)
|
||||||
return my_node.instance_deploy_manager.monitor_jobs_instance(completed_jobs, teardown, terminateoncompletion, job_results_dir)
|
return my_node.instance_deploy_manager.monitor_jobs_instance(completed_jobs, teardown, terminateoncompletion, job_results_dir)
|
||||||
|
|
||||||
|
|
||||||
def loop_logger(instancestates, terminateoncompletion):
|
def loop_logger(instancestates: Dict[str, Any], terminateoncompletion: bool) -> None:
|
||||||
""" Print the simulation status nicely. """
|
""" Print the simulation status nicely. """
|
||||||
|
|
||||||
instancestate_map = dict()
|
instancestate_map = dict()
|
||||||
|
@ -541,6 +577,7 @@ class FireSimTopologyWithPasses:
|
||||||
rootLogger.info("""FireSim Simulation Status @ {}""".format(str(datetime.datetime.utcnow())))
|
rootLogger.info("""FireSim Simulation Status @ {}""".format(str(datetime.datetime.utcnow())))
|
||||||
rootLogger.info("-"*80)
|
rootLogger.info("-"*80)
|
||||||
rootLogger.info("""This workload's output is located in:\n{}""".format(self.workload.job_results_dir))
|
rootLogger.info("""This workload's output is located in:\n{}""".format(self.workload.job_results_dir))
|
||||||
|
assert isinstance(rootLogger.handlers[0], logging.FileHandler)
|
||||||
rootLogger.info("""This run's log is located in:\n{}""".format(rootLogger.handlers[0].baseFilename))
|
rootLogger.info("""This run's log is located in:\n{}""".format(rootLogger.handlers[0].baseFilename))
|
||||||
rootLogger.info("""This status will update every 10s.""")
|
rootLogger.info("""This status will update every 10s.""")
|
||||||
rootLogger.info("-"*80)
|
rootLogger.info("-"*80)
|
||||||
|
@ -603,7 +640,8 @@ class FireSimTopologyWithPasses:
|
||||||
|
|
||||||
jobs_complete_dict = dict()
|
jobs_complete_dict = dict()
|
||||||
simstates = [x['sims'] for x in instancestates.values()]
|
simstates = [x['sims'] for x in instancestates.values()]
|
||||||
global_status = [jobs_complete_dict.update(x) for x in simstates]
|
for x in simstates:
|
||||||
|
jobs_complete_dict.update(x)
|
||||||
global_status = jobs_complete_dict.values()
|
global_status = jobs_complete_dict.values()
|
||||||
rootLogger.debug("jobs complete dict " + str(jobs_complete_dict))
|
rootLogger.debug("jobs complete dict " + str(jobs_complete_dict))
|
||||||
rootLogger.debug("global status: " + str(global_status))
|
rootLogger.debug("global status: " + str(global_status))
|
||||||
|
|
|
@ -1,19 +1,27 @@
|
||||||
""" Run Farm management. """
|
""" Run Farm management. """
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
import re
|
import re
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
from awstools.awstools import *
|
|
||||||
from fabric.api import * # type: ignore
|
|
||||||
from fabric.contrib.project import rsync_project # type: ignore
|
|
||||||
from util.streamlogger import StreamLogger
|
|
||||||
import time
|
import time
|
||||||
|
import os
|
||||||
|
from datetime import timedelta
|
||||||
|
from fabric.api import run, env, prefix, put, cd, warn_only, local, settings, hide # type: ignore
|
||||||
|
from fabric.contrib.project import rsync_project # type: ignore
|
||||||
from os.path import join as pjoin
|
from os.path import join as pjoin
|
||||||
|
|
||||||
|
from awstools.awstools import instances_sorted_by_avail_ip, get_run_instances_by_tag_type, get_private_ips_for_instances, launch_run_instances, wait_on_instance_launches, terminate_instances, get_instance_ids_for_instances
|
||||||
|
from util.streamlogger import StreamLogger
|
||||||
|
|
||||||
|
from typing import Dict, Optional, List, Union, TYPE_CHECKING
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from mypy_boto3_ec2.service_resource import Instance as EC2InstanceResource
|
||||||
|
from runtools.firesim_topology_elements import FireSimSwitchNode, FireSimServerNode
|
||||||
|
|
||||||
rootLogger = logging.getLogger()
|
rootLogger = logging.getLogger()
|
||||||
|
|
||||||
def remote_kmsg(message):
|
def remote_kmsg(message: str) -> None:
|
||||||
""" This will let you write whatever is passed as message into the kernel
|
""" This will let you write whatever is passed as message into the kernel
|
||||||
log of the remote machine. Useful for figuring what the manager is doing
|
log of the remote machine. Useful for figuring what the manager is doing
|
||||||
w.r.t output from kernel stuff on the remote node. """
|
w.r.t output from kernel stuff on the remote node. """
|
||||||
|
@ -24,28 +32,32 @@ class MockBoto3Instance:
|
||||||
""" This is used for testing without actually launching instances. """
|
""" This is used for testing without actually launching instances. """
|
||||||
|
|
||||||
# don't use 0 unless you want stuff copied to your own instance.
|
# don't use 0 unless you want stuff copied to your own instance.
|
||||||
base_ip = 1
|
base_ip: int = 1
|
||||||
|
ip_addr_int: int
|
||||||
|
private_ip_address: str
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self) -> None:
|
||||||
self.ip_addr_int = MockBoto3Instance.base_ip
|
self.ip_addr_int = MockBoto3Instance.base_ip
|
||||||
MockBoto3Instance.base_ip += 1
|
MockBoto3Instance.base_ip += 1
|
||||||
self.private_ip_address = ".".join([str((self.ip_addr_int >> (8*x)) & 0xFF) for x in [3, 2, 1, 0]])
|
self.private_ip_address = ".".join([str((self.ip_addr_int >> (8*x)) & 0xFF) for x in [3, 2, 1, 0]])
|
||||||
|
|
||||||
|
|
||||||
class NBDTracker(object):
|
class NBDTracker:
|
||||||
""" Track allocation of NBD devices on an instance. Used for mounting
|
""" Track allocation of NBD devices on an instance. Used for mounting
|
||||||
qcow2 images."""
|
qcow2 images."""
|
||||||
|
|
||||||
# max number of NBDs allowed by the nbd.ko kernel module
|
# max number of NBDs allowed by the nbd.ko kernel module
|
||||||
NBDS_MAX = 128
|
NBDS_MAX: int = 128
|
||||||
|
unallocd: List[str]
|
||||||
|
allocated_dict: Dict[str, str]
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self) -> None:
|
||||||
self.unallocd = ["""/dev/nbd{}""".format(x) for x in range(self.NBDS_MAX)]
|
self.unallocd = ["""/dev/nbd{}""".format(x) for x in range(self.NBDS_MAX)]
|
||||||
|
|
||||||
# this is a mapping from .qcow2 image name to nbd device.
|
# this is a mapping from .qcow2 image name to nbd device.
|
||||||
self.allocated_dict = {}
|
self.allocated_dict = {}
|
||||||
|
|
||||||
def get_nbd_for_imagename(self, imagename):
|
def get_nbd_for_imagename(self, imagename: str) -> str:
|
||||||
""" Call this when you need to allocate an nbd for a particular image,
|
""" Call this when you need to allocate an nbd for a particular image,
|
||||||
or when you need to know what nbd device is for that image.
|
or when you need to know what nbd device is for that image.
|
||||||
|
|
||||||
|
@ -60,40 +72,41 @@ class NBDTracker(object):
|
||||||
return self.allocated_dict[imagename]
|
return self.allocated_dict[imagename]
|
||||||
|
|
||||||
|
|
||||||
class EC2Inst(object):
|
class EC2Inst:
|
||||||
# TODO: this is leftover from when we could only support switch slots.
|
# TODO: this is leftover from when we could only support switch slots.
|
||||||
# This can be removed once self.switch_slots is dynamically allocated.
|
# This can be removed once self.switch_slots is dynamically allocated.
|
||||||
# Just make it arbitrarily large for now.
|
# Just make it arbitrarily large for now.
|
||||||
SWITCH_SLOTS = 100000
|
SWITCH_SLOTS: int = 100000
|
||||||
|
boto3_instance_object: Optional[Union[EC2InstanceResource, MockBoto3Instance]]
|
||||||
|
switch_slots: List[FireSimSwitchNode]
|
||||||
|
instance_deploy_manager: InstanceDeployManager
|
||||||
|
_next_port: int
|
||||||
|
nbd_tracker: NBDTracker
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self) -> None:
|
||||||
self.boto3_instance_object = None
|
self.boto3_instance_object = None
|
||||||
self.switch_slots = [None for x in range(self.SWITCH_SLOTS)]
|
self.switch_slots = []
|
||||||
self.switch_slots_consumed = 0
|
|
||||||
self.instance_deploy_manager = InstanceDeployManager(self)
|
self.instance_deploy_manager = InstanceDeployManager(self)
|
||||||
self._next_port = 10000 # track ports to allocate for server switch model ports
|
self._next_port = 10000 # track ports to allocate for server switch model ports
|
||||||
self.nbd_tracker = NBDTracker()
|
self.nbd_tracker = NBDTracker()
|
||||||
|
|
||||||
def assign_boto3_instance_object(self, boto3obj):
|
def assign_boto3_instance_object(self, boto3obj: Union[EC2InstanceResource, MockBoto3Instance]) -> None:
|
||||||
self.boto3_instance_object = boto3obj
|
self.boto3_instance_object = boto3obj
|
||||||
|
|
||||||
def is_bound_to_real_instance(self):
|
def is_bound_to_real_instance(self) -> bool:
|
||||||
return self.boto3_instance_object is not None
|
return self.boto3_instance_object is not None
|
||||||
|
|
||||||
def get_private_ip(self):
|
def get_private_ip(self) -> str:
|
||||||
|
assert self.boto3_instance_object is not None
|
||||||
return self.boto3_instance_object.private_ip_address
|
return self.boto3_instance_object.private_ip_address
|
||||||
|
|
||||||
def add_switch(self, firesimswitchnode):
|
def add_switch(self, firesimswitchnode: FireSimSwitchNode) -> None:
|
||||||
""" Add a switch to the next available switch slot. """
|
""" Add a switch to the next available switch slot. """
|
||||||
assert self.switch_slots_consumed < self.SWITCH_SLOTS
|
assert len(self.switch_slots) < self.SWITCH_SLOTS
|
||||||
self.switch_slots[self.switch_slots_consumed] = firesimswitchnode
|
self.switch_slots.append(firesimswitchnode)
|
||||||
firesimswitchnode.assign_host_instance(self)
|
firesimswitchnode.assign_host_instance(self)
|
||||||
self.switch_slots_consumed += 1
|
|
||||||
|
|
||||||
def get_num_switch_slots_consumed(self):
|
def allocate_host_port(self) -> int:
|
||||||
return self.switch_slots_consumed
|
|
||||||
|
|
||||||
def allocate_host_port(self):
|
|
||||||
""" Allocate a port to use for something on the host. Successive calls
|
""" Allocate a port to use for something on the host. Successive calls
|
||||||
will return a new port. """
|
will return a new port. """
|
||||||
retport = self._next_port
|
retport = self._next_port
|
||||||
|
@ -102,63 +115,55 @@ class EC2Inst(object):
|
||||||
return retport
|
return retport
|
||||||
|
|
||||||
class F1_Instance(EC2Inst):
|
class F1_Instance(EC2Inst):
|
||||||
FPGA_SLOTS = 0
|
FPGA_SLOTS: int = 0
|
||||||
|
fpga_slots: List[FireSimServerNode]
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self) -> None:
|
||||||
|
super().__init__()
|
||||||
self.fpga_slots = []
|
self.fpga_slots = []
|
||||||
self.fpga_slots_consumed = 0
|
|
||||||
super(F1_Instance, self).__init__()
|
|
||||||
|
|
||||||
def get_num_fpga_slots_max(self):
|
def get_num_fpga_slots_max(self) -> int:
|
||||||
""" Get the number of fpga slots. """
|
""" Get the number of fpga slots. """
|
||||||
return self.FPGA_SLOTS
|
return self.FPGA_SLOTS
|
||||||
|
|
||||||
def get_num_fpga_slots_consumed(self):
|
def add_simulation(self, firesimservernode: FireSimServerNode) -> None:
|
||||||
""" Get the number of fpga slots. """
|
|
||||||
return self.fpga_slots_consumed
|
|
||||||
|
|
||||||
def add_simulation(self, firesimservernode):
|
|
||||||
""" Add a simulation to the next available slot. """
|
""" Add a simulation to the next available slot. """
|
||||||
assert self.fpga_slots_consumed < self.FPGA_SLOTS
|
assert len(self.fpga_slots) < self.FPGA_SLOTS
|
||||||
self.fpga_slots[self.fpga_slots_consumed] = firesimservernode
|
self.fpga_slots.append(firesimservernode)
|
||||||
firesimservernode.assign_host_instance(self)
|
firesimservernode.assign_host_instance(self)
|
||||||
self.fpga_slots_consumed += 1
|
|
||||||
|
|
||||||
class F1_16(F1_Instance):
|
class F1_16(F1_Instance):
|
||||||
instance_counter = 0
|
instance_counter: int = 0
|
||||||
FPGA_SLOTS = 8
|
FPGA_SLOTS: int = 8
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self) -> None:
|
||||||
super(F1_16, self).__init__()
|
super().__init__()
|
||||||
self.fpga_slots = [None for x in range(self.FPGA_SLOTS)]
|
|
||||||
self.instance_id = F1_16.instance_counter
|
self.instance_id = F1_16.instance_counter
|
||||||
F1_16.instance_counter += 1
|
F1_16.instance_counter += 1
|
||||||
|
|
||||||
class F1_4(F1_Instance):
|
class F1_4(F1_Instance):
|
||||||
instance_counter = 0
|
instance_counter: int = 0
|
||||||
FPGA_SLOTS = 2
|
FPGA_SLOTS: int = 2
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self) -> None:
|
||||||
super(F1_4, self).__init__()
|
super().__init__()
|
||||||
self.fpga_slots = [None for x in range(self.FPGA_SLOTS)]
|
|
||||||
self.instance_id = F1_4.instance_counter
|
self.instance_id = F1_4.instance_counter
|
||||||
F1_4.instance_counter += 1
|
F1_4.instance_counter += 1
|
||||||
|
|
||||||
class F1_2(F1_Instance):
|
class F1_2(F1_Instance):
|
||||||
instance_counter = 0
|
instance_counter: int = 0
|
||||||
FPGA_SLOTS = 1
|
FPGA_SLOTS: int = 1
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self) -> None:
|
||||||
super(F1_2, self).__init__()
|
super().__init__()
|
||||||
self.fpga_slots = [None for x in range(self.FPGA_SLOTS)]
|
|
||||||
self.instance_id = F1_2.instance_counter
|
self.instance_id = F1_2.instance_counter
|
||||||
F1_2.instance_counter += 1
|
F1_2.instance_counter += 1
|
||||||
|
|
||||||
class M4_16(EC2Inst):
|
class M4_16(EC2Inst):
|
||||||
instance_counter = 0
|
instance_counter: int = 0
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self) -> None:
|
||||||
super(M4_16, self).__init__()
|
super().__init__()
|
||||||
self.instance_id = M4_16.instance_counter
|
self.instance_id = M4_16.instance_counter
|
||||||
M4_16.instance_counter += 1
|
M4_16.instance_counter += 1
|
||||||
|
|
||||||
|
@ -168,10 +173,20 @@ class RunFarm:
|
||||||
|
|
||||||
This way, you can assign "instances" to simulations first, and then assign
|
This way, you can assign "instances" to simulations first, and then assign
|
||||||
the real instance ids to the instance objects managed here."""
|
the real instance ids to the instance objects managed here."""
|
||||||
|
f1_16s: List[F1_16]
|
||||||
|
f1_4s: List[F1_4]
|
||||||
|
f1_2s: List[F1_2]
|
||||||
|
m4_16s: List[M4_16]
|
||||||
|
runfarmtag: str
|
||||||
|
run_instance_market: str
|
||||||
|
spot_interruption_behavior: str
|
||||||
|
spot_max_price: str
|
||||||
|
launch_timeout: timedelta
|
||||||
|
always_expand: bool
|
||||||
|
|
||||||
def __init__(self, num_f1_16, num_f1_4, num_f1_2, num_m4_16, runfarmtag,
|
def __init__(self, num_f1_16: int, num_f1_4: int, num_f1_2: int, num_m4_16: int, runfarmtag: str,
|
||||||
run_instance_market, spot_interruption_behavior,
|
run_instance_market: str, spot_interruption_behavior: str,
|
||||||
spot_max_price, launch_timeout, always_expand):
|
spot_max_price: str, launch_timeout: timedelta, always_expand: bool):
|
||||||
self.f1_16s = [F1_16() for x in range(num_f1_16)]
|
self.f1_16s = [F1_16() for x in range(num_f1_16)]
|
||||||
self.f1_4s = [F1_4() for x in range(num_f1_4)]
|
self.f1_4s = [F1_4() for x in range(num_f1_4)]
|
||||||
self.f1_2s = [F1_2() for x in range(num_f1_2)]
|
self.f1_2s = [F1_2() for x in range(num_f1_2)]
|
||||||
|
@ -185,7 +200,7 @@ class RunFarm:
|
||||||
self.launch_timeout = launch_timeout
|
self.launch_timeout = launch_timeout
|
||||||
self.always_expand = always_expand
|
self.always_expand = always_expand
|
||||||
|
|
||||||
def bind_mock_instances_to_objects(self):
|
def bind_mock_instances_to_objects(self) -> None:
|
||||||
""" Only used for testing. Bind mock Boto3 instances to objects. """
|
""" Only used for testing. Bind mock Boto3 instances to objects. """
|
||||||
for index in range(len(self.f1_16s)):
|
for index in range(len(self.f1_16s)):
|
||||||
self.f1_16s[index].assign_boto3_instance_object(MockBoto3Instance())
|
self.f1_16s[index].assign_boto3_instance_object(MockBoto3Instance())
|
||||||
|
@ -199,7 +214,7 @@ class RunFarm:
|
||||||
for index in range(len(self.m4_16s)):
|
for index in range(len(self.m4_16s)):
|
||||||
self.m4_16s[index].assign_boto3_instance_object(MockBoto3Instance())
|
self.m4_16s[index].assign_boto3_instance_object(MockBoto3Instance())
|
||||||
|
|
||||||
def bind_real_instances_to_objects(self):
|
def bind_real_instances_to_objects(self) -> None:
|
||||||
""" Attach running instances to the Run Farm. """
|
""" Attach running instances to the Run Farm. """
|
||||||
# fetch instances based on tag,
|
# fetch instances based on tag,
|
||||||
# populate IP addr list for use in the rest of our tasks.
|
# populate IP addr list for use in the rest of our tasks.
|
||||||
|
@ -244,7 +259,7 @@ class RunFarm:
|
||||||
self.f1_2s[index].assign_boto3_instance_object(instance)
|
self.f1_2s[index].assign_boto3_instance_object(instance)
|
||||||
|
|
||||||
|
|
||||||
def launch_run_farm(self):
|
def launch_run_farm(self) -> None:
|
||||||
""" Launch the run farm. """
|
""" Launch the run farm. """
|
||||||
runfarmtag = self.runfarmtag
|
runfarmtag = self.runfarmtag
|
||||||
runinstancemarket = self.run_instance_market
|
runinstancemarket = self.run_instance_market
|
||||||
|
@ -281,8 +296,8 @@ class RunFarm:
|
||||||
wait_on_instance_launches(f1_2s, 'f1.2xlarges')
|
wait_on_instance_launches(f1_2s, 'f1.2xlarges')
|
||||||
|
|
||||||
|
|
||||||
def terminate_run_farm(self, terminatesomef1_16, terminatesomef1_4, terminatesomef1_2,
|
def terminate_run_farm(self, terminatesomef1_16: int, terminatesomef1_4: int, terminatesomef1_2: int,
|
||||||
terminatesomem4_16, forceterminate):
|
terminatesomem4_16: int, forceterminate: bool):
|
||||||
runfarmtag = self.runfarmtag
|
runfarmtag = self.runfarmtag
|
||||||
|
|
||||||
# get instances that belong to the run farm. sort them in case we're only
|
# get instances that belong to the run farm. sort them in case we're only
|
||||||
|
@ -361,19 +376,18 @@ class RunFarm:
|
||||||
else:
|
else:
|
||||||
rootLogger.critical("Termination cancelled.")
|
rootLogger.critical("Termination cancelled.")
|
||||||
|
|
||||||
def get_all_host_nodes(self):
|
def get_all_host_nodes(self) -> List[EC2Inst]:
|
||||||
""" Get objects for all host nodes in the run farm that are bound to
|
""" Get objects for all host nodes in the run farm that are bound to
|
||||||
a real instance. """
|
a real instance. """
|
||||||
allinsts = self.f1_16s + self.f1_2s + self.f1_4s + self.m4_16s
|
allinsts: List[EC2Inst] = [*self.f1_16s, *self.f1_2s, *self.f1_4s, *self.m4_16s]
|
||||||
return [inst for inst in allinsts if inst.boto3_instance_object is not None]
|
return [inst for inst in allinsts if inst.is_bound_to_real_instance()]
|
||||||
|
|
||||||
def lookup_by_ip_addr(self, ipaddr):
|
def lookup_by_ip_addr(self, ipaddr: str) -> EC2Inst:
|
||||||
""" Get an instance object from its IP address. """
|
""" Get an instance object from its IP address. """
|
||||||
for host_node in self.get_all_host_nodes():
|
for host_node in self.get_all_host_nodes():
|
||||||
if host_node.get_private_ip() == ipaddr:
|
if host_node.get_private_ip() == ipaddr:
|
||||||
return host_node
|
return host_node
|
||||||
return None
|
assert False, f"Unable to find host node by {ipaddr} host name"
|
||||||
|
|
||||||
|
|
||||||
class InstanceDeployManager:
|
class InstanceDeployManager:
|
||||||
""" This class manages actually deploying/running stuff based on the
|
""" This class manages actually deploying/running stuff based on the
|
||||||
|
@ -381,14 +395,15 @@ class InstanceDeployManager:
|
||||||
|
|
||||||
This is in charge of managing the locations of stuff on remote nodes.
|
This is in charge of managing the locations of stuff on remote nodes.
|
||||||
"""
|
"""
|
||||||
|
parentnode: EC2Inst
|
||||||
|
|
||||||
def __init__(self, parentnode):
|
def __init__(self, parentnode: EC2Inst) -> None:
|
||||||
self.parentnode = parentnode
|
self.parentnode = parentnode
|
||||||
|
|
||||||
def instance_logger(self, logstr):
|
def instance_logger(self, logstr: str) -> None:
|
||||||
rootLogger.info("""[{}] """.format(env.host_string) + logstr)
|
rootLogger.info("""[{}] """.format(env.host_string) + logstr)
|
||||||
|
|
||||||
def get_and_install_aws_fpga_sdk(self):
|
def get_and_install_aws_fpga_sdk(self) -> None:
|
||||||
""" Installs the aws-sdk. This gets us access to tools to flash the fpga. """
|
""" Installs the aws-sdk. This gets us access to tools to flash the fpga. """
|
||||||
|
|
||||||
with prefix('cd ../'), \
|
with prefix('cd ../'), \
|
||||||
|
@ -405,7 +420,7 @@ class InstanceDeployManager:
|
||||||
with cd('/home/centos/aws-fpga'), StreamLogger('stdout'), StreamLogger('stderr'):
|
with cd('/home/centos/aws-fpga'), StreamLogger('stdout'), StreamLogger('stderr'):
|
||||||
run('source sdk_setup.sh')
|
run('source sdk_setup.sh')
|
||||||
|
|
||||||
def fpga_node_xdma(self):
|
def fpga_node_xdma(self) -> None:
|
||||||
""" Copy XDMA infra to remote node. This assumes that the driver was
|
""" Copy XDMA infra to remote node. This assumes that the driver was
|
||||||
already built and that a binary exists in the directory on this machine
|
already built and that a binary exists in the directory on this machine
|
||||||
"""
|
"""
|
||||||
|
@ -421,7 +436,7 @@ class InstanceDeployManager:
|
||||||
run('make clean')
|
run('make clean')
|
||||||
run('make')
|
run('make')
|
||||||
|
|
||||||
def fpga_node_qcow(self):
|
def fpga_node_qcow(self) -> None:
|
||||||
""" Install qemu-img management tools and copy NBD infra to remote
|
""" Install qemu-img management tools and copy NBD infra to remote
|
||||||
node. This assumes that the kernel module was already built and exists
|
node. This assumes that the kernel module was already built and exists
|
||||||
in the directory on this machine.
|
in the directory on this machine.
|
||||||
|
@ -434,7 +449,7 @@ class InstanceDeployManager:
|
||||||
# copy over kernel module
|
# copy over kernel module
|
||||||
put('../build/nbd.ko', '/home/centos/nbd.ko', mirror_local_mode=True)
|
put('../build/nbd.ko', '/home/centos/nbd.ko', mirror_local_mode=True)
|
||||||
|
|
||||||
def load_nbd_module(self):
|
def load_nbd_module(self) -> None:
|
||||||
""" load the nbd module. always unload the module first to ensure it
|
""" load the nbd module. always unload the module first to ensure it
|
||||||
is in a clean state. """
|
is in a clean state. """
|
||||||
self.unload_nbd_module()
|
self.unload_nbd_module()
|
||||||
|
@ -443,7 +458,7 @@ class InstanceDeployManager:
|
||||||
with StreamLogger('stdout'), StreamLogger('stderr'):
|
with StreamLogger('stdout'), StreamLogger('stderr'):
|
||||||
run("""sudo insmod /home/centos/nbd.ko nbds_max={}""".format(self.parentnode.nbd_tracker.NBDS_MAX))
|
run("""sudo insmod /home/centos/nbd.ko nbds_max={}""".format(self.parentnode.nbd_tracker.NBDS_MAX))
|
||||||
|
|
||||||
def unload_nbd_module(self):
|
def unload_nbd_module(self) -> None:
|
||||||
""" unload the nbd module. """
|
""" unload the nbd module. """
|
||||||
self.instance_logger("Unloading NBD Kernel Module.")
|
self.instance_logger("Unloading NBD Kernel Module.")
|
||||||
|
|
||||||
|
@ -452,7 +467,7 @@ class InstanceDeployManager:
|
||||||
with warn_only(), StreamLogger('stdout'), StreamLogger('stderr'):
|
with warn_only(), StreamLogger('stdout'), StreamLogger('stderr'):
|
||||||
run('sudo rmmod nbd')
|
run('sudo rmmod nbd')
|
||||||
|
|
||||||
def disconnect_all_nbds_instance(self):
|
def disconnect_all_nbds_instance(self) -> None:
|
||||||
""" Disconnect all nbds on the instance. """
|
""" Disconnect all nbds on the instance. """
|
||||||
self.instance_logger("Disconnecting all NBDs.")
|
self.instance_logger("Disconnecting all NBDs.")
|
||||||
|
|
||||||
|
@ -465,7 +480,7 @@ class InstanceDeployManager:
|
||||||
|
|
||||||
run("; ".join(fullcmd))
|
run("; ".join(fullcmd))
|
||||||
|
|
||||||
def unload_xrt_and_xocl(self):
|
def unload_xrt_and_xocl(self) -> None:
|
||||||
self.instance_logger("Unloading XRT-related Kernel Modules.")
|
self.instance_logger("Unloading XRT-related Kernel Modules.")
|
||||||
|
|
||||||
with warn_only(), StreamLogger('stdout'), StreamLogger('stderr'):
|
with warn_only(), StreamLogger('stdout'), StreamLogger('stderr'):
|
||||||
|
@ -476,7 +491,7 @@ class InstanceDeployManager:
|
||||||
run('sudo yum remove -y xrt xrt-aws')
|
run('sudo yum remove -y xrt xrt-aws')
|
||||||
remote_kmsg("removing_xrt_end")
|
remote_kmsg("removing_xrt_end")
|
||||||
|
|
||||||
def unload_xdma(self):
|
def unload_xdma(self) -> None:
|
||||||
self.instance_logger("Unloading XDMA Driver Kernel Module.")
|
self.instance_logger("Unloading XDMA Driver Kernel Module.")
|
||||||
|
|
||||||
with warn_only(), StreamLogger('stdout'), StreamLogger('stderr'):
|
with warn_only(), StreamLogger('stdout'), StreamLogger('stderr'):
|
||||||
|
@ -489,8 +504,10 @@ class InstanceDeployManager:
|
||||||
#self.instance_logger("Waiting 10 seconds after removing kernel modules (esp. xocl).")
|
#self.instance_logger("Waiting 10 seconds after removing kernel modules (esp. xocl).")
|
||||||
#time.sleep(10)
|
#time.sleep(10)
|
||||||
|
|
||||||
def clear_fpgas(self):
|
def clear_fpgas(self) -> None:
|
||||||
# we always clear ALL fpga slots
|
# we always clear ALL fpga slots
|
||||||
|
assert isinstance(self.parentnode, F1_Instance)
|
||||||
|
|
||||||
for slotno in range(self.parentnode.get_num_fpga_slots_max()):
|
for slotno in range(self.parentnode.get_num_fpga_slots_max()):
|
||||||
self.instance_logger("""Clearing FPGA Slot {}.""".format(slotno))
|
self.instance_logger("""Clearing FPGA Slot {}.""".format(slotno))
|
||||||
with StreamLogger('stdout'), StreamLogger('stderr'):
|
with StreamLogger('stdout'), StreamLogger('stderr'):
|
||||||
|
@ -506,16 +523,18 @@ class InstanceDeployManager:
|
||||||
remote_kmsg("""done_checking_clear_fpga{}""".format(slotno))
|
remote_kmsg("""done_checking_clear_fpga{}""".format(slotno))
|
||||||
|
|
||||||
|
|
||||||
def flash_fpgas(self):
|
def flash_fpgas(self) -> None:
|
||||||
|
assert isinstance(self.parentnode, F1_Instance)
|
||||||
|
|
||||||
dummyagfi = None
|
dummyagfi = None
|
||||||
for firesimservernode, slotno in zip(self.parentnode.fpga_slots, range(self.parentnode.get_num_fpga_slots_consumed())):
|
|
||||||
if firesimservernode is not None:
|
for slotno, firesimservernode in enumerate(self.parentnode.fpga_slots):
|
||||||
agfi = firesimservernode.get_agfi()
|
agfi = firesimservernode.get_agfi()
|
||||||
dummyagfi = agfi
|
dummyagfi = agfi
|
||||||
self.instance_logger("""Flashing FPGA Slot: {} with agfi: {}.""".format(slotno, agfi))
|
self.instance_logger("""Flashing FPGA Slot: {} with agfi: {}.""".format(slotno, agfi))
|
||||||
with StreamLogger('stdout'), StreamLogger('stderr'):
|
with StreamLogger('stdout'), StreamLogger('stderr'):
|
||||||
run("""sudo fpga-load-local-image -S {} -I {} -A""".format(
|
run("""sudo fpga-load-local-image -S {} -I {} -A""".format(
|
||||||
slotno, agfi))
|
slotno, agfi))
|
||||||
|
|
||||||
# We only do this because XDMA hangs if some of the FPGAs on the instance
|
# We only do this because XDMA hangs if some of the FPGAs on the instance
|
||||||
# are left in the cleared state. So, if you're only using some of the
|
# are left in the cleared state. So, if you're only using some of the
|
||||||
|
@ -523,25 +542,24 @@ class InstanceDeployManager:
|
||||||
# anyway. Since the only interaction we have with an FPGA right now
|
# anyway. Since the only interaction we have with an FPGA right now
|
||||||
# is over PCIe where the software component is mastering, this can't
|
# is over PCIe where the software component is mastering, this can't
|
||||||
# break anything.
|
# break anything.
|
||||||
for slotno in range(self.parentnode.get_num_fpga_slots_consumed(), self.parentnode.get_num_fpga_slots_max()):
|
for slotno in range(len(self.parentnode.fpga_slots), self.parentnode.get_num_fpga_slots_max()):
|
||||||
self.instance_logger("""Flashing FPGA Slot: {} with dummy agfi: {}.""".format(slotno, dummyagfi))
|
self.instance_logger("""Flashing FPGA Slot: {} with dummy agfi: {}.""".format(slotno, dummyagfi))
|
||||||
with StreamLogger('stdout'), StreamLogger('stderr'):
|
with StreamLogger('stdout'), StreamLogger('stderr'):
|
||||||
run("""sudo fpga-load-local-image -S {} -I {} -A""".format(
|
run("""sudo fpga-load-local-image -S {} -I {} -A""".format(
|
||||||
slotno, dummyagfi))
|
slotno, dummyagfi))
|
||||||
|
|
||||||
for firesimservernode, slotno in zip(self.parentnode.fpga_slots, range(self.parentnode.get_num_fpga_slots_consumed())):
|
for slotno, firesimservernode in enumerate(self.parentnode.fpga_slots):
|
||||||
if firesimservernode is not None:
|
self.instance_logger("""Checking for Flashed FPGA Slot: {} with agfi: {}.""".format(slotno, agfi))
|
||||||
self.instance_logger("""Checking for Flashed FPGA Slot: {} with agfi: {}.""".format(slotno, agfi))
|
with StreamLogger('stdout'), StreamLogger('stderr'):
|
||||||
with StreamLogger('stdout'), StreamLogger('stderr'):
|
run("""until sudo fpga-describe-local-image -S {} -R -H | grep -q "loaded"; do sleep 1; done""".format(slotno))
|
||||||
run("""until sudo fpga-describe-local-image -S {} -R -H | grep -q "loaded"; do sleep 1; done""".format(slotno))
|
|
||||||
|
|
||||||
for slotno in range(self.parentnode.get_num_fpga_slots_consumed(), self.parentnode.get_num_fpga_slots_max()):
|
for slotno in range(len(self.parentnode.fpga_slots), self.parentnode.get_num_fpga_slots_max()):
|
||||||
self.instance_logger("""Checking for Flashed FPGA Slot: {} with agfi: {}.""".format(slotno, dummyagfi))
|
self.instance_logger("""Checking for Flashed FPGA Slot: {} with agfi: {}.""".format(slotno, dummyagfi))
|
||||||
with StreamLogger('stdout'), StreamLogger('stderr'):
|
with StreamLogger('stdout'), StreamLogger('stderr'):
|
||||||
run("""until sudo fpga-describe-local-image -S {} -R -H | grep -q "loaded"; do sleep 1; done""".format(slotno))
|
run("""until sudo fpga-describe-local-image -S {} -R -H | grep -q "loaded"; do sleep 1; done""".format(slotno))
|
||||||
|
|
||||||
|
|
||||||
def load_xdma(self):
|
def load_xdma(self) -> None:
|
||||||
""" load the xdma kernel module. """
|
""" load the xdma kernel module. """
|
||||||
# fpga mgmt tools seem to force load xocl after a flash now...
|
# fpga mgmt tools seem to force load xocl after a flash now...
|
||||||
# xocl conflicts with the xdma driver, which we actually want to use
|
# xocl conflicts with the xdma driver, which we actually want to use
|
||||||
|
@ -553,7 +571,7 @@ class InstanceDeployManager:
|
||||||
with StreamLogger('stdout'), StreamLogger('stderr'):
|
with StreamLogger('stdout'), StreamLogger('stderr'):
|
||||||
run("sudo insmod /home/centos/xdma/linux_kernel_drivers/xdma/xdma.ko poll_mode=1")
|
run("sudo insmod /home/centos/xdma/linux_kernel_drivers/xdma/xdma.ko poll_mode=1")
|
||||||
|
|
||||||
def start_ila_server(self):
|
def start_ila_server(self) -> None:
|
||||||
""" start the vivado hw_server and virtual jtag on simulation instance.) """
|
""" start the vivado hw_server and virtual jtag on simulation instance.) """
|
||||||
self.instance_logger("Starting Vivado hw_server.")
|
self.instance_logger("Starting Vivado hw_server.")
|
||||||
with StreamLogger('stdout'), StreamLogger('stderr'):
|
with StreamLogger('stdout'), StreamLogger('stderr'):
|
||||||
|
@ -562,19 +580,19 @@ class InstanceDeployManager:
|
||||||
with StreamLogger('stdout'), StreamLogger('stderr'):
|
with StreamLogger('stdout'), StreamLogger('stderr'):
|
||||||
run("""screen -S virtual_jtag -d -m bash -c "script -f -c 'sudo fpga-start-virtual-jtag -P 10201 -S 0'"; sleep 1""")
|
run("""screen -S virtual_jtag -d -m bash -c "script -f -c 'sudo fpga-start-virtual-jtag -P 10201 -S 0'"; sleep 1""")
|
||||||
|
|
||||||
def kill_ila_server(self):
|
def kill_ila_server(self) -> None:
|
||||||
""" Kill the vivado hw_server and virtual jtag """
|
""" Kill the vivado hw_server and virtual jtag """
|
||||||
with warn_only(), StreamLogger('stdout'), StreamLogger('stderr'):
|
with warn_only(), StreamLogger('stdout'), StreamLogger('stderr'):
|
||||||
run("sudo pkill -SIGKILL hw_server")
|
run("sudo pkill -SIGKILL hw_server")
|
||||||
with warn_only(), StreamLogger('stdout'), StreamLogger('stderr'):
|
with warn_only(), StreamLogger('stdout'), StreamLogger('stderr'):
|
||||||
run("sudo pkill -SIGKILL fpga-local-cmd")
|
run("sudo pkill -SIGKILL fpga-local-cmd")
|
||||||
|
|
||||||
def copy_sim_slot_infrastructure(self, slotno):
|
def copy_sim_slot_infrastructure(self, slotno: int) -> None:
|
||||||
""" copy all the simulation infrastructure to the remote node. """
|
""" copy all the simulation infrastructure to the remote node. """
|
||||||
|
assert isinstance(self.parentnode, F1_Instance)
|
||||||
|
assert slotno < len(self.parentnode.fpga_slots)
|
||||||
|
|
||||||
serv = self.parentnode.fpga_slots[slotno]
|
serv = self.parentnode.fpga_slots[slotno]
|
||||||
if serv is None:
|
|
||||||
# slot unassigned
|
|
||||||
return
|
|
||||||
|
|
||||||
self.instance_logger("""Copying FPGA simulation infrastructure for slot: {}.""".format(slotno))
|
self.instance_logger("""Copying FPGA simulation infrastructure for slot: {}.""".format(slotno))
|
||||||
|
|
||||||
|
@ -588,7 +606,7 @@ class InstanceDeployManager:
|
||||||
with StreamLogger('stdout'), StreamLogger('stderr'):
|
with StreamLogger('stdout'), StreamLogger('stderr'):
|
||||||
# -z --inplace
|
# -z --inplace
|
||||||
rsync_cap = rsync_project(local_dir=local_path, remote_dir=pjoin(remote_sim_rsync_dir, remote_path),
|
rsync_cap = rsync_project(local_dir=local_path, remote_dir=pjoin(remote_sim_rsync_dir, remote_path),
|
||||||
ssh_opts="-o StrictHostKeyChecking=no", extra_opts="-L", capture=True)
|
ssh_opts="-o StrictHostKeyChecking=no", extra_opts="-L", capture=True)
|
||||||
rootLogger.debug(rsync_cap)
|
rootLogger.debug(rsync_cap)
|
||||||
rootLogger.debug(rsync_cap.stderr)
|
rootLogger.debug(rsync_cap.stderr)
|
||||||
|
|
||||||
|
@ -596,7 +614,10 @@ class InstanceDeployManager:
|
||||||
run("""cp -r {}/* {}/""".format(remote_sim_rsync_dir, remote_sim_dir), shell=True)
|
run("""cp -r {}/* {}/""".format(remote_sim_rsync_dir, remote_sim_dir), shell=True)
|
||||||
|
|
||||||
|
|
||||||
def copy_switch_slot_infrastructure(self, switchslot):
|
def copy_switch_slot_infrastructure(self, switchslot: int) -> None:
|
||||||
|
assert isinstance(self.parentnode, M4_16)
|
||||||
|
assert switchslot < len(self.parentnode.switch_slots)
|
||||||
|
|
||||||
self.instance_logger("""Copying switch simulation infrastructure for switch slot: {}.""".format(switchslot))
|
self.instance_logger("""Copying switch simulation infrastructure for switch slot: {}.""".format(switchslot))
|
||||||
|
|
||||||
remote_switch_dir = """/home/centos/switch_slot_{}/""".format(switchslot)
|
remote_switch_dir = """/home/centos/switch_slot_{}/""".format(switchslot)
|
||||||
|
@ -609,54 +630,67 @@ class InstanceDeployManager:
|
||||||
with StreamLogger('stdout'), StreamLogger('stderr'):
|
with StreamLogger('stdout'), StreamLogger('stderr'):
|
||||||
put(local_path, pjoin(remote_switch_dir, remote_path), mirror_local_mode=True)
|
put(local_path, pjoin(remote_switch_dir, remote_path), mirror_local_mode=True)
|
||||||
|
|
||||||
def start_switch_slot(self, switchslot):
|
def start_switch_slot(self, switchslot: int) -> None:
|
||||||
|
assert isinstance(self.parentnode, M4_16)
|
||||||
|
assert switchslot < len(self.parentnode.switch_slots)
|
||||||
|
|
||||||
self.instance_logger("""Starting switch simulation for switch slot: {}.""".format(switchslot))
|
self.instance_logger("""Starting switch simulation for switch slot: {}.""".format(switchslot))
|
||||||
remote_switch_dir = """/home/centos/switch_slot_{}/""".format(switchslot)
|
remote_switch_dir = """/home/centos/switch_slot_{}/""".format(switchslot)
|
||||||
switch = self.parentnode.switch_slots[switchslot]
|
switch = self.parentnode.switch_slots[switchslot]
|
||||||
with cd(remote_switch_dir), StreamLogger('stdout'), StreamLogger('stderr'):
|
with cd(remote_switch_dir), StreamLogger('stdout'), StreamLogger('stderr'):
|
||||||
run(switch.get_switch_start_command())
|
run(switch.get_switch_start_command())
|
||||||
|
|
||||||
def start_sim_slot(self, slotno):
|
def start_sim_slot(self, slotno: int) -> None:
|
||||||
|
assert isinstance(self.parentnode, F1_Instance)
|
||||||
|
assert slotno < len(self.parentnode.fpga_slots)
|
||||||
|
|
||||||
self.instance_logger("""Starting FPGA simulation for slot: {}.""".format(slotno))
|
self.instance_logger("""Starting FPGA simulation for slot: {}.""".format(slotno))
|
||||||
remote_sim_dir = """/home/centos/sim_slot_{}/""".format(slotno)
|
remote_sim_dir = """/home/centos/sim_slot_{}/""".format(slotno)
|
||||||
server = self.parentnode.fpga_slots[slotno]
|
server = self.parentnode.fpga_slots[slotno]
|
||||||
with cd(remote_sim_dir), StreamLogger('stdout'), StreamLogger('stderr'):
|
with cd(remote_sim_dir), StreamLogger('stdout'), StreamLogger('stderr'):
|
||||||
server.run_sim_start_command(slotno)
|
server.run_sim_start_command(slotno)
|
||||||
|
|
||||||
def kill_switch_slot(self, switchslot):
|
def kill_switch_slot(self, switchslot: int) -> None:
|
||||||
""" kill the switch in slot switchslot. """
|
""" kill the switch in slot switchslot. """
|
||||||
|
assert isinstance(self.parentnode, M4_16)
|
||||||
|
assert switchslot < len(self.parentnode.switch_slots)
|
||||||
|
|
||||||
self.instance_logger("""Killing switch simulation for switchslot: {}.""".format(switchslot))
|
self.instance_logger("""Killing switch simulation for switchslot: {}.""".format(switchslot))
|
||||||
switch = self.parentnode.switch_slots[switchslot]
|
switch = self.parentnode.switch_slots[switchslot]
|
||||||
with warn_only(), StreamLogger('stdout'), StreamLogger('stderr'):
|
with warn_only(), StreamLogger('stdout'), StreamLogger('stderr'):
|
||||||
run(switch.get_switch_kill_command())
|
run(switch.get_switch_kill_command())
|
||||||
|
|
||||||
def kill_sim_slot(self, slotno):
|
def kill_sim_slot(self, slotno: int) -> None:
|
||||||
|
assert isinstance(self.parentnode, F1_Instance)
|
||||||
|
assert slotno < len(self.parentnode.fpga_slots)
|
||||||
|
|
||||||
self.instance_logger("""Killing FPGA simulation for slot: {}.""".format(slotno))
|
self.instance_logger("""Killing FPGA simulation for slot: {}.""".format(slotno))
|
||||||
server = self.parentnode.fpga_slots[slotno]
|
server = self.parentnode.fpga_slots[slotno]
|
||||||
with warn_only(), StreamLogger('stdout'), StreamLogger('stderr'):
|
with warn_only(), StreamLogger('stdout'), StreamLogger('stderr'):
|
||||||
run(server.get_sim_kill_command(slotno))
|
run(server.get_sim_kill_command(slotno))
|
||||||
|
|
||||||
def instance_assigned_simulations(self):
|
def instance_assigned_simulations(self) -> bool:
|
||||||
""" return true if this instance has any assigned fpga simulations. """
|
""" return true if this instance has any assigned fpga simulations. """
|
||||||
if not isinstance(self.parentnode, M4_16):
|
if isinstance(self.parentnode, F1_Instance):
|
||||||
if any(self.parentnode.fpga_slots):
|
if len(self.parentnode.fpga_slots) > 0:
|
||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def instance_assigned_switches(self):
|
def instance_assigned_switches(self) -> bool:
|
||||||
""" return true if this instance has any assigned switch simulations. """
|
""" return true if this instance has any assigned switch simulations. """
|
||||||
if any(self.parentnode.switch_slots):
|
if len(self.parentnode.switch_slots) > 0:
|
||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def infrasetup_instance(self):
|
def infrasetup_instance(self) -> None:
|
||||||
""" Handle infrastructure setup for this instance. """
|
""" Handle infrastructure setup for this instance. """
|
||||||
# check if fpga node
|
# check if fpga node
|
||||||
if self.instance_assigned_simulations():
|
if self.instance_assigned_simulations():
|
||||||
# This is an FPGA-host node.
|
# This is an FPGA-host node.
|
||||||
|
assert isinstance(self.parentnode, F1_Instance)
|
||||||
|
|
||||||
# copy fpga sim infrastructure
|
# copy fpga sim infrastructure
|
||||||
for slotno in range(self.parentnode.get_num_fpga_slots_consumed()):
|
for slotno in range(len(self.parentnode.fpga_slots)):
|
||||||
self.copy_sim_slot_infrastructure(slotno)
|
self.copy_sim_slot_infrastructure(slotno)
|
||||||
|
|
||||||
self.get_and_install_aws_fpga_sdk()
|
self.get_and_install_aws_fpga_sdk()
|
||||||
|
@ -685,46 +719,49 @@ class InstanceDeployManager:
|
||||||
|
|
||||||
if self.instance_assigned_switches():
|
if self.instance_assigned_switches():
|
||||||
# all nodes could have a switch
|
# all nodes could have a switch
|
||||||
for slotno in range(self.parentnode.get_num_switch_slots_consumed()):
|
for slotno in range(len(self.parentnode.switch_slots)):
|
||||||
self.copy_switch_slot_infrastructure(slotno)
|
self.copy_switch_slot_infrastructure(slotno)
|
||||||
|
|
||||||
|
|
||||||
def start_switches_instance(self):
|
def start_switches_instance(self) -> None:
|
||||||
""" Boot up all the switches in a screen. """
|
""" Boot up all the switches in a screen. """
|
||||||
# remove shared mem pages used by switches
|
# remove shared mem pages used by switches
|
||||||
if self.instance_assigned_switches():
|
if self.instance_assigned_switches():
|
||||||
with StreamLogger('stdout'), StreamLogger('stderr'):
|
with StreamLogger('stdout'), StreamLogger('stderr'):
|
||||||
run("sudo rm -rf /dev/shm/*")
|
run("sudo rm -rf /dev/shm/*")
|
||||||
|
|
||||||
for slotno in range(self.parentnode.get_num_switch_slots_consumed()):
|
for slotno in range(len(self.parentnode.switch_slots)):
|
||||||
self.start_switch_slot(slotno)
|
self.start_switch_slot(slotno)
|
||||||
|
|
||||||
def start_simulations_instance(self):
|
def start_simulations_instance(self) -> None:
|
||||||
""" Boot up all the sims in a screen. """
|
""" Boot up all the sims in a screen. """
|
||||||
if self.instance_assigned_simulations():
|
if self.instance_assigned_simulations():
|
||||||
|
assert isinstance(self.parentnode, F1_Instance)
|
||||||
|
|
||||||
# only on sim nodes
|
# only on sim nodes
|
||||||
for slotno in range(self.parentnode.get_num_fpga_slots_consumed()):
|
for slotno in range(len(self.parentnode.fpga_slots)):
|
||||||
self.start_sim_slot(slotno)
|
self.start_sim_slot(slotno)
|
||||||
|
|
||||||
def kill_switches_instance(self):
|
def kill_switches_instance(self) -> None:
|
||||||
""" Kill all the switches on this instance. """
|
""" Kill all the switches on this instance. """
|
||||||
if self.instance_assigned_switches():
|
if self.instance_assigned_switches():
|
||||||
for slotno in range(self.parentnode.get_num_switch_slots_consumed()):
|
for slotno in range(len(self.parentnode.switch_slots)):
|
||||||
self.kill_switch_slot(slotno)
|
self.kill_switch_slot(slotno)
|
||||||
with StreamLogger('stdout'), StreamLogger('stderr'):
|
with StreamLogger('stdout'), StreamLogger('stderr'):
|
||||||
run("sudo rm -rf /dev/shm/*")
|
run("sudo rm -rf /dev/shm/*")
|
||||||
|
|
||||||
def kill_simulations_instance(self, disconnect_all_nbds=True):
|
def kill_simulations_instance(self, disconnect_all_nbds: bool = True) -> None:
|
||||||
""" Kill all simulations on this instance. """
|
""" Kill all simulations on this instance. """
|
||||||
if self.instance_assigned_simulations():
|
if self.instance_assigned_simulations():
|
||||||
|
assert isinstance(self.parentnode, F1_Instance)
|
||||||
# only on sim nodes
|
# only on sim nodes
|
||||||
for slotno in range(self.parentnode.get_num_fpga_slots_consumed()):
|
for slotno in range(len(self.parentnode.fpga_slots)):
|
||||||
self.kill_sim_slot(slotno)
|
self.kill_sim_slot(slotno)
|
||||||
if disconnect_all_nbds:
|
if disconnect_all_nbds:
|
||||||
# disconnect all NBDs
|
# disconnect all NBDs
|
||||||
self.disconnect_all_nbds_instance()
|
self.disconnect_all_nbds_instance()
|
||||||
|
|
||||||
def running_simulations(self):
|
def running_simulations(self) -> Dict[str, List[str]]:
|
||||||
""" collect screen results from node to see what's running on it. """
|
""" collect screen results from node to see what's running on it. """
|
||||||
simdrivers = []
|
simdrivers = []
|
||||||
switches = []
|
switches = []
|
||||||
|
@ -734,16 +771,20 @@ class InstanceDeployManager:
|
||||||
if "(Detached)" in line or "(Attached)" in line:
|
if "(Detached)" in line or "(Attached)" in line:
|
||||||
line_stripped = line.strip()
|
line_stripped = line.strip()
|
||||||
if "fsim" in line:
|
if "fsim" in line:
|
||||||
line_stripped = re.search('fsim([0-9][0-9]*)', line_stripped).group(0)
|
search = re.search('fsim([0-9][0-9]*)', line_stripped)
|
||||||
|
assert search is not None
|
||||||
|
line_stripped = search.group(0)
|
||||||
line_stripped = line_stripped.replace('fsim', '')
|
line_stripped = line_stripped.replace('fsim', '')
|
||||||
simdrivers.append(line_stripped)
|
simdrivers.append(line_stripped)
|
||||||
elif "switch" in line:
|
elif "switch" in line:
|
||||||
line_stripped = re.search('switch([0-9][0-9]*)', line_stripped).group(0)
|
search = re.search('switch([0-9][0-9]*)', line_stripped)
|
||||||
|
assert search is not None
|
||||||
|
line_stripped = search.group(0)
|
||||||
switches.append(line_stripped)
|
switches.append(line_stripped)
|
||||||
return {'switches': switches, 'simdrivers': simdrivers}
|
return {'switches': switches, 'simdrivers': simdrivers}
|
||||||
|
|
||||||
def monitor_jobs_instance(self, completed_jobs, teardown, terminateoncompletion,
|
def monitor_jobs_instance(self, completed_jobs: List[str], teardown: bool, terminateoncompletion: bool,
|
||||||
job_results_dir):
|
job_results_dir: str) -> Dict[str, Dict[str, bool]]:
|
||||||
""" Job monitoring for this instance. """
|
""" Job monitoring for this instance. """
|
||||||
# make a local copy of completed_jobs, so that we can update it
|
# make a local copy of completed_jobs, so that we can update it
|
||||||
completed_jobs = list(completed_jobs)
|
completed_jobs = list(completed_jobs)
|
||||||
|
@ -759,14 +800,14 @@ class InstanceDeployManager:
|
||||||
if teardown:
|
if teardown:
|
||||||
# handle the case where we're just tearing down nodes that have
|
# handle the case where we're just tearing down nodes that have
|
||||||
# ONLY switches
|
# ONLY switches
|
||||||
numswitchesused = self.parentnode.get_num_switch_slots_consumed()
|
for counter in range(len(self.parentnode.switch_slots)):
|
||||||
for counter in range(numswitchesused):
|
|
||||||
switchsim = self.parentnode.switch_slots[counter]
|
switchsim = self.parentnode.switch_slots[counter]
|
||||||
switchsim.copy_back_switchlog_from_run(job_results_dir, counter)
|
switchsim.copy_back_switchlog_from_run(job_results_dir, counter)
|
||||||
|
|
||||||
if terminateoncompletion:
|
if terminateoncompletion:
|
||||||
# terminate the instance since teardown is called and instance
|
# terminate the instance since teardown is called and instance
|
||||||
# termination is enabled
|
# termination is enabled
|
||||||
|
assert isinstance(self.parentnode.boto3_instance_object, EC2InstanceResource)
|
||||||
instanceids = get_instance_ids_for_instances([self.parentnode.boto3_instance_object])
|
instanceids = get_instance_ids_for_instances([self.parentnode.boto3_instance_object])
|
||||||
terminate_instances(instanceids, dryrun=False)
|
terminate_instances(instanceids, dryrun=False)
|
||||||
|
|
||||||
|
@ -775,7 +816,7 @@ class InstanceDeployManager:
|
||||||
|
|
||||||
# not teardown - just get the status of the switch sims
|
# not teardown - just get the status of the switch sims
|
||||||
switchescompleteddict = {k: False for k in self.running_simulations()['switches']}
|
switchescompleteddict = {k: False for k in self.running_simulations()['switches']}
|
||||||
for switchsim in self.parentnode.switch_slots[:self.parentnode.get_num_switch_slots_consumed()]:
|
for switchsim in self.parentnode.switch_slots:
|
||||||
swname = switchsim.switch_builder.switch_binary_name()
|
swname = switchsim.switch_builder.switch_binary_name()
|
||||||
if swname not in switchescompleteddict.keys():
|
if swname not in switchescompleteddict.keys():
|
||||||
switchescompleteddict[swname] = True
|
switchescompleteddict[swname] = True
|
||||||
|
@ -783,14 +824,14 @@ class InstanceDeployManager:
|
||||||
|
|
||||||
if self.instance_assigned_simulations():
|
if self.instance_assigned_simulations():
|
||||||
# this node has fpga sims attached
|
# this node has fpga sims attached
|
||||||
|
assert isinstance(self.parentnode, F1_Instance)
|
||||||
|
|
||||||
# first, figure out which jobs belong to this instance.
|
# first, figure out which jobs belong to this instance.
|
||||||
# if they are all completed already. RETURN, DON'T TRY TO DO ANYTHING
|
# if they are all completed already. RETURN, DON'T TRY TO DO ANYTHING
|
||||||
# ON THE INSTNACE.
|
# ON THE INSTANCE.
|
||||||
parentslots = self.parentnode.fpga_slots
|
parentslots = self.parentnode.fpga_slots
|
||||||
rootLogger.debug("parentslots " + str(parentslots))
|
rootLogger.debug("parentslots " + str(parentslots))
|
||||||
num_parentslots_used = self.parentnode.fpga_slots_consumed
|
jobnames = [slot.get_job_name() for slot in parentslots]
|
||||||
jobnames = [slot.get_job_name() for slot in parentslots[0:num_parentslots_used]]
|
|
||||||
rootLogger.debug("jobnames " + str(jobnames))
|
rootLogger.debug("jobnames " + str(jobnames))
|
||||||
already_done = all([job in completed_jobs for job in jobnames])
|
already_done = all([job in completed_jobs for job in jobnames])
|
||||||
rootLogger.debug("already done? " + str(already_done))
|
rootLogger.debug("already done? " + str(already_done))
|
||||||
|
@ -807,7 +848,7 @@ class InstanceDeployManager:
|
||||||
|
|
||||||
if self.instance_assigned_switches():
|
if self.instance_assigned_switches():
|
||||||
# fill in whether switches have terminated for some reason
|
# fill in whether switches have terminated for some reason
|
||||||
for switchsim in self.parentnode.switch_slots[:self.parentnode.get_num_switch_slots_consumed()]:
|
for switchsim in self.parentnode.switch_slots:
|
||||||
swname = switchsim.switch_builder.switch_binary_name()
|
swname = switchsim.switch_builder.switch_binary_name()
|
||||||
if swname not in switchescompleteddict.keys():
|
if swname not in switchescompleteddict.keys():
|
||||||
switchescompleteddict[swname] = True
|
switchescompleteddict[swname] = True
|
||||||
|
@ -818,6 +859,7 @@ class InstanceDeployManager:
|
||||||
rootLogger.debug(slotsrunning)
|
rootLogger.debug(slotsrunning)
|
||||||
for slotno, jobname in enumerate(jobnames):
|
for slotno, jobname in enumerate(jobnames):
|
||||||
if str(slotno) not in slotsrunning and jobname not in completed_jobs:
|
if str(slotno) not in slotsrunning and jobname not in completed_jobs:
|
||||||
|
assert slotno < len(parentslots)
|
||||||
self.instance_logger("Slot " + str(slotno) + " completed! copying results.")
|
self.instance_logger("Slot " + str(slotno) + " completed! copying results.")
|
||||||
# NOW, we must copy off the results of this sim, since it just exited
|
# NOW, we must copy off the results of this sim, since it just exited
|
||||||
parentslots[slotno].copy_back_job_results_from_run(slotno)
|
parentslots[slotno].copy_back_job_results_from_run(slotno)
|
||||||
|
@ -846,15 +888,16 @@ class InstanceDeployManager:
|
||||||
|
|
||||||
self.kill_switches_instance()
|
self.kill_switches_instance()
|
||||||
|
|
||||||
for counter, switchsim in enumerate(self.parentnode.switch_slots[:self.parentnode.get_num_switch_slots_consumed()]):
|
for counter, switchsim in enumerate(self.parentnode.switch_slots):
|
||||||
switchsim.copy_back_switchlog_from_run(job_results_dir, counter)
|
switchsim.copy_back_switchlog_from_run(job_results_dir, counter)
|
||||||
|
|
||||||
if now_done and terminateoncompletion:
|
if now_done and terminateoncompletion:
|
||||||
# terminate the instance since everything is done and instance
|
# terminate the instance since everything is done and instance
|
||||||
# termination is enabled
|
# termination is enabled
|
||||||
|
assert isinstance(self.parentnode.boto3_instance_object, EC2InstanceResource)
|
||||||
instanceids = get_instance_ids_for_instances([self.parentnode.boto3_instance_object])
|
instanceids = get_instance_ids_for_instances([self.parentnode.boto3_instance_object])
|
||||||
terminate_instances(instanceids, dryrun=False)
|
terminate_instances(instanceids, dryrun=False)
|
||||||
|
|
||||||
return {'switches': switchescompleteddict, 'sims': jobs_done_q}
|
return {'switches': switchescompleteddict, 'sims': jobs_done_q}
|
||||||
|
|
||||||
|
assert False, "Instance must host switch slots and/or FPGA slots"
|
||||||
|
|
|
@ -1,46 +1,54 @@
|
||||||
""" This file manages the overall configuration of the system for running
|
""" This file manages the overall configuration of the system for running
|
||||||
simulation tasks. """
|
simulation tasks. """
|
||||||
|
|
||||||
from __future__ import print_function
|
from __future__ import annotations
|
||||||
|
|
||||||
import argparse
|
|
||||||
from datetime import timedelta
|
from datetime import timedelta
|
||||||
from time import strftime, gmtime
|
from time import strftime, gmtime
|
||||||
import pprint
|
import pprint
|
||||||
import logging
|
import logging
|
||||||
import yaml
|
import yaml
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
from fabric.api import prefix, settings, local # type: ignore
|
||||||
|
|
||||||
from fabric.api import * # type: ignore
|
from awstools.awstools import aws_resource_names
|
||||||
from awstools.awstools import *
|
from awstools.afitools import get_firesim_tagval_for_agfi
|
||||||
from awstools.afitools import *
|
|
||||||
from runtools.firesim_topology_with_passes import FireSimTopologyWithPasses
|
from runtools.firesim_topology_with_passes import FireSimTopologyWithPasses
|
||||||
from runtools.workload import WorkloadConfig
|
from runtools.workload import WorkloadConfig
|
||||||
from runtools.run_farm import RunFarm
|
from runtools.run_farm import RunFarm
|
||||||
from util.streamlogger import StreamLogger
|
from util.streamlogger import StreamLogger
|
||||||
import os
|
|
||||||
import sys
|
from typing import Optional, Dict, Any, List, Sequence, TYPE_CHECKING
|
||||||
|
import argparse # this is not within a if TYPE_CHECKING: scope so the `register_task` in FireSim can evaluate it's annotation
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from runtools.utils import MacAddress
|
||||||
|
|
||||||
LOCAL_DRIVERS_BASE = "../sim/output/f1/"
|
LOCAL_DRIVERS_BASE = "../sim/output/f1/"
|
||||||
LOCAL_DRIVERS_GENERATED_SRC = "../sim/generated-src/f1/"
|
|
||||||
LOCAL_SYSROOT_LIB = "../sim/lib-install/lib/"
|
|
||||||
CUSTOM_RUNTIMECONFS_BASE = "../sim/custom-runtime-configs/"
|
CUSTOM_RUNTIMECONFS_BASE = "../sim/custom-runtime-configs/"
|
||||||
|
|
||||||
rootLogger = logging.getLogger()
|
rootLogger = logging.getLogger()
|
||||||
|
|
||||||
class RuntimeHWConfig:
|
class RuntimeHWConfig:
|
||||||
""" A pythonic version of the entires in config_hwdb.yaml """
|
""" A pythonic version of the entires in config_hwdb.yaml """
|
||||||
|
name: str
|
||||||
|
platform: str
|
||||||
|
agfi: str
|
||||||
|
deploytriplet: Optional[str]
|
||||||
|
customruntimeconfig: str
|
||||||
|
driver_built: bool
|
||||||
|
|
||||||
def __init__(self, name, hwconfig_dict):
|
def __init__(self, name: str, hwconfig_dict: Dict[str, Any]) -> None:
|
||||||
self.name = name
|
self.name = name
|
||||||
self.agfi = hwconfig_dict['agfi']
|
self.agfi = hwconfig_dict['agfi']
|
||||||
self.deploytriplet = hwconfig_dict['deploy_triplet_override']
|
self.deploytriplet = hwconfig_dict['deploy_triplet_override']
|
||||||
if self.deploytriplet is not None:
|
if self.deploytriplet is not None:
|
||||||
rootLogger.warning("{} is overriding a deploy triplet in your config_hwdb.yaml file. Make sure you understand why!".format(name))
|
rootLogger.warning("{} is overriding a deploy triplet in your config_hwdb.yaml file. Make sure you understand why!".format(name))
|
||||||
self.customruntimeconfig = hwconfig_dict['custom_runtime_config']
|
self.customruntimeconfig = hwconfig_dict['custom_runtime_config']
|
||||||
# note whether we've built a copy of the simulation driver for this hwconf
|
# note whether we've built a copy of the simulation driver for this hwconf
|
||||||
self.driver_built = False
|
self.driver_built = False
|
||||||
|
|
||||||
def get_deploytriplet_for_config(self):
|
def get_deploytriplet_for_config(self) -> str:
|
||||||
""" Get the deploytriplet for this configuration. This memoizes the request
|
""" Get the deploytriplet for this configuration. This memoizes the request
|
||||||
to the AWS AGFI API."""
|
to the AWS AGFI API."""
|
||||||
if self.deploytriplet is not None:
|
if self.deploytriplet is not None:
|
||||||
|
@ -48,28 +56,30 @@ class RuntimeHWConfig:
|
||||||
rootLogger.debug("Setting deploytriplet by querying the AGFI's description.")
|
rootLogger.debug("Setting deploytriplet by querying the AGFI's description.")
|
||||||
self.deploytriplet = get_firesim_tagval_for_agfi(self.agfi,
|
self.deploytriplet = get_firesim_tagval_for_agfi(self.agfi,
|
||||||
'firesim-deploytriplet')
|
'firesim-deploytriplet')
|
||||||
def get_design_name(self):
|
return self.deploytriplet
|
||||||
|
|
||||||
|
def get_design_name(self) -> str:
|
||||||
""" Returns the name used to prefix MIDAS-emitted files. (The DESIGN make var) """
|
""" Returns the name used to prefix MIDAS-emitted files. (The DESIGN make var) """
|
||||||
my_deploytriplet = self.get_deploytriplet_for_config()
|
my_deploytriplet = self.get_deploytriplet_for_config()
|
||||||
my_design = my_deploytriplet.split("-")[0]
|
my_design = my_deploytriplet.split("-")[0]
|
||||||
return my_design
|
return my_design
|
||||||
|
|
||||||
def get_local_driver_binaryname(self):
|
def get_local_driver_binaryname(self) -> str:
|
||||||
""" Get the name of the driver binary. """
|
""" Get the name of the driver binary. """
|
||||||
return self.get_design_name() + "-f1"
|
return self.get_design_name() + "-f1"
|
||||||
|
|
||||||
def get_local_driver_path(self):
|
def get_local_driver_path(self) -> str:
|
||||||
""" return relative local path of the driver used to run this sim. """
|
""" return relative local path of the driver used to run this sim. """
|
||||||
my_deploytriplet = self.get_deploytriplet_for_config()
|
my_deploytriplet = self.get_deploytriplet_for_config()
|
||||||
drivers_software_base = LOCAL_DRIVERS_BASE + "/" + my_deploytriplet + "/"
|
drivers_software_base = LOCAL_DRIVERS_BASE + "/" + my_deploytriplet + "/"
|
||||||
fpga_driver_local = drivers_software_base + self.get_local_driver_binaryname()
|
fpga_driver_local = drivers_software_base + self.get_local_driver_binaryname()
|
||||||
return fpga_driver_local
|
return fpga_driver_local
|
||||||
|
|
||||||
def get_local_runtimeconf_binaryname(self):
|
def get_local_runtimeconf_binaryname(self) -> str:
|
||||||
""" Get the name of the runtimeconf file. """
|
""" Get the name of the runtimeconf file. """
|
||||||
return "runtime.conf" if self.customruntimeconfig is None else os.path.basename(self.customruntimeconfig)
|
return "runtime.conf" if self.customruntimeconfig is None else os.path.basename(self.customruntimeconfig)
|
||||||
|
|
||||||
def get_local_runtime_conf_path(self):
|
def get_local_runtime_conf_path(self) -> str:
|
||||||
""" return relative local path of the runtime conf used to run this sim. """
|
""" return relative local path of the runtime conf used to run this sim. """
|
||||||
my_deploytriplet = self.get_deploytriplet_for_config()
|
my_deploytriplet = self.get_deploytriplet_for_config()
|
||||||
drivers_software_base = LOCAL_DRIVERS_BASE + "/" + my_deploytriplet + "/"
|
drivers_software_base = LOCAL_DRIVERS_BASE + "/" + my_deploytriplet + "/"
|
||||||
|
@ -80,16 +90,16 @@ class RuntimeHWConfig:
|
||||||
runtime_conf_local = CUSTOM_RUNTIMECONFS_BASE + my_runtimeconfig
|
runtime_conf_local = CUSTOM_RUNTIMECONFS_BASE + my_runtimeconfig
|
||||||
return runtime_conf_local
|
return runtime_conf_local
|
||||||
|
|
||||||
def get_boot_simulation_command(self, slotid, all_macs,
|
def get_boot_simulation_command(self, slotid: int, all_macs: Sequence[Optional[MacAddress]],
|
||||||
all_rootfses, all_linklatencies,
|
all_rootfses: Sequence[Optional[str]], all_linklatencies: Sequence[Optional[int]],
|
||||||
all_netbws, profile_interval,
|
all_netbws: Sequence[Optional[int]], profile_interval: int,
|
||||||
all_bootbinaries, trace_enable,
|
all_bootbinaries: List[str], trace_enable: bool,
|
||||||
trace_select, trace_start, trace_end,
|
trace_select: str, trace_start: str, trace_end: str,
|
||||||
trace_output_format,
|
trace_output_format: str,
|
||||||
autocounter_readrate, all_shmemportnames,
|
autocounter_readrate: int, all_shmemportnames: List[str],
|
||||||
enable_zerooutdram, disable_asserts,
|
enable_zerooutdram: bool, disable_asserts_arg: bool,
|
||||||
print_start, print_end,
|
print_start: str, print_end: str,
|
||||||
enable_print_cycle_prefix):
|
enable_print_cycle_prefix: bool) -> str:
|
||||||
""" return the command used to boot the simulation. this has to have
|
""" return the command used to boot the simulation. this has to have
|
||||||
some external params passed to it, because not everything is contained
|
some external params passed to it, because not everything is contained
|
||||||
in a runtimehwconfig. TODO: maybe runtimehwconfig should be renamed to
|
in a runtimehwconfig. TODO: maybe runtimehwconfig should be renamed to
|
||||||
|
@ -129,52 +139,26 @@ class RuntimeHWConfig:
|
||||||
|
|
||||||
command_bootbinaries = array_to_plusargs(all_bootbinaries, "+prog")
|
command_bootbinaries = array_to_plusargs(all_bootbinaries, "+prog")
|
||||||
zero_out_dram = "+zero-out-dram" if (enable_zerooutdram) else ""
|
zero_out_dram = "+zero-out-dram" if (enable_zerooutdram) else ""
|
||||||
disable_asserts_arg = "+disable-asserts" if (disable_asserts) else ""
|
disable_asserts = "+disable-asserts" if (disable_asserts_arg) else ""
|
||||||
print_cycle_prefix = "+print-no-cycle-prefix" if not enable_print_cycle_prefix else ""
|
print_cycle_prefix = "+print-no-cycle-prefix" if not enable_print_cycle_prefix else ""
|
||||||
|
|
||||||
# TODO supernode support
|
# TODO supernode support
|
||||||
dwarf_file_name = "+dwarf-file-name=" + all_bootbinaries[0] + "-dwarf"
|
dwarf_file_name = "+dwarf-file-name=" + all_bootbinaries[0] + "-dwarf"
|
||||||
|
|
||||||
# TODO: supernode support (tracefile, trace-select.. etc)
|
# TODO: supernode support (tracefile, trace-select.. etc)
|
||||||
basecommand = """screen -S fsim{slotid} -d -m bash -c "script -f -c 'stty intr ^] && sudo ./{driver} +permissive $(sed \':a;N;$!ba;s/\\n/ /g\' {runtimeconf}) +slotid={slotid} +profile-interval={profile_interval} {zero_out_dram} {disable_asserts} {command_macs} {command_rootfses} {command_niclogs} {command_blkdev_logs} {tracefile} +trace-select={trace_select} +trace-start={trace_start} +trace-end={trace_end} +trace-output-format={trace_output_format} {dwarf_file_name} +autocounter-readrate={autocounter_readrate} {autocounterfile} {command_dromajo} {print_cycle_prefix} +print-start={print_start} +print-end={print_end} {command_linklatencies} {command_netbws} {command_shmemportnames} +permissive-off {command_bootbinaries} && stty intr ^c' uartlog"; sleep 1""".format(
|
basecommand = f"""screen -S fsim{slotid} -d -m bash -c "script -f -c 'stty intr ^] && sudo ./{driver} +permissive $(sed \':a;N;$!ba;s/\\n/ /g\' {runtimeconf}) +slotid={slotid} +profile-interval={profile_interval} {zero_out_dram} {disable_asserts} {command_macs} {command_rootfses} {command_niclogs} {command_blkdev_logs} {tracefile} +trace-select={trace_select} +trace-start={trace_start} +trace-end={trace_end} +trace-output-format={trace_output_format} {dwarf_file_name} +autocounter-readrate={autocounter_readrate} {autocounterfile} {command_dromajo} {print_cycle_prefix} +print-start={print_start} +print-end={print_end} {command_linklatencies} {command_netbws} {command_shmemportnames} +permissive-off {command_bootbinaries} && stty intr ^c' uartlog"; sleep 1"""
|
||||||
slotid=slotid,
|
|
||||||
driver=driver,
|
|
||||||
runtimeconf=runtimeconf,
|
|
||||||
command_macs=command_macs,
|
|
||||||
command_rootfses=command_rootfses,
|
|
||||||
command_niclogs=command_niclogs,
|
|
||||||
command_blkdev_logs=command_blkdev_logs,
|
|
||||||
command_linklatencies=command_linklatencies,
|
|
||||||
command_netbws=command_netbws,
|
|
||||||
profile_interval=profile_interval,
|
|
||||||
zero_out_dram=zero_out_dram,
|
|
||||||
disable_asserts=disable_asserts_arg,
|
|
||||||
command_shmemportnames=command_shmemportnames,
|
|
||||||
command_bootbinaries=command_bootbinaries,
|
|
||||||
trace_select=trace_select,
|
|
||||||
trace_start=trace_start,
|
|
||||||
trace_end=trace_end,
|
|
||||||
tracefile=tracefile,
|
|
||||||
trace_output_format=trace_output_format,
|
|
||||||
dwarf_file_name=dwarf_file_name,
|
|
||||||
autocounterfile=autocounterfile,
|
|
||||||
autocounter_readrate=autocounter_readrate,
|
|
||||||
command_dromajo=command_dromajo,
|
|
||||||
print_cycle_prefix=print_cycle_prefix,
|
|
||||||
print_start=print_start,
|
|
||||||
print_end=print_end)
|
|
||||||
|
|
||||||
return basecommand
|
return basecommand
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def get_kill_simulation_command(self):
|
def get_kill_simulation_command(self) -> str:
|
||||||
driver = self.get_local_driver_binaryname()
|
driver = self.get_local_driver_binaryname()
|
||||||
# Note that pkill only works for names <=15 characters
|
# Note that pkill only works for names <=15 characters
|
||||||
return """sudo pkill -SIGKILL {driver}""".format(driver=driver[:15])
|
return """sudo pkill -SIGKILL {driver}""".format(driver=driver[:15])
|
||||||
|
|
||||||
|
|
||||||
def build_fpga_driver(self):
|
def build_fpga_driver(self) -> None:
|
||||||
""" Build FPGA driver for running simulation """
|
""" Build FPGA driver for running simulation """
|
||||||
if self.driver_built:
|
if self.driver_built:
|
||||||
# we already built the driver at some point
|
# we already built the driver at some point
|
||||||
|
@ -207,15 +191,16 @@ class RuntimeHWConfig:
|
||||||
self.driver_built = True
|
self.driver_built = True
|
||||||
|
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self) -> str:
|
||||||
return """RuntimeHWConfig: {}\nDeployTriplet: {}\nAGFI: {}\nCustomRuntimeConf: {}""".format(self.name, self.deploytriplet, self.agfi, str(self.customruntimeconfig))
|
return """RuntimeHWConfig: {}\nDeployTriplet: {}\nAGFI: {}\nCustomRuntimeConf: {}""".format(self.name, self.deploytriplet, self.agfi, str(self.customruntimeconfig))
|
||||||
|
|
||||||
|
|
||||||
class RuntimeHWDB:
|
class RuntimeHWDB:
|
||||||
""" This class manages the hardware configurations that are available
|
""" This class manages the hardware configurations that are available
|
||||||
as endpoints on the simulation. """
|
as endpoints on the simulation. """
|
||||||
|
hwconf_dict: Dict[str, RuntimeHWConfig]
|
||||||
|
|
||||||
def __init__(self, hardwaredbconfigfile):
|
def __init__(self, hardwaredbconfigfile: str) -> None:
|
||||||
|
|
||||||
agfidb_configfile = None
|
agfidb_configfile = None
|
||||||
with open(hardwaredbconfigfile, "r") as yaml_file:
|
with open(hardwaredbconfigfile, "r") as yaml_file:
|
||||||
|
@ -225,27 +210,56 @@ class RuntimeHWDB:
|
||||||
|
|
||||||
self.hwconf_dict = {s: RuntimeHWConfig(s, v) for s, v in agfidb_dict.items()}
|
self.hwconf_dict = {s: RuntimeHWConfig(s, v) for s, v in agfidb_dict.items()}
|
||||||
|
|
||||||
def get_runtimehwconfig_from_name(self, name):
|
def get_runtimehwconfig_from_name(self, name: str) -> RuntimeHWConfig:
|
||||||
return self.hwconf_dict[name]
|
return self.hwconf_dict[name]
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self) -> str:
|
||||||
return pprint.pformat(vars(self))
|
return pprint.pformat(vars(self))
|
||||||
|
|
||||||
|
|
||||||
class InnerRuntimeConfiguration:
|
class InnerRuntimeConfiguration:
|
||||||
""" Pythonic version of config_runtime.yaml """
|
""" Pythonic version of config_runtime.yaml """
|
||||||
|
runfarmtag: str
|
||||||
|
f1_16xlarges_requested: int
|
||||||
|
f1_4xlarges_requested: int
|
||||||
|
m4_16xlarges_requested: int
|
||||||
|
f1_2xlarges_requested: int
|
||||||
|
run_instance_market: str
|
||||||
|
spot_interruption_behavior: str
|
||||||
|
spot_max_price: str
|
||||||
|
topology: str
|
||||||
|
no_net_num_nodes: int
|
||||||
|
linklatency: int
|
||||||
|
switchinglatency: int
|
||||||
|
netbandwidth: int
|
||||||
|
profileinterval: int
|
||||||
|
launch_timeout: timedelta
|
||||||
|
always_expand: bool
|
||||||
|
trace_enable: bool
|
||||||
|
trace_select: str
|
||||||
|
trace_start: str
|
||||||
|
trace_end: str
|
||||||
|
trace_output_format: str
|
||||||
|
autocounter_readrate: int
|
||||||
|
zerooutdram: bool
|
||||||
|
disable_asserts: bool
|
||||||
|
print_start: str
|
||||||
|
print_end: str
|
||||||
|
print_cycle_prefix: bool
|
||||||
|
workload_name: str
|
||||||
|
suffixtag: str
|
||||||
|
terminateoncompletion: bool
|
||||||
|
|
||||||
def __init__(self, runtimeconfigfile, configoverridedata):
|
def __init__(self, runtimeconfigfile: str, configoverridedata: str) -> None:
|
||||||
|
|
||||||
runtime_dict = None
|
runtime_dict = None
|
||||||
with open(runtimeconfigfile, "r") as yaml_file:
|
with open(runtimeconfigfile, "r") as yaml_file:
|
||||||
runtime_dict = yaml.safe_load(yaml_file)
|
runtime_dict = yaml.safe_load(yaml_file)
|
||||||
|
|
||||||
# override parts of the runtime conf if specified
|
# override parts of the runtime conf if specified
|
||||||
configoverrideval = configoverridedata
|
if configoverridedata != "":
|
||||||
if configoverrideval != "":
|
|
||||||
## handle overriding part of the runtime conf
|
## handle overriding part of the runtime conf
|
||||||
configoverrideval = configoverrideval.split()
|
configoverrideval = configoverridedata.split()
|
||||||
overridesection = configoverrideval[0]
|
overridesection = configoverrideval[0]
|
||||||
overridefield = configoverrideval[1]
|
overridefield = configoverrideval[1]
|
||||||
overridevalue = configoverrideval[2]
|
overridevalue = configoverrideval[2]
|
||||||
|
@ -323,14 +337,14 @@ class InnerRuntimeConfiguration:
|
||||||
self.suffixtag = runtime_dict['workload']['suffix_tag'] if 'suffix_tag' in runtime_dict['workload'] else None
|
self.suffixtag = runtime_dict['workload']['suffix_tag'] if 'suffix_tag' in runtime_dict['workload'] else None
|
||||||
self.terminateoncompletion = runtime_dict['workload']['terminate_on_completion'] == "yes"
|
self.terminateoncompletion = runtime_dict['workload']['terminate_on_completion'] == "yes"
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self) -> str:
|
||||||
return pprint.pformat(vars(self))
|
return pprint.pformat(vars(self))
|
||||||
|
|
||||||
class RuntimeConfig:
|
class RuntimeConfig:
|
||||||
""" This class manages the overall configuration of the manager for running
|
""" This class manages the overall configuration of the manager for running
|
||||||
simulation tasks. """
|
simulation tasks. """
|
||||||
|
|
||||||
def __init__(self, args: argparse.Namespace):
|
def __init__(self, args: argparse.Namespace) -> None:
|
||||||
""" This reads runtime configuration files, massages them into formats that
|
""" This reads runtime configuration files, massages them into formats that
|
||||||
the rest of the manager expects, and keeps track of other info. """
|
the rest of the manager expects, and keeps track of other info. """
|
||||||
self.launch_time = strftime("%Y-%m-%d--%H-%M-%S", gmtime())
|
self.launch_time = strftime("%Y-%m-%d--%H-%M-%S", gmtime())
|
||||||
|
@ -380,33 +394,33 @@ class RuntimeConfig:
|
||||||
self.innerconf.print_start, self.innerconf.print_end,
|
self.innerconf.print_start, self.innerconf.print_end,
|
||||||
self.innerconf.print_cycle_prefix)
|
self.innerconf.print_cycle_prefix)
|
||||||
|
|
||||||
def launch_run_farm(self):
|
def launch_run_farm(self) -> None:
|
||||||
""" directly called by top-level launchrunfarm command. """
|
""" directly called by top-level launchrunfarm command. """
|
||||||
self.runfarm.launch_run_farm()
|
self.runfarm.launch_run_farm()
|
||||||
|
|
||||||
def terminate_run_farm(self):
|
def terminate_run_farm(self) -> None:
|
||||||
""" directly called by top-level terminaterunfarm command. """
|
""" directly called by top-level terminaterunfarm command. """
|
||||||
args = self.args
|
args = self.args
|
||||||
self.runfarm.terminate_run_farm(args.terminatesomef116, args.terminatesomef14, args.terminatesomef12,
|
self.runfarm.terminate_run_farm(args.terminatesomef116, args.terminatesomef14, args.terminatesomef12,
|
||||||
args.terminatesomem416, args.forceterminate)
|
args.terminatesomem416, args.forceterminate)
|
||||||
|
|
||||||
def infrasetup(self):
|
def infrasetup(self) -> None:
|
||||||
""" directly called by top-level infrasetup command. """
|
""" directly called by top-level infrasetup command. """
|
||||||
# set this to True if you want to use mock boto3 instances for testing
|
# set this to True if you want to use mock boto3 instances for testing
|
||||||
# the manager.
|
# the manager.
|
||||||
use_mock_instances_for_testing = False
|
use_mock_instances_for_testing = False
|
||||||
self.firesim_topology_with_passes.infrasetup_passes(use_mock_instances_for_testing)
|
self.firesim_topology_with_passes.infrasetup_passes(use_mock_instances_for_testing)
|
||||||
|
|
||||||
def boot(self):
|
def boot(self) -> None:
|
||||||
""" directly called by top-level boot command. """
|
""" directly called by top-level boot command. """
|
||||||
use_mock_instances_for_testing = False
|
use_mock_instances_for_testing = False
|
||||||
self.firesim_topology_with_passes.boot_simulation_passes(use_mock_instances_for_testing)
|
self.firesim_topology_with_passes.boot_simulation_passes(use_mock_instances_for_testing)
|
||||||
|
|
||||||
def kill(self):
|
def kill(self) -> None:
|
||||||
use_mock_instances_for_testing = False
|
use_mock_instances_for_testing = False
|
||||||
self.firesim_topology_with_passes.kill_simulation_passes(use_mock_instances_for_testing)
|
self.firesim_topology_with_passes.kill_simulation_passes(use_mock_instances_for_testing)
|
||||||
|
|
||||||
def run_workload(self):
|
def run_workload(self) -> None:
|
||||||
use_mock_instances_for_testing = False
|
use_mock_instances_for_testing = False
|
||||||
self.firesim_topology_with_passes.run_workload_passes(use_mock_instances_for_testing)
|
self.firesim_topology_with_passes.run_workload_passes(use_mock_instances_for_testing)
|
||||||
|
|
||||||
|
|
|
@ -1,14 +1,19 @@
|
||||||
""" This file contains components that tie closely with the FireSim switch
|
""" This file contains components that tie closely with the FireSim switch
|
||||||
models that live in target-design/switch/ """
|
models that live in target-design/switch/ """
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
import subprocess
|
import subprocess
|
||||||
import random
|
import random
|
||||||
import string
|
import string
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
from fabric.api import local # type: ignore
|
from fabric.api import local # type: ignore
|
||||||
from util.streamlogger import StreamLogger
|
from util.streamlogger import StreamLogger
|
||||||
|
|
||||||
|
from typing import TYPE_CHECKING
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from runtools.firesim_topology_elements import FireSimSwitchNode
|
||||||
|
|
||||||
rootLogger = logging.getLogger()
|
rootLogger = logging.getLogger()
|
||||||
|
|
||||||
class AbstractSwitchToSwitchConfig:
|
class AbstractSwitchToSwitchConfig:
|
||||||
|
@ -17,15 +22,17 @@ class AbstractSwitchToSwitchConfig:
|
||||||
that behaves as defined in the FireSimSwitchNode.
|
that behaves as defined in the FireSimSwitchNode.
|
||||||
|
|
||||||
This assumes that the switch has already been assigned to a host."""
|
This assumes that the switch has already been assigned to a host."""
|
||||||
|
fsimswitchnode: FireSimSwitchNode
|
||||||
|
build_disambiguate: str
|
||||||
|
|
||||||
def __init__(self, fsimswitchnode):
|
def __init__(self, fsimswitchnode: FireSimSwitchNode) -> None:
|
||||||
""" Construct the switch's config file """
|
""" Construct the switch's config file """
|
||||||
self.fsimswitchnode = fsimswitchnode
|
self.fsimswitchnode = fsimswitchnode
|
||||||
# this lets us run many builds in parallel without conflict across
|
# this lets us run many builds in parallel without conflict across
|
||||||
# parallel experiments which may have overlapping switch ids
|
# parallel experiments which may have overlapping switch ids
|
||||||
self.build_disambiguate = ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(64))
|
self.build_disambiguate = ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(64))
|
||||||
|
|
||||||
def emit_init_for_uplink(self, uplinkno):
|
def emit_init_for_uplink(self, uplinkno: int) -> str:
|
||||||
""" Emit an init for a switch to talk to its uplink."""
|
""" Emit an init for a switch to talk to its uplink."""
|
||||||
|
|
||||||
linkobj = self.fsimswitchnode.uplinks[uplinkno]
|
linkobj = self.fsimswitchnode.uplinks[uplinkno]
|
||||||
|
@ -43,7 +50,7 @@ class AbstractSwitchToSwitchConfig:
|
||||||
linkbasename = linkobj.get_global_link_id()
|
linkbasename = linkobj.get_global_link_id()
|
||||||
return "new ShmemPort(" + str(target_local_portno) + ', "' + linkbasename + '", true);\n'
|
return "new ShmemPort(" + str(target_local_portno) + ', "' + linkbasename + '", true);\n'
|
||||||
|
|
||||||
def emit_init_for_downlink(self, downlinkno):
|
def emit_init_for_downlink(self, downlinkno: int) -> str:
|
||||||
""" emit an init for the specified downlink. """
|
""" emit an init for the specified downlink. """
|
||||||
downlinkobj = self.fsimswitchnode.downlinks[downlinkno]
|
downlinkobj = self.fsimswitchnode.downlinks[downlinkno]
|
||||||
downlink = downlinkobj.get_downlink_side()
|
downlink = downlinkobj.get_downlink_side()
|
||||||
|
@ -56,7 +63,7 @@ class AbstractSwitchToSwitchConfig:
|
||||||
linkbasename = downlinkobj.get_global_link_id()
|
linkbasename = downlinkobj.get_global_link_id()
|
||||||
return "new ShmemPort(" + str(downlinkno) + ', "' + linkbasename + '", false);\n'
|
return "new ShmemPort(" + str(downlinkno) + ', "' + linkbasename + '", false);\n'
|
||||||
|
|
||||||
def emit_switch_configfile(self):
|
def emit_switch_configfile(self) -> str:
|
||||||
""" Produce a config file for the switch generator for this switch """
|
""" Produce a config file for the switch generator for this switch """
|
||||||
constructedstring = ""
|
constructedstring = ""
|
||||||
constructedstring += self.get_header()
|
constructedstring += self.get_header()
|
||||||
|
@ -66,11 +73,12 @@ class AbstractSwitchToSwitchConfig:
|
||||||
return constructedstring
|
return constructedstring
|
||||||
|
|
||||||
# produce mac2port array portion of config
|
# produce mac2port array portion of config
|
||||||
def get_mac2port(self):
|
def get_mac2port(self) -> str:
|
||||||
""" This takes a python array that represents the mac to port mapping,
|
""" This takes a python array that represents the mac to port mapping,
|
||||||
and converts it to a C++ array """
|
and converts it to a C++ array """
|
||||||
|
|
||||||
mac2port_pythonarray = self.fsimswitchnode.switch_table
|
mac2port_pythonarray = self.fsimswitchnode.switch_table
|
||||||
|
assert mac2port_pythonarray is not None
|
||||||
|
|
||||||
commaseparated = ""
|
commaseparated = ""
|
||||||
for elem in mac2port_pythonarray:
|
for elem in mac2port_pythonarray:
|
||||||
|
@ -87,13 +95,13 @@ class AbstractSwitchToSwitchConfig:
|
||||||
""".format(len(mac2port_pythonarray), commaseparated)
|
""".format(len(mac2port_pythonarray), commaseparated)
|
||||||
return retstr
|
return retstr
|
||||||
|
|
||||||
def get_header(self):
|
def get_header(self) -> str:
|
||||||
""" Produce file header. """
|
""" Produce file header. """
|
||||||
retstr = """// THIS FILE IS MACHINE GENERATED. SEE deploy/buildtools/switchmodelconfig.py
|
retstr = """// THIS FILE IS MACHINE GENERATED. SEE deploy/buildtools/switchmodelconfig.py
|
||||||
"""
|
"""
|
||||||
return retstr
|
return retstr
|
||||||
|
|
||||||
def get_numclientsconfig(self):
|
def get_numclientsconfig(self) -> str:
|
||||||
""" Emit constants for num ports. """
|
""" Emit constants for num ports. """
|
||||||
numdownlinks = len(self.fsimswitchnode.downlinks)
|
numdownlinks = len(self.fsimswitchnode.downlinks)
|
||||||
numuplinks = len(self.fsimswitchnode.uplinks)
|
numuplinks = len(self.fsimswitchnode.uplinks)
|
||||||
|
@ -107,7 +115,7 @@ class AbstractSwitchToSwitchConfig:
|
||||||
#endif""".format(totalports, numdownlinks, numuplinks)
|
#endif""".format(totalports, numdownlinks, numuplinks)
|
||||||
return retstr
|
return retstr
|
||||||
|
|
||||||
def get_portsetup(self):
|
def get_portsetup(self) -> str:
|
||||||
""" emit port initialisations. """
|
""" emit port initialisations. """
|
||||||
initstring = ""
|
initstring = ""
|
||||||
for downlinkno in range(len(self.fsimswitchnode.downlinks)):
|
for downlinkno in range(len(self.fsimswitchnode.downlinks)):
|
||||||
|
@ -125,10 +133,10 @@ class AbstractSwitchToSwitchConfig:
|
||||||
""".format(initstring)
|
""".format(initstring)
|
||||||
return retstr
|
return retstr
|
||||||
|
|
||||||
def switch_binary_name(self):
|
def switch_binary_name(self) -> str:
|
||||||
return "switch" + str(self.fsimswitchnode.switch_id_internal)
|
return "switch" + str(self.fsimswitchnode.switch_id_internal)
|
||||||
|
|
||||||
def buildswitch(self):
|
def buildswitch(self) -> None:
|
||||||
""" Generate the config file, build the switch."""
|
""" Generate the config file, build the switch."""
|
||||||
|
|
||||||
configfile = self.emit_switch_configfile()
|
configfile = self.emit_switch_configfile()
|
||||||
|
@ -141,7 +149,7 @@ class AbstractSwitchToSwitchConfig:
|
||||||
|
|
||||||
rootLogger.debug(str(configfile))
|
rootLogger.debug(str(configfile))
|
||||||
|
|
||||||
def local_logged(command):
|
def local_logged(command: str) -> None:
|
||||||
""" Run local command with logging. """
|
""" Run local command with logging. """
|
||||||
with StreamLogger('stdout'), StreamLogger('stderr'):
|
with StreamLogger('stdout'), StreamLogger('stderr'):
|
||||||
localcap = local(command, capture=True)
|
localcap = local(command, capture=True)
|
||||||
|
@ -160,7 +168,7 @@ class AbstractSwitchToSwitchConfig:
|
||||||
local_logged("cd " + switchbuilddir + " && make")
|
local_logged("cd " + switchbuilddir + " && make")
|
||||||
local_logged("mv " + switchbuilddir + "switch " + switchbuilddir + binaryname)
|
local_logged("mv " + switchbuilddir + "switch " + switchbuilddir + binaryname)
|
||||||
|
|
||||||
def run_switch_simulation_command(self):
|
def run_switch_simulation_command(self) -> str:
|
||||||
""" Return the command to boot the switch."""
|
""" Return the command to boot the switch."""
|
||||||
switchlatency = self.fsimswitchnode.switch_switching_latency
|
switchlatency = self.fsimswitchnode.switch_switching_latency
|
||||||
linklatency = self.fsimswitchnode.switch_link_latency
|
linklatency = self.fsimswitchnode.switch_link_latency
|
||||||
|
@ -168,15 +176,15 @@ class AbstractSwitchToSwitchConfig:
|
||||||
# insert gdb -ex run --args between sudo and ./ below to start switches in gdb
|
# insert gdb -ex run --args between sudo and ./ below to start switches in gdb
|
||||||
return """screen -S {} -d -m bash -c "script -f -c 'sudo ./{} {} {} {}' switchlog"; sleep 1""".format(self.switch_binary_name(), self.switch_binary_name(), linklatency, switchlatency, bandwidth)
|
return """screen -S {} -d -m bash -c "script -f -c 'sudo ./{} {} {} {}' switchlog"; sleep 1""".format(self.switch_binary_name(), self.switch_binary_name(), linklatency, switchlatency, bandwidth)
|
||||||
|
|
||||||
def kill_switch_simulation_command(self):
|
def kill_switch_simulation_command(self) -> str:
|
||||||
""" Return the command to kill the switch. """
|
""" Return the command to kill the switch. """
|
||||||
return """sudo pkill {}""".format(self.switch_binary_name())
|
return """sudo pkill {}""".format(self.switch_binary_name())
|
||||||
|
|
||||||
def switch_build_local_dir(self):
|
def switch_build_local_dir(self) -> str:
|
||||||
""" get local build dir of the switch. """
|
""" get local build dir of the switch. """
|
||||||
return "../target-design/switch/"
|
return "../target-design/switch/"
|
||||||
|
|
||||||
def switch_binary_local_path(self):
|
def switch_binary_local_path(self) -> str:
|
||||||
""" return the full local path where the switch binary lives. """
|
""" return the full local path where the switch binary lives. """
|
||||||
binaryname = self.switch_binary_name()
|
binaryname = self.switch_binary_name()
|
||||||
switchorigdir = self.switch_build_local_dir()
|
switchorigdir = self.switch_build_local_dir()
|
||||||
|
|
|
@ -1,14 +1,27 @@
|
||||||
""" Define your additional topologies here. The FireSimTopology class inherits
|
""" Define your additional topologies here. The FireSimTopology class inherits
|
||||||
from UserTopologies and thus can instantiate your topology. """
|
from UserTopologies and thus can instantiate your topology. """
|
||||||
|
|
||||||
from runtools.firesim_topology_elements import *
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from runtools.firesim_topology_elements import FireSimSwitchNode, FireSimServerNode, FireSimSuperNodeServerNode, FireSimDummyServerNode, FireSimNode
|
||||||
|
|
||||||
class UserTopologies(object):
|
from typing import Optional, Union, Callable, Sequence, TYPE_CHECKING
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from runtools.firesim_topology_with_passes import FireSimTopologyWithPasses
|
||||||
|
|
||||||
|
class UserTopologies:
|
||||||
""" A class that just separates out user-defined/configurable topologies
|
""" A class that just separates out user-defined/configurable topologies
|
||||||
from the rest of the boilerplate in FireSimTopology() """
|
from the rest of the boilerplate in FireSimTopology() """
|
||||||
|
no_net_num_nodes: int
|
||||||
|
custom_mapper: Optional[Union[Callable, str]]
|
||||||
|
roots: Sequence[FireSimNode]
|
||||||
|
|
||||||
def clos_m_n_r(self, m, n, r):
|
def __init__(self, no_net_num_nodes: int) -> None:
|
||||||
|
self.no_net_num_nodes = no_net_num_nodes
|
||||||
|
self.custom_mapper = None
|
||||||
|
self.roots = []
|
||||||
|
|
||||||
|
def clos_m_n_r(self, m: int, n: int, r: int) -> None:
|
||||||
""" DO NOT USE THIS DIRECTLY, USE ONE OF THE INSTANTIATIONS BELOW. """
|
""" DO NOT USE THIS DIRECTLY, USE ONE OF THE INSTANTIATIONS BELOW. """
|
||||||
""" Clos topol where:
|
""" Clos topol where:
|
||||||
m = number of root switches
|
m = number of root switches
|
||||||
|
@ -46,21 +59,21 @@ class UserTopologies(object):
|
||||||
|
|
||||||
self.custom_mapper = custom_mapper
|
self.custom_mapper = custom_mapper
|
||||||
|
|
||||||
def clos_2_8_2(self):
|
def clos_2_8_2(self) -> None:
|
||||||
""" clos topol with:
|
""" clos topol with:
|
||||||
2 roots
|
2 roots
|
||||||
8 nodes/leaf
|
8 nodes/leaf
|
||||||
2 leaves. """
|
2 leaves. """
|
||||||
self.clos_m_n_r(2, 8, 2)
|
self.clos_m_n_r(2, 8, 2)
|
||||||
|
|
||||||
def clos_8_8_16(self):
|
def clos_8_8_16(self) -> None:
|
||||||
""" clos topol with:
|
""" clos topol with:
|
||||||
8 roots
|
8 roots
|
||||||
8 nodes/leaf
|
8 nodes/leaf
|
||||||
16 leaves. = 128 nodes."""
|
16 leaves. = 128 nodes."""
|
||||||
self.clos_m_n_r(8, 8, 16)
|
self.clos_m_n_r(8, 8, 16)
|
||||||
|
|
||||||
def fat_tree_4ary(self):
|
def fat_tree_4ary(self) -> None:
|
||||||
# 4-ary fat tree as described in
|
# 4-ary fat tree as described in
|
||||||
# http://ccr.sigcomm.org/online/files/p63-alfares.pdf
|
# http://ccr.sigcomm.org/online/files/p63-alfares.pdf
|
||||||
coreswitches = [FireSimSwitchNode() for x in range(4)]
|
coreswitches = [FireSimSwitchNode() for x in range(4)]
|
||||||
|
@ -71,8 +84,7 @@ class UserTopologies(object):
|
||||||
for switchno in range(len(coreswitches)):
|
for switchno in range(len(coreswitches)):
|
||||||
core = coreswitches[switchno]
|
core = coreswitches[switchno]
|
||||||
base = 0 if switchno < 2 else 1
|
base = 0 if switchno < 2 else 1
|
||||||
dls = range(base, 8, 2)
|
dls = list(map(lambda x: aggrswitches[x], range(base, 8, 2)))
|
||||||
dls = map(lambda x: aggrswitches[x], dls)
|
|
||||||
core.add_downlinks(dls)
|
core.add_downlinks(dls)
|
||||||
for switchbaseno in range(0, len(aggrswitches), 2):
|
for switchbaseno in range(0, len(aggrswitches), 2):
|
||||||
switchno = switchbaseno + 0
|
switchno = switchbaseno + 0
|
||||||
|
@ -85,7 +97,7 @@ class UserTopologies(object):
|
||||||
edgeswitches[edgeno].add_downlinks([servers[edgeno*2], servers[edgeno*2+1]])
|
edgeswitches[edgeno].add_downlinks([servers[edgeno*2], servers[edgeno*2+1]])
|
||||||
|
|
||||||
|
|
||||||
def custom_mapper(fsim_topol_with_passes):
|
def custom_mapper(fsim_topol_with_passes: FireSimTopologyWithPasses) -> None:
|
||||||
""" In a custom mapper, you have access to the firesim topology with passes,
|
""" In a custom mapper, you have access to the firesim topology with passes,
|
||||||
where you can access the run_farm nodes:
|
where you can access the run_farm nodes:
|
||||||
|
|
||||||
|
@ -120,7 +132,7 @@ class UserTopologies(object):
|
||||||
|
|
||||||
self.custom_mapper = custom_mapper
|
self.custom_mapper = custom_mapper
|
||||||
|
|
||||||
def example_multilink(self):
|
def example_multilink(self) -> None:
|
||||||
self.roots = [FireSimSwitchNode()]
|
self.roots = [FireSimSwitchNode()]
|
||||||
midswitch = FireSimSwitchNode()
|
midswitch = FireSimSwitchNode()
|
||||||
lowerlayer = [midswitch for x in range(16)]
|
lowerlayer = [midswitch for x in range(16)]
|
||||||
|
@ -128,7 +140,7 @@ class UserTopologies(object):
|
||||||
servers = [FireSimServerNode()]
|
servers = [FireSimServerNode()]
|
||||||
midswitch.add_downlinks(servers)
|
midswitch.add_downlinks(servers)
|
||||||
|
|
||||||
def example_multilink_32(self):
|
def example_multilink_32(self) -> None:
|
||||||
self.roots = [FireSimSwitchNode()]
|
self.roots = [FireSimSwitchNode()]
|
||||||
midswitch = FireSimSwitchNode()
|
midswitch = FireSimSwitchNode()
|
||||||
lowerlayer = [midswitch for x in range(32)]
|
lowerlayer = [midswitch for x in range(32)]
|
||||||
|
@ -136,7 +148,7 @@ class UserTopologies(object):
|
||||||
servers = [FireSimServerNode()]
|
servers = [FireSimServerNode()]
|
||||||
midswitch.add_downlinks(servers)
|
midswitch.add_downlinks(servers)
|
||||||
|
|
||||||
def example_multilink_64(self):
|
def example_multilink_64(self) -> None:
|
||||||
self.roots = [FireSimSwitchNode()]
|
self.roots = [FireSimSwitchNode()]
|
||||||
midswitch = FireSimSwitchNode()
|
midswitch = FireSimSwitchNode()
|
||||||
lowerlayer = [midswitch for x in range(64)]
|
lowerlayer = [midswitch for x in range(64)]
|
||||||
|
@ -144,7 +156,7 @@ class UserTopologies(object):
|
||||||
servers = [FireSimServerNode()]
|
servers = [FireSimServerNode()]
|
||||||
midswitch.add_downlinks(servers)
|
midswitch.add_downlinks(servers)
|
||||||
|
|
||||||
def example_cross_links(self):
|
def example_cross_links(self) -> None:
|
||||||
self.roots = [FireSimSwitchNode() for x in range(2)]
|
self.roots = [FireSimSwitchNode() for x in range(2)]
|
||||||
midswitches = [FireSimSwitchNode() for x in range(2)]
|
midswitches = [FireSimSwitchNode() for x in range(2)]
|
||||||
self.roots[0].add_downlinks(midswitches)
|
self.roots[0].add_downlinks(midswitches)
|
||||||
|
@ -153,7 +165,7 @@ class UserTopologies(object):
|
||||||
midswitches[0].add_downlinks([servers[0]])
|
midswitches[0].add_downlinks([servers[0]])
|
||||||
midswitches[1].add_downlinks([servers[1]])
|
midswitches[1].add_downlinks([servers[1]])
|
||||||
|
|
||||||
def small_hierarchy_8sims(self):
|
def small_hierarchy_8sims(self) -> None:
|
||||||
self.custom_mapper = 'mapping_use_one_f1_16xlarge'
|
self.custom_mapper = 'mapping_use_one_f1_16xlarge'
|
||||||
self.roots = [FireSimSwitchNode()]
|
self.roots = [FireSimSwitchNode()]
|
||||||
midlevel = [FireSimSwitchNode() for x in range(4)]
|
midlevel = [FireSimSwitchNode() for x in range(4)]
|
||||||
|
@ -162,7 +174,7 @@ class UserTopologies(object):
|
||||||
for swno in range(len(midlevel)):
|
for swno in range(len(midlevel)):
|
||||||
midlevel[swno].add_downlinks(servers[swno])
|
midlevel[swno].add_downlinks(servers[swno])
|
||||||
|
|
||||||
def small_hierarchy_2sims(self):
|
def small_hierarchy_2sims(self) -> None:
|
||||||
self.custom_mapper = 'mapping_use_one_f1_16xlarge'
|
self.custom_mapper = 'mapping_use_one_f1_16xlarge'
|
||||||
self.roots = [FireSimSwitchNode()]
|
self.roots = [FireSimSwitchNode()]
|
||||||
midlevel = [FireSimSwitchNode() for x in range(1)]
|
midlevel = [FireSimSwitchNode() for x in range(1)]
|
||||||
|
@ -171,27 +183,27 @@ class UserTopologies(object):
|
||||||
for swno in range(len(midlevel)):
|
for swno in range(len(midlevel)):
|
||||||
midlevel[swno].add_downlinks(servers[swno])
|
midlevel[swno].add_downlinks(servers[swno])
|
||||||
|
|
||||||
def example_1config(self):
|
def example_1config(self) -> None:
|
||||||
self.roots = [FireSimSwitchNode()]
|
self.roots = [FireSimSwitchNode()]
|
||||||
servers = [FireSimServerNode() for y in range(1)]
|
servers = [FireSimServerNode() for y in range(1)]
|
||||||
self.roots[0].add_downlinks(servers)
|
self.roots[0].add_downlinks(servers)
|
||||||
|
|
||||||
def example_2config(self):
|
def example_2config(self) -> None:
|
||||||
self.roots = [FireSimSwitchNode()]
|
self.roots = [FireSimSwitchNode()]
|
||||||
servers = [FireSimServerNode() for y in range(2)]
|
servers = [FireSimServerNode() for y in range(2)]
|
||||||
self.roots[0].add_downlinks(servers)
|
self.roots[0].add_downlinks(servers)
|
||||||
|
|
||||||
def example_4config(self):
|
def example_4config(self) -> None:
|
||||||
self.roots = [FireSimSwitchNode()]
|
self.roots = [FireSimSwitchNode()]
|
||||||
servers = [FireSimServerNode() for y in range(4)]
|
servers = [FireSimServerNode() for y in range(4)]
|
||||||
self.roots[0].add_downlinks(servers)
|
self.roots[0].add_downlinks(servers)
|
||||||
|
|
||||||
def example_8config(self):
|
def example_8config(self) -> None:
|
||||||
self.roots = [FireSimSwitchNode()]
|
self.roots = [FireSimSwitchNode()]
|
||||||
servers = [FireSimServerNode() for y in range(8)]
|
servers = [FireSimServerNode() for y in range(8)]
|
||||||
self.roots[0].add_downlinks(servers)
|
self.roots[0].add_downlinks(servers)
|
||||||
|
|
||||||
def example_16config(self):
|
def example_16config(self) -> None:
|
||||||
self.roots = [FireSimSwitchNode()]
|
self.roots = [FireSimSwitchNode()]
|
||||||
level2switches = [FireSimSwitchNode() for x in range(2)]
|
level2switches = [FireSimSwitchNode() for x in range(2)]
|
||||||
servers = [[FireSimServerNode() for y in range(8)] for x in range(2)]
|
servers = [[FireSimServerNode() for y in range(8)] for x in range(2)]
|
||||||
|
@ -202,7 +214,7 @@ class UserTopologies(object):
|
||||||
for l2switchNo in range(len(level2switches)):
|
for l2switchNo in range(len(level2switches)):
|
||||||
level2switches[l2switchNo].add_downlinks(servers[l2switchNo])
|
level2switches[l2switchNo].add_downlinks(servers[l2switchNo])
|
||||||
|
|
||||||
def example_32config(self):
|
def example_32config(self) -> None:
|
||||||
self.roots = [FireSimSwitchNode()]
|
self.roots = [FireSimSwitchNode()]
|
||||||
level2switches = [FireSimSwitchNode() for x in range(4)]
|
level2switches = [FireSimSwitchNode() for x in range(4)]
|
||||||
servers = [[FireSimServerNode() for y in range(8)] for x in range(4)]
|
servers = [[FireSimServerNode() for y in range(8)] for x in range(4)]
|
||||||
|
@ -213,7 +225,7 @@ class UserTopologies(object):
|
||||||
for l2switchNo in range(len(level2switches)):
|
for l2switchNo in range(len(level2switches)):
|
||||||
level2switches[l2switchNo].add_downlinks(servers[l2switchNo])
|
level2switches[l2switchNo].add_downlinks(servers[l2switchNo])
|
||||||
|
|
||||||
def example_64config(self):
|
def example_64config(self) -> None:
|
||||||
self.roots = [FireSimSwitchNode()]
|
self.roots = [FireSimSwitchNode()]
|
||||||
level2switches = [FireSimSwitchNode() for x in range(8)]
|
level2switches = [FireSimSwitchNode() for x in range(8)]
|
||||||
servers = [[FireSimServerNode() for y in range(8)] for x in range(8)]
|
servers = [[FireSimServerNode() for y in range(8)] for x in range(8)]
|
||||||
|
@ -224,7 +236,7 @@ class UserTopologies(object):
|
||||||
for l2switchNo in range(len(level2switches)):
|
for l2switchNo in range(len(level2switches)):
|
||||||
level2switches[l2switchNo].add_downlinks(servers[l2switchNo])
|
level2switches[l2switchNo].add_downlinks(servers[l2switchNo])
|
||||||
|
|
||||||
def example_128config(self):
|
def example_128config(self) -> None:
|
||||||
self.roots = [FireSimSwitchNode()]
|
self.roots = [FireSimSwitchNode()]
|
||||||
level1switches = [FireSimSwitchNode() for x in range(2)]
|
level1switches = [FireSimSwitchNode() for x in range(2)]
|
||||||
level2switches = [[FireSimSwitchNode() for x in range(8)] for x in range(2)]
|
level2switches = [[FireSimSwitchNode() for x in range(8)] for x in range(2)]
|
||||||
|
@ -239,7 +251,7 @@ class UserTopologies(object):
|
||||||
for switchno in range(len(level2switches[switchgroupno])):
|
for switchno in range(len(level2switches[switchgroupno])):
|
||||||
level2switches[switchgroupno][switchno].add_downlinks(servers[switchgroupno][switchno])
|
level2switches[switchgroupno][switchno].add_downlinks(servers[switchgroupno][switchno])
|
||||||
|
|
||||||
def example_256config(self):
|
def example_256config(self) -> None:
|
||||||
self.roots = [FireSimSwitchNode()]
|
self.roots = [FireSimSwitchNode()]
|
||||||
level1switches = [FireSimSwitchNode() for x in range(4)]
|
level1switches = [FireSimSwitchNode() for x in range(4)]
|
||||||
level2switches = [[FireSimSwitchNode() for x in range(8)] for x in range(4)]
|
level2switches = [[FireSimSwitchNode() for x in range(8)] for x in range(4)]
|
||||||
|
@ -261,29 +273,32 @@ class UserTopologies(object):
|
||||||
res = res + x
|
res = res + x
|
||||||
return res
|
return res
|
||||||
|
|
||||||
def supernode_example_6config(self):
|
def supernode_example_6config(self) -> None:
|
||||||
self.roots = [FireSimSwitchNode()]
|
self.roots = [FireSimSwitchNode()]
|
||||||
servers = [FireSimSuperNodeServerNode()] + [FireSimDummyServerNode() for x in range(5)]
|
self.roots[0].add_downlinks([FireSimSuperNodeServerNode()])
|
||||||
self.roots[0].add_downlinks(servers)
|
self.roots[0].add_downlinks([FireSimDummyServerNode() for x in range(5)])
|
||||||
|
|
||||||
def supernode_example_4config(self):
|
def supernode_example_4config(self) -> None:
|
||||||
self.roots = [FireSimSwitchNode()]
|
self.roots = [FireSimSwitchNode()]
|
||||||
servers = [FireSimSuperNodeServerNode()] + [FireSimDummyServerNode() for x in range(3)]
|
self.roots[0].add_downlinks([FireSimSuperNodeServerNode()])
|
||||||
self.roots[0].add_downlinks(servers)
|
self.roots[0].add_downlinks([FireSimDummyServerNode() for x in range(3)])
|
||||||
def supernode_example_8config(self):
|
|
||||||
|
def supernode_example_8config(self) -> None:
|
||||||
self.roots = [FireSimSwitchNode()]
|
self.roots = [FireSimSwitchNode()]
|
||||||
servers = UserTopologies.supernode_flatten([[FireSimSuperNodeServerNode(), FireSimDummyServerNode(), FireSimDummyServerNode(), FireSimDummyServerNode()] for y in range(2)])
|
servers = UserTopologies.supernode_flatten([[FireSimSuperNodeServerNode(), FireSimDummyServerNode(), FireSimDummyServerNode(), FireSimDummyServerNode()] for y in range(2)])
|
||||||
self.roots[0].add_downlinks(servers)
|
self.roots[0].add_downlinks(servers)
|
||||||
def supernode_example_16config(self):
|
|
||||||
|
def supernode_example_16config(self) -> None:
|
||||||
self.roots = [FireSimSwitchNode()]
|
self.roots = [FireSimSwitchNode()]
|
||||||
servers = UserTopologies.supernode_flatten([[FireSimSuperNodeServerNode(), FireSimDummyServerNode(), FireSimDummyServerNode(), FireSimDummyServerNode()] for y in range(4)])
|
servers = UserTopologies.supernode_flatten([[FireSimSuperNodeServerNode(), FireSimDummyServerNode(), FireSimDummyServerNode(), FireSimDummyServerNode()] for y in range(4)])
|
||||||
self.roots[0].add_downlinks(servers)
|
self.roots[0].add_downlinks(servers)
|
||||||
def supernode_example_32config(self):
|
|
||||||
|
def supernode_example_32config(self) -> None:
|
||||||
self.roots = [FireSimSwitchNode()]
|
self.roots = [FireSimSwitchNode()]
|
||||||
servers = UserTopologies.supernode_flatten([[FireSimSuperNodeServerNode(), FireSimDummyServerNode(), FireSimDummyServerNode(), FireSimDummyServerNode()] for y in range(8)])
|
servers = UserTopologies.supernode_flatten([[FireSimSuperNodeServerNode(), FireSimDummyServerNode(), FireSimDummyServerNode(), FireSimDummyServerNode()] for y in range(8)])
|
||||||
self.roots[0].add_downlinks(servers)
|
self.roots[0].add_downlinks(servers)
|
||||||
|
|
||||||
def supernode_example_64config(self):
|
def supernode_example_64config(self) -> None:
|
||||||
self.roots = [FireSimSwitchNode()]
|
self.roots = [FireSimSwitchNode()]
|
||||||
level2switches = [FireSimSwitchNode() for x in range(2)]
|
level2switches = [FireSimSwitchNode() for x in range(2)]
|
||||||
servers = [UserTopologies.supernode_flatten([[FireSimSuperNodeServerNode(), FireSimDummyServerNode(), FireSimDummyServerNode(), FireSimDummyServerNode()] for y in range(8)]) for x in range(2)]
|
servers = [UserTopologies.supernode_flatten([[FireSimSuperNodeServerNode(), FireSimDummyServerNode(), FireSimDummyServerNode(), FireSimDummyServerNode()] for y in range(8)]) for x in range(2)]
|
||||||
|
@ -292,7 +307,7 @@ class UserTopologies(object):
|
||||||
for l2switchNo in range(len(level2switches)):
|
for l2switchNo in range(len(level2switches)):
|
||||||
level2switches[l2switchNo].add_downlinks(servers[l2switchNo])
|
level2switches[l2switchNo].add_downlinks(servers[l2switchNo])
|
||||||
|
|
||||||
def supernode_example_128config(self):
|
def supernode_example_128config(self) -> None:
|
||||||
self.roots = [FireSimSwitchNode()]
|
self.roots = [FireSimSwitchNode()]
|
||||||
level2switches = [FireSimSwitchNode() for x in range(4)]
|
level2switches = [FireSimSwitchNode() for x in range(4)]
|
||||||
servers = [UserTopologies.supernode_flatten([[FireSimSuperNodeServerNode(), FireSimDummyServerNode(), FireSimDummyServerNode(), FireSimDummyServerNode()] for y in range(8)]) for x in range(4)]
|
servers = [UserTopologies.supernode_flatten([[FireSimSuperNodeServerNode(), FireSimDummyServerNode(), FireSimDummyServerNode(), FireSimDummyServerNode()] for y in range(8)]) for x in range(4)]
|
||||||
|
@ -301,7 +316,7 @@ class UserTopologies(object):
|
||||||
for l2switchNo in range(len(level2switches)):
|
for l2switchNo in range(len(level2switches)):
|
||||||
level2switches[l2switchNo].add_downlinks(servers[l2switchNo])
|
level2switches[l2switchNo].add_downlinks(servers[l2switchNo])
|
||||||
|
|
||||||
def supernode_example_256config(self):
|
def supernode_example_256config(self) -> None:
|
||||||
self.roots = [FireSimSwitchNode()]
|
self.roots = [FireSimSwitchNode()]
|
||||||
level2switches = [FireSimSwitchNode() for x in range(8)]
|
level2switches = [FireSimSwitchNode() for x in range(8)]
|
||||||
servers = [UserTopologies.supernode_flatten([[FireSimSuperNodeServerNode(), FireSimDummyServerNode(), FireSimDummyServerNode(), FireSimDummyServerNode()] for y in range(8)]) for x in range(8)]
|
servers = [UserTopologies.supernode_flatten([[FireSimSuperNodeServerNode(), FireSimDummyServerNode(), FireSimDummyServerNode(), FireSimDummyServerNode()] for y in range(8)]) for x in range(8)]
|
||||||
|
@ -310,7 +325,7 @@ class UserTopologies(object):
|
||||||
for l2switchNo in range(len(level2switches)):
|
for l2switchNo in range(len(level2switches)):
|
||||||
level2switches[l2switchNo].add_downlinks(servers[l2switchNo])
|
level2switches[l2switchNo].add_downlinks(servers[l2switchNo])
|
||||||
|
|
||||||
def supernode_example_512config(self):
|
def supernode_example_512config(self) -> None:
|
||||||
self.roots = [FireSimSwitchNode()]
|
self.roots = [FireSimSwitchNode()]
|
||||||
level1switches = [FireSimSwitchNode() for x in range(2)]
|
level1switches = [FireSimSwitchNode() for x in range(2)]
|
||||||
level2switches = [[FireSimSwitchNode() for x in range(8)] for x in range(2)]
|
level2switches = [[FireSimSwitchNode() for x in range(8)] for x in range(2)]
|
||||||
|
@ -322,7 +337,7 @@ class UserTopologies(object):
|
||||||
for switchno in range(len(level2switches[switchgroupno])):
|
for switchno in range(len(level2switches[switchgroupno])):
|
||||||
level2switches[switchgroupno][switchno].add_downlinks(servers[switchgroupno][switchno])
|
level2switches[switchgroupno][switchno].add_downlinks(servers[switchgroupno][switchno])
|
||||||
|
|
||||||
def supernode_example_1024config(self):
|
def supernode_example_1024config(self) -> None:
|
||||||
self.roots = [FireSimSwitchNode()]
|
self.roots = [FireSimSwitchNode()]
|
||||||
level1switches = [FireSimSwitchNode() for x in range(4)]
|
level1switches = [FireSimSwitchNode() for x in range(4)]
|
||||||
level2switches = [[FireSimSwitchNode() for x in range(8)] for x in range(4)]
|
level2switches = [[FireSimSwitchNode() for x in range(8)] for x in range(4)]
|
||||||
|
@ -334,7 +349,7 @@ class UserTopologies(object):
|
||||||
for switchno in range(len(level2switches[switchgroupno])):
|
for switchno in range(len(level2switches[switchgroupno])):
|
||||||
level2switches[switchgroupno][switchno].add_downlinks(servers[switchgroupno][switchno])
|
level2switches[switchgroupno][switchno].add_downlinks(servers[switchgroupno][switchno])
|
||||||
|
|
||||||
def supernode_example_deep64config(self):
|
def supernode_example_deep64config(self) -> None:
|
||||||
self.roots = [FireSimSwitchNode()]
|
self.roots = [FireSimSwitchNode()]
|
||||||
level1switches = [FireSimSwitchNode() for x in range(2)]
|
level1switches = [FireSimSwitchNode() for x in range(2)]
|
||||||
level2switches = [[FireSimSwitchNode() for x in range(1)] for x in range(2)]
|
level2switches = [[FireSimSwitchNode() for x in range(1)] for x in range(2)]
|
||||||
|
@ -346,7 +361,7 @@ class UserTopologies(object):
|
||||||
for switchno in range(len(level2switches[switchgroupno])):
|
for switchno in range(len(level2switches[switchgroupno])):
|
||||||
level2switches[switchgroupno][switchno].add_downlinks(servers[switchgroupno][switchno])
|
level2switches[switchgroupno][switchno].add_downlinks(servers[switchgroupno][switchno])
|
||||||
|
|
||||||
def dual_example_8config(self):
|
def dual_example_8config(self) -> None:
|
||||||
""" two separate 8-node clusters for experiments, e.g. memcached mutilate. """
|
""" two separate 8-node clusters for experiments, e.g. memcached mutilate. """
|
||||||
self.roots = [FireSimSwitchNode(), FireSimSwitchNode()]
|
self.roots = [FireSimSwitchNode(), FireSimSwitchNode()]
|
||||||
servers = [FireSimServerNode() for y in range(8)]
|
servers = [FireSimServerNode() for y in range(8)]
|
||||||
|
@ -354,7 +369,7 @@ class UserTopologies(object):
|
||||||
self.roots[0].add_downlinks(servers)
|
self.roots[0].add_downlinks(servers)
|
||||||
self.roots[1].add_downlinks(servers2)
|
self.roots[1].add_downlinks(servers2)
|
||||||
|
|
||||||
def triple_example_8config(self):
|
def triple_example_8config(self) -> None:
|
||||||
""" three separate 8-node clusters for experiments, e.g. memcached mutilate. """
|
""" three separate 8-node clusters for experiments, e.g. memcached mutilate. """
|
||||||
self.roots = [FireSimSwitchNode(), FireSimSwitchNode(), FireSimSwitchNode()]
|
self.roots = [FireSimSwitchNode(), FireSimSwitchNode(), FireSimSwitchNode()]
|
||||||
servers = [FireSimServerNode() for y in range(8)]
|
servers = [FireSimServerNode() for y in range(8)]
|
||||||
|
@ -364,11 +379,11 @@ class UserTopologies(object):
|
||||||
self.roots[1].add_downlinks(servers2)
|
self.roots[1].add_downlinks(servers2)
|
||||||
self.roots[2].add_downlinks(servers3)
|
self.roots[2].add_downlinks(servers3)
|
||||||
|
|
||||||
def no_net_config(self):
|
def no_net_config(self) -> None:
|
||||||
self.roots = [FireSimServerNode() for x in range(self.no_net_num_nodes)]
|
self.roots = [FireSimServerNode() for x in range(self.no_net_num_nodes)]
|
||||||
|
|
||||||
# Spins up all of the precompiled, unnetworked targets
|
# Spins up all of the precompiled, unnetworked targets
|
||||||
def all_no_net_targets_config(self):
|
def all_no_net_targets_config(self) -> None:
|
||||||
hwdb_entries = [
|
hwdb_entries = [
|
||||||
"firesim_boom_singlecore_no_nic_l2_llc4mb_ddr3",
|
"firesim_boom_singlecore_no_nic_l2_llc4mb_ddr3",
|
||||||
"firesim_rocket_quadcore_no_nic_l2_llc4mb_ddr3",
|
"firesim_rocket_quadcore_no_nic_l2_llc4mb_ddr3",
|
||||||
|
|
|
@ -1,14 +1,18 @@
|
||||||
""" Miscellaneous utils used by other buildtools pieces. """
|
""" Miscellaneous utils used by other buildtools pieces. """
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
import lddwrap
|
import lddwrap
|
||||||
import logging
|
import logging
|
||||||
from os import fspath
|
from os import fspath
|
||||||
from os.path import realpath
|
from os.path import realpath
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
from typing import List, Tuple, Type
|
||||||
|
|
||||||
rootLogger = logging.getLogger()
|
rootLogger = logging.getLogger()
|
||||||
|
|
||||||
def get_local_shared_libraries(elf):
|
def get_local_shared_libraries(elf: str) -> List[Tuple[str, str]]:
|
||||||
""" Given path to executable `elf`, returns a list of path tuples, (A, B), where:
|
""" Given path to executable `elf`, returns a list of path tuples, (A, B), where:
|
||||||
A is the local file path on the manager instance to the library
|
A is the local file path on the manager instance to the library
|
||||||
B is the destination file path on the runfarm instance relative to the driver
|
B is the destination file path on the runfarm instance relative to the driver
|
||||||
|
@ -360,10 +364,10 @@ def get_local_shared_libraries(elf):
|
||||||
]
|
]
|
||||||
|
|
||||||
libs = list()
|
libs = list()
|
||||||
rootLogger.debug(f"Identifying ldd dependencies for:{elf}")
|
rootLogger.debug(f"Identifying ldd dependencies for: {elf}")
|
||||||
for dso in lddwrap.list_dependencies(Path(elf)):
|
for dso in lddwrap.list_dependencies(Path(elf)):
|
||||||
if dso.soname is None:
|
if dso.soname is None:
|
||||||
assert '/ld-linux' in fspath(dso.path), f"dynamic linker is only allowed no soname, not: {dso}"
|
assert dso.path is not None and '/ld-linux' in fspath(dso.path), f"dynamic linker is only allowed no soname, not: {dso}"
|
||||||
continue
|
continue
|
||||||
if 'linux-vdso.so' in dso.soname:
|
if 'linux-vdso.so' in dso.soname:
|
||||||
continue
|
continue
|
||||||
|
@ -399,10 +403,12 @@ class MacAddress():
|
||||||
>>> mac.as_int_no_prefix()
|
>>> mac.as_int_no_prefix()
|
||||||
3
|
3
|
||||||
"""
|
"""
|
||||||
next_mac_alloc = 2
|
next_mac_alloc: int = 2
|
||||||
eecs_mac_prefix = 0x00126d000000
|
eecs_mac_prefix: int = 0x00126d000000
|
||||||
|
mac_without_prefix_as_int: int
|
||||||
|
mac_as_int: int
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self) -> None:
|
||||||
""" Allocate a new MAC address, store it, then increment next_mac_alloc."""
|
""" Allocate a new MAC address, store it, then increment next_mac_alloc."""
|
||||||
assert MacAddress.next_mac_alloc < 2**24, "Too many MAC addresses allocated"
|
assert MacAddress.next_mac_alloc < 2**24, "Too many MAC addresses allocated"
|
||||||
self.mac_without_prefix_as_int = MacAddress.next_mac_alloc
|
self.mac_without_prefix_as_int = MacAddress.next_mac_alloc
|
||||||
|
@ -411,12 +417,12 @@ class MacAddress():
|
||||||
# increment for next call
|
# increment for next call
|
||||||
MacAddress.next_mac_alloc += 1
|
MacAddress.next_mac_alloc += 1
|
||||||
|
|
||||||
def as_int_no_prefix(self):
|
def as_int_no_prefix(self) -> int:
|
||||||
""" Return the MAC address as an int. WITHOUT THE PREFIX!
|
""" Return the MAC address as an int. WITHOUT THE PREFIX!
|
||||||
Used by the MAC tables in switch models."""
|
Used by the MAC tables in switch models."""
|
||||||
return self.mac_without_prefix_as_int
|
return self.mac_without_prefix_as_int
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self) -> str:
|
||||||
""" Return the MAC address in the "regular format": colon separated,
|
""" Return the MAC address in the "regular format": colon separated,
|
||||||
show all leading zeroes."""
|
show all leading zeroes."""
|
||||||
# format as 12 char hex with leading zeroes
|
# format as 12 char hex with leading zeroes
|
||||||
|
@ -428,12 +434,12 @@ class MacAddress():
|
||||||
return ":".join(split_str_ver)
|
return ":".join(split_str_ver)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def reset_allocator(cls):
|
def reset_allocator(cls: Type[MacAddress]) -> None:
|
||||||
""" Reset allocator back to default value. """
|
""" Reset allocator back to default value. """
|
||||||
cls.next_mac_alloc = 2
|
cls.next_mac_alloc = 2
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def next_mac_to_allocate(cls):
|
def next_mac_to_allocate(cls: Type[MacAddress]) -> int:
|
||||||
""" Return the next mac that will be allocated. This basically tells you
|
""" Return the next mac that will be allocated. This basically tells you
|
||||||
how many entries you need in your switching tables. """
|
how many entries you need in your switching tables. """
|
||||||
return cls.next_mac_alloc
|
return cls.next_mac_alloc
|
||||||
|
|
|
@ -1,8 +1,12 @@
|
||||||
""" Workload configuration information. """
|
""" Workload configuration information. """
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
|
|
||||||
|
from typing import List, Optional, Dict, Any, Tuple
|
||||||
|
|
||||||
class JobConfig:
|
class JobConfig:
|
||||||
""" A single job that runs on a simulation.
|
""" A single job that runs on a simulation.
|
||||||
E.g. one spec benchmark, one of the risc-v tests, etc.
|
E.g. one spec benchmark, one of the risc-v tests, etc.
|
||||||
|
@ -13,24 +17,31 @@ class JobConfig:
|
||||||
This essentially describes the local pieces that need to be fed to
|
This essentially describes the local pieces that need to be fed to
|
||||||
simulations and the remote outputs that need to be copied back. """
|
simulations and the remote outputs that need to be copied back. """
|
||||||
|
|
||||||
filesystemsuffix = ".ext2"
|
filesystemsuffix: str = ".ext2"
|
||||||
|
parent_workload: WorkloadConfig
|
||||||
|
jobname: str
|
||||||
|
outputs: List[str]
|
||||||
|
simoutputs: List[str]
|
||||||
|
siminputs: List[str]
|
||||||
|
bootbinary: str
|
||||||
|
rootfs: Optional[str]
|
||||||
|
|
||||||
def __init__(self, singlejob_dict, parent_workload, index=0):
|
def __init__(self, singlejob_dict: Dict[str, Any], parent_workload: WorkloadConfig, index: int = 0) -> None:
|
||||||
self.parent_workload = parent_workload
|
self.parent_workload = parent_workload
|
||||||
self.jobname = singlejob_dict.get("name", self.parent_workload.workload_name + str(index))
|
self.jobname = singlejob_dict.get("name", self.parent_workload.workload_name + str(index))
|
||||||
# ignore files, command, we assume they are used only to build rootfses
|
# ignore files, command, we assume they are used only to build rootfses
|
||||||
# eventually this functionality will be merged into the manager too
|
# eventually this functionality will be merged into the manager too
|
||||||
joboutputs = singlejob_dict.get("outputs", [])
|
joboutputs = singlejob_dict.get("outputs", [])
|
||||||
self.outputs = joboutputs + parent_workload.common_outputs
|
self.outputs = joboutputs + self.parent_workload.common_outputs
|
||||||
simoutputs = singlejob_dict.get("simulation_outputs", [])
|
simoutputs = singlejob_dict.get("simulation_outputs", [])
|
||||||
self.simoutputs = simoutputs + parent_workload.common_simulation_outputs
|
self.simoutputs = simoutputs + self.parent_workload.common_simulation_outputs
|
||||||
siminputs = singlejob_dict.get("simulation_inputs", [])
|
siminputs = singlejob_dict.get("simulation_inputs", [])
|
||||||
self.siminputs = siminputs + parent_workload.common_simulation_inputs
|
self.siminputs = siminputs + self.parent_workload.common_simulation_inputs
|
||||||
|
|
||||||
if singlejob_dict.get("bootbinary") is not None:
|
if singlejob_dict.get("bootbinary") is not None:
|
||||||
self.bootbinary = singlejob_dict.get("bootbinary")
|
self.bootbinary = singlejob_dict["bootbinary"]
|
||||||
else:
|
else:
|
||||||
self.bootbinary = parent_workload.common_bootbinary
|
self.bootbinary = self.parent_workload.common_bootbinary
|
||||||
|
|
||||||
if 'rootfs' in singlejob_dict:
|
if 'rootfs' in singlejob_dict:
|
||||||
if singlejob_dict['rootfs'] is None:
|
if singlejob_dict['rootfs'] is None:
|
||||||
|
@ -38,30 +49,30 @@ class JobConfig:
|
||||||
self.rootfs = None
|
self.rootfs = None
|
||||||
else:
|
else:
|
||||||
# Explicit per-job rootfs
|
# Explicit per-job rootfs
|
||||||
self.rootfs = parent_workload.workload_input_base_dir + singlejob_dict['rootfs']
|
self.rootfs = self.parent_workload.workload_input_base_dir + singlejob_dict['rootfs']
|
||||||
else:
|
else:
|
||||||
# No explicit per-job rootfs, inherit from workload
|
# No explicit per-job rootfs, inherit from workload
|
||||||
if parent_workload.derive_rootfs:
|
if self.parent_workload.derive_rootfs:
|
||||||
# No explicit workload rootfs, derive path from job name
|
# No explicit workload rootfs, derive path from job name
|
||||||
self.rootfs = self.parent_workload.workload_input_base_dir + self.jobname + self.filesystemsuffix
|
self.rootfs = self.parent_workload.workload_input_base_dir + self.jobname + self.filesystemsuffix
|
||||||
elif parent_workload.common_rootfs is None:
|
elif self.parent_workload.common_rootfs is None:
|
||||||
# Don't include a rootfs
|
# Don't include a rootfs
|
||||||
self.rootfs = None
|
self.rootfs = None
|
||||||
else:
|
else:
|
||||||
# Explicit rootfs path from workload
|
# Explicit rootfs path from workload
|
||||||
self.rootfs = self.parent_workload.workload_input_base_dir + self.parent_workload.common_rootfs
|
self.rootfs = self.parent_workload.workload_input_base_dir + self.parent_workload.common_rootfs
|
||||||
|
|
||||||
def bootbinary_path(self):
|
def bootbinary_path(self) -> str:
|
||||||
return self.parent_workload.workload_input_base_dir + self.bootbinary
|
return self.parent_workload.workload_input_base_dir + self.bootbinary
|
||||||
|
|
||||||
def get_siminputs(self):
|
def get_siminputs(self) -> List[Tuple[str, str]]:
|
||||||
# remote filename for a siminput gets prefixed with the job's name
|
# remote filename for a siminput gets prefixed with the job's name
|
||||||
return list(map(lambda x: (self.parent_workload.workload_input_base_dir + "/" + x, self.jobname + "-" + x), self.siminputs))
|
return list(map(lambda x: (self.parent_workload.workload_input_base_dir + "/" + x, self.jobname + "-" + x), self.siminputs))
|
||||||
|
|
||||||
def rootfs_path(self):
|
def rootfs_path(self) -> Optional[str]:
|
||||||
return self.rootfs
|
return self.rootfs
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self) -> str:
|
||||||
return self.jobname
|
return self.jobname
|
||||||
|
|
||||||
class WorkloadConfig:
|
class WorkloadConfig:
|
||||||
|
@ -72,10 +83,23 @@ class WorkloadConfig:
|
||||||
2) there is one "job" - a binary/rootfs combo to be run on all sims
|
2) there is one "job" - a binary/rootfs combo to be run on all sims
|
||||||
"""
|
"""
|
||||||
|
|
||||||
workloadinputs = 'workloads/'
|
workloadinputs: str = 'workloads/'
|
||||||
workloadoutputs = 'results-workloads/'
|
workloadoutputs: str = 'results-workloads/'
|
||||||
|
workloadfilename: str
|
||||||
|
common_rootfs: Optional[str]
|
||||||
|
derive_rootfs: bool
|
||||||
|
common_bootbinary: str
|
||||||
|
workload_name: str
|
||||||
|
common_outputs: str
|
||||||
|
common_simulation_outputs: List[str]
|
||||||
|
common_simulation_inputs: List[str]
|
||||||
|
workload_input_base_dir: str
|
||||||
|
uniform_mode: bool
|
||||||
|
jobs: List[JobConfig]
|
||||||
|
post_run_hook: str
|
||||||
|
job_results_dir: str
|
||||||
|
|
||||||
def __init__(self, workloadfilename, launch_time, suffixtag):
|
def __init__(self, workloadfilename: str, launch_time: str, suffixtag: str) -> None:
|
||||||
self.workloadfilename = self.workloadinputs + workloadfilename
|
self.workloadfilename = self.workloadinputs + workloadfilename
|
||||||
workloadjson = None
|
workloadjson = None
|
||||||
with open(self.workloadfilename) as json_data:
|
with open(self.workloadfilename) as json_data:
|
||||||
|
@ -120,13 +144,13 @@ class WorkloadConfig:
|
||||||
#import code
|
#import code
|
||||||
#code.interact(local=locals())
|
#code.interact(local=locals())
|
||||||
|
|
||||||
def get_job(self, index):
|
def get_job(self, index: int) -> JobConfig:
|
||||||
if not self.uniform_mode:
|
if not self.uniform_mode:
|
||||||
return self.jobs[index]
|
return self.jobs[index]
|
||||||
else:
|
else:
|
||||||
return JobConfig(dict(), self, index)
|
return JobConfig(dict(), self, index)
|
||||||
|
|
||||||
def are_all_jobs_assigned(self, numjobsassigned):
|
def are_all_jobs_assigned(self, numjobsassigned: int) -> bool:
|
||||||
""" Return True if each job is assigned to at least one simulation.
|
""" Return True if each job is assigned to at least one simulation.
|
||||||
In the uniform case, always return True """
|
In the uniform case, always return True """
|
||||||
if not self.uniform_mode:
|
if not self.uniform_mode:
|
||||||
|
|
|
@ -1,4 +1,3 @@
|
||||||
from __future__ import print_function
|
|
||||||
from pprint import pprint
|
from pprint import pprint
|
||||||
|
|
||||||
# Do NOT import any firesim code being tested that might open connections to AWS here.
|
# Do NOT import any firesim code being tested that might open connections to AWS here.
|
||||||
|
|
|
@ -1,16 +1,18 @@
|
||||||
"""\
|
"""See `StreamLogger`.
|
||||||
See `StreamLogger`.
|
|
||||||
|
|
||||||
This is taken from https://gist.github.com/pmuller/2376336
|
This is taken from https://gist.github.com/pmuller/2376336
|
||||||
which has no license associated with it.
|
which has no license associated with it.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
import sys
|
import sys
|
||||||
import logging
|
import logging
|
||||||
import io
|
import io
|
||||||
|
|
||||||
|
from typing import Any, Optional, Tuple
|
||||||
|
|
||||||
class StreamLogger(object):
|
class StreamLogger:
|
||||||
"""
|
"""
|
||||||
A helper which intercepts what's written to an output stream
|
A helper which intercepts what's written to an output stream
|
||||||
then sends it, line by line, to a `logging.Logger` instance.
|
then sends it, line by line, to a `logging.Logger` instance.
|
||||||
|
@ -22,9 +24,15 @@ class StreamLogger(object):
|
||||||
with StreamLogger('stdout'):
|
with StreamLogger('stdout'):
|
||||||
print('foo')
|
print('foo')
|
||||||
"""
|
"""
|
||||||
|
__name: str
|
||||||
|
__stream: Any
|
||||||
|
__logger: Optional[logging.Logger]
|
||||||
|
__buffer: io.StringIO
|
||||||
|
__unbuffered: bool
|
||||||
|
__flush_on_new_line: bool
|
||||||
|
|
||||||
def __init__(self, name, logger=None, unbuffered=False,
|
def __init__(self, name: str, logger: logging.Logger = None, unbuffered: bool = False,
|
||||||
flush_on_new_line=True):
|
flush_on_new_line: bool = True) -> None:
|
||||||
"""
|
"""
|
||||||
``name``: The stream name to intercept ('stdout' or 'stderr')
|
``name``: The stream name to intercept ('stdout' or 'stderr')
|
||||||
``logger``: The logger that will receive what's written to the stream.
|
``logger``: The logger that will receive what's written to the stream.
|
||||||
|
@ -41,7 +49,7 @@ class StreamLogger(object):
|
||||||
self.__unbuffered = unbuffered
|
self.__unbuffered = unbuffered
|
||||||
self.__flush_on_new_line = flush_on_new_line
|
self.__flush_on_new_line = flush_on_new_line
|
||||||
|
|
||||||
def write(self, data):
|
def write(self, data: str) -> None:
|
||||||
"""Write data to the stream.
|
"""Write data to the stream.
|
||||||
"""
|
"""
|
||||||
self.__buffer.write(data)
|
self.__buffer.write(data)
|
||||||
|
@ -49,7 +57,7 @@ class StreamLogger(object):
|
||||||
(self.__flush_on_new_line is True and '\n' in data):
|
(self.__flush_on_new_line is True and '\n' in data):
|
||||||
self.flush()
|
self.flush()
|
||||||
|
|
||||||
def flush(self):
|
def flush(self) -> None:
|
||||||
"""Flush the stream.
|
"""Flush the stream.
|
||||||
"""
|
"""
|
||||||
self.__buffer.seek(0)
|
self.__buffer.seek(0)
|
||||||
|
@ -72,22 +80,22 @@ class StreamLogger(object):
|
||||||
self.__buffer.truncate()
|
self.__buffer.truncate()
|
||||||
break
|
break
|
||||||
|
|
||||||
def parse(self, data):
|
def parse(self, data: str) -> Tuple[str, str]:
|
||||||
"""Override me!
|
"""Override me!
|
||||||
"""
|
"""
|
||||||
return 'debug', data
|
return 'debug', data
|
||||||
|
|
||||||
def isatty(self):
|
def isatty(self) -> bool:
|
||||||
"""I'm not a tty.
|
"""I'm not a tty.
|
||||||
"""
|
"""
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def __enter__(self):
|
def __enter__(self) -> None:
|
||||||
"""Enter the context manager.
|
"""Enter the context manager.
|
||||||
"""
|
"""
|
||||||
setattr(sys, self.__name, self)
|
setattr(sys, self.__name, self)
|
||||||
|
|
||||||
def __exit__(self, exc_type, exc_value, traceback):
|
def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None:
|
||||||
"""Leave the context manager.
|
"""Leave the context manager.
|
||||||
"""
|
"""
|
||||||
setattr(sys, self.__name, self.__stream)
|
setattr(sys, self.__name, self.__stream)
|
||||||
|
@ -96,5 +104,5 @@ class StreamLogger(object):
|
||||||
class InfoStreamLogger(StreamLogger):
|
class InfoStreamLogger(StreamLogger):
|
||||||
""" StreamLogger, but write to info log instead of debug. """
|
""" StreamLogger, but write to info log instead of debug. """
|
||||||
|
|
||||||
def parse(self, data):
|
def parse(self, data: str) -> Tuple[str, str]:
|
||||||
return 'info', data
|
return 'info', data
|
||||||
|
|
Loading…
Reference in New Issue