cutlass/docs/classcutlass_1_1gemm_1_1dev...

749 lines
121 KiB
HTML

<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
<meta http-equiv="X-UA-Compatible" content="IE=9"/>
<meta name="generator" content="Doxygen 1.8.11"/>
<title>CUTLASS: cutlass::gemm::device::GemmSplitKParallel&lt; ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ &gt; Class Template Reference</title>
<link href="tabs.css" rel="stylesheet" type="text/css"/>
<script type="text/javascript" src="jquery.js"></script>
<script type="text/javascript" src="dynsections.js"></script>
<link href="search/search.css" rel="stylesheet" type="text/css"/>
<script type="text/javascript" src="search/searchdata.js"></script>
<script type="text/javascript" src="search/search.js"></script>
<script type="text/javascript">
$(document).ready(function() { init_search(); });
</script>
<script type="text/x-mathjax-config">
MathJax.Hub.Config({
extensions: ["tex2jax.js"],
jax: ["input/TeX","output/HTML-CSS"],
});
</script><script type="text/javascript" src="https://cdn.jsdelivr.net/npm/mathjax@2/MathJax.js"></script>
<link href="doxygen.css" rel="stylesheet" type="text/css" />
</head>
<body>
<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
<div id="titlearea">
<table cellspacing="0" cellpadding="0">
<tbody>
<tr style="height: 56px;">
<td id="projectlogo"><img alt="Logo" src="cutlass-logo-small.png"/></td>
<td id="projectalign" style="padding-left: 0.5em;">
<div id="projectname">CUTLASS
</div>
<div id="projectbrief">CUDA Templates for Linear Algebra Subroutines and Solvers</div>
</td>
</tr>
</tbody>
</table>
</div>
<!-- end header part -->
<!-- Generated by Doxygen 1.8.11 -->
<script type="text/javascript">
var searchBox = new SearchBox("searchBox", "search",false,'Search');
</script>
<div id="navrow1" class="tabs">
<ul class="tablist">
<li><a href="index.html"><span>Main&#160;Page</span></a></li>
<li><a href="modules.html"><span>Modules</span></a></li>
<li><a href="namespaces.html"><span>Namespaces</span></a></li>
<li class="current"><a href="annotated.html"><span>Classes</span></a></li>
<li><a href="files.html"><span>Files</span></a></li>
<li>
<div id="MSearchBox" class="MSearchBoxInactive">
<span class="left">
<img id="MSearchSelect" src="search/mag_sel.png"
onmouseover="return searchBox.OnSearchSelectShow()"
onmouseout="return searchBox.OnSearchSelectHide()"
alt=""/>
<input type="text" id="MSearchField" value="Search" accesskey="S"
onfocus="searchBox.OnSearchFieldFocus(true)"
onblur="searchBox.OnSearchFieldFocus(false)"
onkeyup="searchBox.OnSearchFieldChange(event)"/>
</span><span class="right">
<a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.png" alt=""/></a>
</span>
</div>
</li>
</ul>
</div>
<div id="navrow2" class="tabs2">
<ul class="tablist">
<li><a href="annotated.html"><span>Class&#160;List</span></a></li>
<li><a href="classes.html"><span>Class&#160;Index</span></a></li>
<li><a href="inherits.html"><span>Class&#160;Hierarchy</span></a></li>
<li><a href="functions.html"><span>Class&#160;Members</span></a></li>
</ul>
</div>
<!-- window showing the filter options -->
<div id="MSearchSelectWindow"
onmouseover="return searchBox.OnSearchSelectShow()"
onmouseout="return searchBox.OnSearchSelectHide()"
onkeydown="return searchBox.OnSearchSelectKey(event)">
</div>
<!-- iframe showing the search results (closed by default) -->
<div id="MSearchResultsWindow">
<iframe src="javascript:void(0)" frameborder="0"
name="MSearchResults" id="MSearchResults" title="Search results">
</iframe>
</div>
<div id="nav-path" class="navpath">
<ul>
<li class="navelem"><a class="el" href="namespacecutlass.html">cutlass</a></li><li class="navelem"><a class="el" href="namespacecutlass_1_1gemm.html">gemm</a></li><li class="navelem"><a class="el" href="namespacecutlass_1_1gemm_1_1device.html">device</a></li><li class="navelem"><a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html">GemmSplitKParallel</a></li> </ul>
</div>
</div><!-- top -->
<div class="header">
<div class="summary">
<a href="#nested-classes">Classes</a> &#124;
<a href="#pub-types">Public Types</a> &#124;
<a href="#pub-methods">Public Member Functions</a> &#124;
<a href="#pub-static-methods">Static Public Member Functions</a> &#124;
<a href="#pub-static-attribs">Static Public Attributes</a> &#124;
<a href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel-members.html">List of all members</a> </div>
<div class="headertitle">
<div class="title">cutlass::gemm::device::GemmSplitKParallel&lt; ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ &gt; Class Template Reference</div> </div>
</div><!--header-->
<div class="contents">
<p><code>#include &lt;<a class="el" href="device_2gemm__splitk__parallel_8h_source.html">gemm_splitk_parallel.h</a>&gt;</code></p>
<table class="memberdecls">
<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="nested-classes"></a>
Classes</h2></td></tr>
<tr class="memitem:"><td class="memItemLeft" align="right" valign="top">struct &#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel_1_1Arguments.html">Arguments</a></td></tr>
<tr class="memdesc:"><td class="mdescLeft">&#160;</td><td class="mdescRight">Argument structure. <a href="structcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel_1_1Arguments.html#details">More...</a><br /></td></tr>
<tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
</table><table class="memberdecls">
<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="pub-types"></a>
Public Types</h2></td></tr>
<tr class="memitem:a372091a0c8d52bb9aab75c274039ceb7"><td class="memItemLeft" align="right" valign="top">using&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#a372091a0c8d52bb9aab75c274039ceb7">ElementA</a> = ElementA_</td></tr>
<tr class="separator:a372091a0c8d52bb9aab75c274039ceb7"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:ae59bd456dc03a51e64a655367382535d"><td class="memItemLeft" align="right" valign="top">using&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#ae59bd456dc03a51e64a655367382535d">LayoutA</a> = LayoutA_</td></tr>
<tr class="separator:ae59bd456dc03a51e64a655367382535d"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:a5a1c15eb1246208cf00bd811b8df85ba"><td class="memItemLeft" align="right" valign="top">using&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#a5a1c15eb1246208cf00bd811b8df85ba">ElementB</a> = ElementB_</td></tr>
<tr class="separator:a5a1c15eb1246208cf00bd811b8df85ba"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:a08550ee41dd02322c514364035f5d098"><td class="memItemLeft" align="right" valign="top">using&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#a08550ee41dd02322c514364035f5d098">LayoutB</a> = LayoutB_</td></tr>
<tr class="separator:a08550ee41dd02322c514364035f5d098"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:a4026bdedc7cf880b58acbdfa716f7f28"><td class="memItemLeft" align="right" valign="top">using&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#a4026bdedc7cf880b58acbdfa716f7f28">ElementC</a> = ElementC_</td></tr>
<tr class="separator:a4026bdedc7cf880b58acbdfa716f7f28"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:ab2572b47bae5f0ae0f06647cfe86ad4e"><td class="memItemLeft" align="right" valign="top">using&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#ab2572b47bae5f0ae0f06647cfe86ad4e">LayoutC</a> = LayoutC_</td></tr>
<tr class="separator:ab2572b47bae5f0ae0f06647cfe86ad4e"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:a56649fb86263ffd12aae9f7ae2e6506e"><td class="memItemLeft" align="right" valign="top">using&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#a56649fb86263ffd12aae9f7ae2e6506e">ElementAccumulator</a> = ElementAccumulator_</td></tr>
<tr class="separator:a56649fb86263ffd12aae9f7ae2e6506e"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:a1bc1686f83acac48e8a1695b71518b16"><td class="memItemLeft" align="right" valign="top">using&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#a1bc1686f83acac48e8a1695b71518b16">OperatorClass</a> = OperatorClass_</td></tr>
<tr class="separator:a1bc1686f83acac48e8a1695b71518b16"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:a2a48eb6e51e2315e945882d5e70ebb2f"><td class="memItemLeft" align="right" valign="top">using&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#a2a48eb6e51e2315e945882d5e70ebb2f">ArchTag</a> = ArchTag_</td></tr>
<tr class="separator:a2a48eb6e51e2315e945882d5e70ebb2f"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:ad174d9818e8a4857b65e5da6da7a45b3"><td class="memItemLeft" align="right" valign="top">using&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#ad174d9818e8a4857b65e5da6da7a45b3">ThreadblockShape</a> = ThreadblockShape_</td></tr>
<tr class="separator:ad174d9818e8a4857b65e5da6da7a45b3"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:a36698ab97ffbc16808b60f041a1e583a"><td class="memItemLeft" align="right" valign="top">using&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#a36698ab97ffbc16808b60f041a1e583a">WarpShape</a> = WarpShape_</td></tr>
<tr class="separator:a36698ab97ffbc16808b60f041a1e583a"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:ab2e2468a859f14502fd18013859ec9e6"><td class="memItemLeft" align="right" valign="top">using&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#ab2e2468a859f14502fd18013859ec9e6">InstructionShape</a> = InstructionShape_</td></tr>
<tr class="separator:ab2e2468a859f14502fd18013859ec9e6"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:affb7a5c96c9e8b04eb94a464e5fdc48b"><td class="memItemLeft" align="right" valign="top">using&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#affb7a5c96c9e8b04eb94a464e5fdc48b">ConvertScaledOp</a> = ConvertScaledOp_</td></tr>
<tr class="separator:affb7a5c96c9e8b04eb94a464e5fdc48b"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:a2b8223379b65f5117d4d8dfd4f6cbc13"><td class="memItemLeft" align="right" valign="top">using&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#a2b8223379b65f5117d4d8dfd4f6cbc13">EpilogueOutputOp</a> = EpilogueOutputOp_</td></tr>
<tr class="separator:a2b8223379b65f5117d4d8dfd4f6cbc13"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:ac82ba3da12b03bc91586a3947ce99fc5"><td class="memItemLeft" align="right" valign="top">using&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#ac82ba3da12b03bc91586a3947ce99fc5">ReductionOp</a> = ReductionOp_</td></tr>
<tr class="separator:ac82ba3da12b03bc91586a3947ce99fc5"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:aacc6cfe6ebe0b33ec7577c654303f70d"><td class="memItemLeft" align="right" valign="top">using&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#aacc6cfe6ebe0b33ec7577c654303f70d">ThreadblockSwizzle</a> = ThreadblockSwizzle_</td></tr>
<tr class="separator:aacc6cfe6ebe0b33ec7577c654303f70d"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:ae436b25ceca72104f23d09442de78f73"><td class="memItemLeft" align="right" valign="top">using&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#ae436b25ceca72104f23d09442de78f73">Operator</a> = Operator_</td></tr>
<tr class="separator:ae436b25ceca72104f23d09442de78f73"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:a4a100b8064c285bdb06fe3e523a07d1c"><td class="memItemLeft" align="right" valign="top">using&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#a4a100b8064c285bdb06fe3e523a07d1c">GemmKernel</a> = typename <a class="el" href="structcutlass_1_1gemm_1_1kernel_1_1DefaultGemmSplitKParallel.html">kernel::DefaultGemmSplitKParallel</a>&lt; <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#a372091a0c8d52bb9aab75c274039ceb7">ElementA</a>, <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#ae59bd456dc03a51e64a655367382535d">LayoutA</a>, kAlignmentA, <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#a5a1c15eb1246208cf00bd811b8df85ba">ElementB</a>, <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#a08550ee41dd02322c514364035f5d098">LayoutB</a>, kAlignmentB, <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#a56649fb86263ffd12aae9f7ae2e6506e">ElementAccumulator</a>, <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#ab2572b47bae5f0ae0f06647cfe86ad4e">LayoutC</a>, <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#a56649fb86263ffd12aae9f7ae2e6506e">ElementAccumulator</a>, <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#a1bc1686f83acac48e8a1695b71518b16">OperatorClass</a>, <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#a2a48eb6e51e2315e945882d5e70ebb2f">ArchTag</a>, <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#ad174d9818e8a4857b65e5da6da7a45b3">ThreadblockShape</a>, <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#a36698ab97ffbc16808b60f041a1e583a">WarpShape</a>, <a class="el" 
href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#ab2e2468a859f14502fd18013859ec9e6">InstructionShape</a>, <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#affb7a5c96c9e8b04eb94a464e5fdc48b">ConvertScaledOp</a>, <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#aacc6cfe6ebe0b33ec7577c654303f70d">ThreadblockSwizzle</a>, <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#af1f647942f7734bbf01e473118f2512c">kStages</a>, <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#ae436b25ceca72104f23d09442de78f73">Operator</a> &gt;::<a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#a4a100b8064c285bdb06fe3e523a07d1c">GemmKernel</a></td></tr>
<tr class="memdesc:a4a100b8064c285bdb06fe3e523a07d1c"><td class="mdescLeft">&#160;</td><td class="mdescRight">GEMM kernel. <a href="#a4a100b8064c285bdb06fe3e523a07d1c">More...</a><br /></td></tr>
<tr class="separator:a4a100b8064c285bdb06fe3e523a07d1c"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:aaf83264eb3effceee610d9547ddf32e9"><td class="memItemLeft" align="right" valign="top">using&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#aaf83264eb3effceee610d9547ddf32e9">ReductionKernel</a> = <a class="el" href="classcutlass_1_1reduction_1_1kernel_1_1ReduceSplitK.html">cutlass::reduction::kernel::ReduceSplitK</a>&lt; <a class="el" href="structcutlass_1_1MatrixShape.html">cutlass::MatrixShape</a>&lt; 4, 32 *EpilogueOutputOp::kCount &gt;, <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#a2b8223379b65f5117d4d8dfd4f6cbc13">EpilogueOutputOp</a>, <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#ac82ba3da12b03bc91586a3947ce99fc5">ReductionOp</a> &gt;</td></tr>
<tr class="memdesc:aaf83264eb3effceee610d9547ddf32e9"><td class="mdescLeft">&#160;</td><td class="mdescRight">Reduction kernel. <a href="#aaf83264eb3effceee610d9547ddf32e9">More...</a><br /></td></tr>
<tr class="separator:aaf83264eb3effceee610d9547ddf32e9"><td class="memSeparator" colspan="2">&#160;</td></tr>
</table><table class="memberdecls">
<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="pub-methods"></a>
Public Member Functions</h2></td></tr>
<tr class="memitem:abfb1166a9c55270ff8f1b265516a418c"><td class="memItemLeft" align="right" valign="top">&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#abfb1166a9c55270ff8f1b265516a418c">GemmSplitKParallel</a> ()</td></tr>
<tr class="memdesc:abfb1166a9c55270ff8f1b265516a418c"><td class="mdescLeft">&#160;</td><td class="mdescRight">Constructs the GEMM. <a href="#abfb1166a9c55270ff8f1b265516a418c">More...</a><br /></td></tr>
<tr class="separator:abfb1166a9c55270ff8f1b265516a418c"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:a7085b7cf85bc1bcd202ea6928656d966"><td class="memItemLeft" align="right" valign="top"><a class="el" href="namespacecutlass.html#ac5a88c5840a28a9e0206b9cc7812a18d">Status</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#a7085b7cf85bc1bcd202ea6928656d966">initialize</a> (<a class="el" href="structcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel_1_1Arguments.html">Arguments</a> const &amp;args, void *workspace)</td></tr>
<tr class="memdesc:a7085b7cf85bc1bcd202ea6928656d966"><td class="mdescLeft">&#160;</td><td class="mdescRight">Initializes GEMM state from arguments. <a href="#a7085b7cf85bc1bcd202ea6928656d966">More...</a><br /></td></tr>
<tr class="separator:a7085b7cf85bc1bcd202ea6928656d966"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:a07ad725857d7eb191cbfc135df22b781"><td class="memItemLeft" align="right" valign="top"><a class="el" href="namespacecutlass.html#ac5a88c5840a28a9e0206b9cc7812a18d">Status</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#a07ad725857d7eb191cbfc135df22b781">update</a> (<a class="el" href="structcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel_1_1Arguments.html">Arguments</a> const &amp;args, void *workspace=<a class="el" href="platform_8h.html#ab979d9d4b4923f7c54d6caa6e1a61936">nullptr</a>)</td></tr>
<tr class="memdesc:a07ad725857d7eb191cbfc135df22b781"><td class="mdescLeft">&#160;</td><td class="mdescRight">Lightweight update given a subset of arguments. <a href="#a07ad725857d7eb191cbfc135df22b781">More...</a><br /></td></tr>
<tr class="separator:a07ad725857d7eb191cbfc135df22b781"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:aeadc76210d06ec22776aca4a58de9930"><td class="memItemLeft" align="right" valign="top"><a class="el" href="namespacecutlass.html#ac5a88c5840a28a9e0206b9cc7812a18d">Status</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#aeadc76210d06ec22776aca4a58de9930">run</a> (cudaStream_t stream=<a class="el" href="platform_8h.html#ab979d9d4b4923f7c54d6caa6e1a61936">nullptr</a>)</td></tr>
<tr class="memdesc:aeadc76210d06ec22776aca4a58de9930"><td class="mdescLeft">&#160;</td><td class="mdescRight">Runs the kernel using initialized state. <a href="#aeadc76210d06ec22776aca4a58de9930">More...</a><br /></td></tr>
<tr class="separator:aeadc76210d06ec22776aca4a58de9930"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:a5d934b66a5ead9df63bea323abf8f83f"><td class="memItemLeft" align="right" valign="top"><a class="el" href="namespacecutlass.html#ac5a88c5840a28a9e0206b9cc7812a18d">Status</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#a5d934b66a5ead9df63bea323abf8f83f">operator()</a> (cudaStream_t stream=<a class="el" href="platform_8h.html#ab979d9d4b4923f7c54d6caa6e1a61936">nullptr</a>)</td></tr>
<tr class="memdesc:a5d934b66a5ead9df63bea323abf8f83f"><td class="mdescLeft">&#160;</td><td class="mdescRight">Runs the kernel using initialized state. <a href="#a5d934b66a5ead9df63bea323abf8f83f">More...</a><br /></td></tr>
<tr class="separator:a5d934b66a5ead9df63bea323abf8f83f"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:af324f9dd7abc0e3ae6007e6dcf186190"><td class="memItemLeft" align="right" valign="top"><a class="el" href="namespacecutlass.html#ac5a88c5840a28a9e0206b9cc7812a18d">Status</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#af324f9dd7abc0e3ae6007e6dcf186190">operator()</a> (<a class="el" href="structcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel_1_1Arguments.html">Arguments</a> const &amp;args, void *workspace=<a class="el" href="platform_8h.html#ab979d9d4b4923f7c54d6caa6e1a61936">nullptr</a>, cudaStream_t stream=<a class="el" href="platform_8h.html#ab979d9d4b4923f7c54d6caa6e1a61936">nullptr</a>)</td></tr>
<tr class="memdesc:af324f9dd7abc0e3ae6007e6dcf186190"><td class="mdescLeft">&#160;</td><td class="mdescRight">Runs the kernel using initialized state. <a href="#af324f9dd7abc0e3ae6007e6dcf186190">More...</a><br /></td></tr>
<tr class="separator:af324f9dd7abc0e3ae6007e6dcf186190"><td class="memSeparator" colspan="2">&#160;</td></tr>
</table><table class="memberdecls">
<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="pub-static-methods"></a>
Static Public Member Functions</h2></td></tr>
<tr class="memitem:a114b122602b425909f9be0df461353a4"><td class="memItemLeft" align="right" valign="top">static <a class="el" href="namespacecutlass.html#ac5a88c5840a28a9e0206b9cc7812a18d">Status</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#a114b122602b425909f9be0df461353a4">can_implement</a> (<a class="el" href="structcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel_1_1Arguments.html">Arguments</a> const &amp;args)</td></tr>
<tr class="memdesc:a114b122602b425909f9be0df461353a4"><td class="mdescLeft">&#160;</td><td class="mdescRight">Determines whether the GEMM can execute the given problem. <a href="#a114b122602b425909f9be0df461353a4">More...</a><br /></td></tr>
<tr class="separator:a114b122602b425909f9be0df461353a4"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:ae8fb82c40078cf84c211f10f726caaf5"><td class="memItemLeft" align="right" valign="top">static size_t&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#ae8fb82c40078cf84c211f10f726caaf5">get_workspace_size</a> (<a class="el" href="structcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel_1_1Arguments.html">Arguments</a> const &amp;args)</td></tr>
<tr class="memdesc:ae8fb82c40078cf84c211f10f726caaf5"><td class="mdescLeft">&#160;</td><td class="mdescRight">Gets the workspace size. <a href="#ae8fb82c40078cf84c211f10f726caaf5">More...</a><br /></td></tr>
<tr class="separator:ae8fb82c40078cf84c211f10f726caaf5"><td class="memSeparator" colspan="2">&#160;</td></tr>
</table><table class="memberdecls">
<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="pub-static-attribs"></a>
Static Public Attributes</h2></td></tr>
<tr class="memitem:af1f647942f7734bbf01e473118f2512c"><td class="memItemLeft" align="right" valign="top">static int const&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#af1f647942f7734bbf01e473118f2512c">kStages</a> = Stages</td></tr>
<tr class="separator:af1f647942f7734bbf01e473118f2512c"><td class="memSeparator" colspan="2">&#160;</td></tr>
</table>
<a name="details" id="details"></a><h2 class="groupheader">Detailed Description</h2>
<div class="textblock"><h3>template&lt;typename ElementA_, typename LayoutA_, typename ElementB_, typename LayoutB_, typename ElementC_, typename LayoutC_, typename ElementAccumulator_ = ElementC_, typename OperatorClass_ = arch::OpClassSimt, typename ArchTag_ = arch::Sm70, typename ThreadblockShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::ThreadblockShape, typename WarpShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::WarpShape, typename InstructionShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::InstructionShape, typename EpilogueOutputOp_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::EpilogueOutputOp, typename ConvertScaledOp_ = cutlass::epilogue::thread::Convert&lt; ElementAccumulator_, DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementAccumulator_, ElementAccumulator_&gt;::EpilogueOutputOp::kCount, ElementAccumulator_&gt;, typename ReductionOp_ = cutlass::reduction::thread::ReduceAdd&lt; ElementAccumulator_, typename EpilogueOutputOp_::ElementAccumulator, EpilogueOutputOp_::kCount&gt;, typename ThreadblockSwizzle_ = threadblock::GemmSplitKHorizontalThreadblockSwizzle, int Stages = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kStages, int kAlignmentA = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentA, int kAlignmentB = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentB, typename Operator_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, 
ElementAccumulator_&gt;::Operator&gt;<br />
class cutlass::gemm::device::GemmSplitKParallel&lt; ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ &gt;</h3>
<p><a class="el" href="classcutlass_1_1gemm_1_1device_1_1Gemm.html">Gemm</a> device-level operator performing parallel reduction over the K partition. </p>
</div><h2 class="groupheader">Member Typedef Documentation</h2>
<a class="anchor" id="a2a48eb6e51e2315e945882d5e70ebb2f"></a>
<div class="memitem">
<div class="memproto">
<div class="memtemplate">
template&lt;typename ElementA_, typename LayoutA_, typename ElementB_, typename LayoutB_, typename ElementC_, typename LayoutC_, typename ElementAccumulator_ = ElementC_, typename OperatorClass_ = arch::OpClassSimt, typename ArchTag_ = arch::Sm70, typename ThreadblockShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::ThreadblockShape, typename WarpShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::WarpShape, typename InstructionShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::InstructionShape, typename EpilogueOutputOp_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::EpilogueOutputOp, typename ConvertScaledOp_ = cutlass::epilogue::thread::Convert&lt; ElementAccumulator_, DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementAccumulator_, ElementAccumulator_&gt;::EpilogueOutputOp::kCount, ElementAccumulator_&gt;, typename ReductionOp_ = cutlass::reduction::thread::ReduceAdd&lt; ElementAccumulator_, typename EpilogueOutputOp_::ElementAccumulator, EpilogueOutputOp_::kCount&gt;, typename ThreadblockSwizzle_ = threadblock::GemmSplitKHorizontalThreadblockSwizzle, int Stages = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kStages, int kAlignmentA = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentA, int kAlignmentB = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentB, typename Operator_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::Operator&gt; 
</div>
<table class="memname">
<tr>
<td class="memname">using <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html">cutlass::gemm::device::GemmSplitKParallel</a>&lt; ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ &gt;::<a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#a2a48eb6e51e2315e945882d5e70ebb2f">ArchTag</a> = ArchTag_</td>
</tr>
</table>
</div><div class="memdoc">
</div>
</div>
<!-- Member entry for the type alias GemmSplitKParallel::ConvertScaledOp. The memtemplate div repeats the class template parameter list with its defaults; the memname table shows the alias declaration, ConvertScaledOp = ConvertScaledOp_. The memdoc div is empty: no description text accompanies this alias. -->
<a class="anchor" id="affb7a5c96c9e8b04eb94a464e5fdc48b"></a>
<div class="memitem">
<div class="memproto">
<div class="memtemplate">
template&lt;typename ElementA_, typename LayoutA_, typename ElementB_, typename LayoutB_, typename ElementC_, typename LayoutC_, typename ElementAccumulator_ = ElementC_, typename OperatorClass_ = arch::OpClassSimt, typename ArchTag_ = arch::Sm70, typename ThreadblockShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::ThreadblockShape, typename WarpShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::WarpShape, typename InstructionShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::InstructionShape, typename EpilogueOutputOp_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::EpilogueOutputOp, typename ConvertScaledOp_ = cutlass::epilogue::thread::Convert&lt; ElementAccumulator_, DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementAccumulator_, ElementAccumulator_&gt;::EpilogueOutputOp::kCount, ElementAccumulator_&gt;, typename ReductionOp_ = cutlass::reduction::thread::ReduceAdd&lt; ElementAccumulator_, typename EpilogueOutputOp_::ElementAccumulator, EpilogueOutputOp_::kCount&gt;, typename ThreadblockSwizzle_ = threadblock::GemmSplitKHorizontalThreadblockSwizzle, int Stages = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kStages, int kAlignmentA = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentA, int kAlignmentB = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentB, typename Operator_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::Operator&gt; 
</div>
<table class="memname">
<tr>
<td class="memname">using <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html">cutlass::gemm::device::GemmSplitKParallel</a>&lt; ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ &gt;::<a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#affb7a5c96c9e8b04eb94a464e5fdc48b">ConvertScaledOp</a> = ConvertScaledOp_</td>
</tr>
</table>
</div><div class="memdoc">
</div>
</div>
<!-- Member entry for the type alias GemmSplitKParallel::ElementA. The memtemplate div repeats the class template parameter list with its defaults; the memname table shows the alias declaration, ElementA = ElementA_. The memdoc div is empty: no description text accompanies this alias. -->
<a class="anchor" id="a372091a0c8d52bb9aab75c274039ceb7"></a>
<div class="memitem">
<div class="memproto">
<div class="memtemplate">
template&lt;typename ElementA_, typename LayoutA_, typename ElementB_, typename LayoutB_, typename ElementC_, typename LayoutC_, typename ElementAccumulator_ = ElementC_, typename OperatorClass_ = arch::OpClassSimt, typename ArchTag_ = arch::Sm70, typename ThreadblockShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::ThreadblockShape, typename WarpShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::WarpShape, typename InstructionShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::InstructionShape, typename EpilogueOutputOp_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::EpilogueOutputOp, typename ConvertScaledOp_ = cutlass::epilogue::thread::Convert&lt; ElementAccumulator_, DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementAccumulator_, ElementAccumulator_&gt;::EpilogueOutputOp::kCount, ElementAccumulator_&gt;, typename ReductionOp_ = cutlass::reduction::thread::ReduceAdd&lt; ElementAccumulator_, typename EpilogueOutputOp_::ElementAccumulator, EpilogueOutputOp_::kCount&gt;, typename ThreadblockSwizzle_ = threadblock::GemmSplitKHorizontalThreadblockSwizzle, int Stages = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kStages, int kAlignmentA = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentA, int kAlignmentB = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentB, typename Operator_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::Operator&gt; 
</div>
<table class="memname">
<tr>
<td class="memname">using <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html">cutlass::gemm::device::GemmSplitKParallel</a>&lt; ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ &gt;::<a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#a372091a0c8d52bb9aab75c274039ceb7">ElementA</a> = ElementA_</td>
</tr>
</table>
</div><div class="memdoc">
</div>
</div>
<!-- Member entry for the type alias GemmSplitKParallel::ElementAccumulator. The memtemplate div repeats the class template parameter list with its defaults; the memname table shows the alias declaration, ElementAccumulator = ElementAccumulator_. The memdoc div is empty: no description text accompanies this alias. -->
<a class="anchor" id="a56649fb86263ffd12aae9f7ae2e6506e"></a>
<div class="memitem">
<div class="memproto">
<div class="memtemplate">
template&lt;typename ElementA_, typename LayoutA_, typename ElementB_, typename LayoutB_, typename ElementC_, typename LayoutC_, typename ElementAccumulator_ = ElementC_, typename OperatorClass_ = arch::OpClassSimt, typename ArchTag_ = arch::Sm70, typename ThreadblockShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::ThreadblockShape, typename WarpShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::WarpShape, typename InstructionShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::InstructionShape, typename EpilogueOutputOp_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::EpilogueOutputOp, typename ConvertScaledOp_ = cutlass::epilogue::thread::Convert&lt; ElementAccumulator_, DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementAccumulator_, ElementAccumulator_&gt;::EpilogueOutputOp::kCount, ElementAccumulator_&gt;, typename ReductionOp_ = cutlass::reduction::thread::ReduceAdd&lt; ElementAccumulator_, typename EpilogueOutputOp_::ElementAccumulator, EpilogueOutputOp_::kCount&gt;, typename ThreadblockSwizzle_ = threadblock::GemmSplitKHorizontalThreadblockSwizzle, int Stages = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kStages, int kAlignmentA = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentA, int kAlignmentB = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentB, typename Operator_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::Operator&gt; 
</div>
<table class="memname">
<tr>
<td class="memname">using <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html">cutlass::gemm::device::GemmSplitKParallel</a>&lt; ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ &gt;::<a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#a56649fb86263ffd12aae9f7ae2e6506e">ElementAccumulator</a> = ElementAccumulator_</td>
</tr>
</table>
</div><div class="memdoc">
</div>
</div>
<!-- Member entry for the type alias GemmSplitKParallel::ElementB. The memtemplate div repeats the class template parameter list with its defaults; the memname table shows the alias declaration, ElementB = ElementB_. The memdoc div is empty: no description text accompanies this alias. -->
<a class="anchor" id="a5a1c15eb1246208cf00bd811b8df85ba"></a>
<div class="memitem">
<div class="memproto">
<div class="memtemplate">
template&lt;typename ElementA_, typename LayoutA_, typename ElementB_, typename LayoutB_, typename ElementC_, typename LayoutC_, typename ElementAccumulator_ = ElementC_, typename OperatorClass_ = arch::OpClassSimt, typename ArchTag_ = arch::Sm70, typename ThreadblockShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::ThreadblockShape, typename WarpShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::WarpShape, typename InstructionShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::InstructionShape, typename EpilogueOutputOp_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::EpilogueOutputOp, typename ConvertScaledOp_ = cutlass::epilogue::thread::Convert&lt; ElementAccumulator_, DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementAccumulator_, ElementAccumulator_&gt;::EpilogueOutputOp::kCount, ElementAccumulator_&gt;, typename ReductionOp_ = cutlass::reduction::thread::ReduceAdd&lt; ElementAccumulator_, typename EpilogueOutputOp_::ElementAccumulator, EpilogueOutputOp_::kCount&gt;, typename ThreadblockSwizzle_ = threadblock::GemmSplitKHorizontalThreadblockSwizzle, int Stages = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kStages, int kAlignmentA = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentA, int kAlignmentB = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentB, typename Operator_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::Operator&gt; 
</div>
<table class="memname">
<tr>
<td class="memname">using <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html">cutlass::gemm::device::GemmSplitKParallel</a>&lt; ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ &gt;::<a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#a5a1c15eb1246208cf00bd811b8df85ba">ElementB</a> = ElementB_</td>
</tr>
</table>
</div><div class="memdoc">
</div>
</div>
<!-- Member entry for the type alias GemmSplitKParallel::ElementC. The memtemplate div repeats the class template parameter list with its defaults; the memname table shows the alias declaration, ElementC = ElementC_. The memdoc div is empty: no description text accompanies this alias. -->
<a class="anchor" id="a4026bdedc7cf880b58acbdfa716f7f28"></a>
<div class="memitem">
<div class="memproto">
<div class="memtemplate">
template&lt;typename ElementA_, typename LayoutA_, typename ElementB_, typename LayoutB_, typename ElementC_, typename LayoutC_, typename ElementAccumulator_ = ElementC_, typename OperatorClass_ = arch::OpClassSimt, typename ArchTag_ = arch::Sm70, typename ThreadblockShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::ThreadblockShape, typename WarpShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::WarpShape, typename InstructionShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::InstructionShape, typename EpilogueOutputOp_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::EpilogueOutputOp, typename ConvertScaledOp_ = cutlass::epilogue::thread::Convert&lt; ElementAccumulator_, DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementAccumulator_, ElementAccumulator_&gt;::EpilogueOutputOp::kCount, ElementAccumulator_&gt;, typename ReductionOp_ = cutlass::reduction::thread::ReduceAdd&lt; ElementAccumulator_, typename EpilogueOutputOp_::ElementAccumulator, EpilogueOutputOp_::kCount&gt;, typename ThreadblockSwizzle_ = threadblock::GemmSplitKHorizontalThreadblockSwizzle, int Stages = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kStages, int kAlignmentA = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentA, int kAlignmentB = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentB, typename Operator_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::Operator&gt; 
</div>
<table class="memname">
<tr>
<td class="memname">using <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html">cutlass::gemm::device::GemmSplitKParallel</a>&lt; ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ &gt;::<a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#a4026bdedc7cf880b58acbdfa716f7f28">ElementC</a> = ElementC_</td>
</tr>
</table>
</div><div class="memdoc">
</div>
</div>
<!-- Member entry for the type alias GemmSplitKParallel::EpilogueOutputOp. The memtemplate div repeats the class template parameter list with its defaults; the memname table shows the alias declaration, EpilogueOutputOp = EpilogueOutputOp_. The memdoc div is empty: no description text accompanies this alias. -->
<a class="anchor" id="a2b8223379b65f5117d4d8dfd4f6cbc13"></a>
<div class="memitem">
<div class="memproto">
<div class="memtemplate">
template&lt;typename ElementA_, typename LayoutA_, typename ElementB_, typename LayoutB_, typename ElementC_, typename LayoutC_, typename ElementAccumulator_ = ElementC_, typename OperatorClass_ = arch::OpClassSimt, typename ArchTag_ = arch::Sm70, typename ThreadblockShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::ThreadblockShape, typename WarpShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::WarpShape, typename InstructionShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::InstructionShape, typename EpilogueOutputOp_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::EpilogueOutputOp, typename ConvertScaledOp_ = cutlass::epilogue::thread::Convert&lt; ElementAccumulator_, DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementAccumulator_, ElementAccumulator_&gt;::EpilogueOutputOp::kCount, ElementAccumulator_&gt;, typename ReductionOp_ = cutlass::reduction::thread::ReduceAdd&lt; ElementAccumulator_, typename EpilogueOutputOp_::ElementAccumulator, EpilogueOutputOp_::kCount&gt;, typename ThreadblockSwizzle_ = threadblock::GemmSplitKHorizontalThreadblockSwizzle, int Stages = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kStages, int kAlignmentA = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentA, int kAlignmentB = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentB, typename Operator_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::Operator&gt; 
</div>
<table class="memname">
<tr>
<td class="memname">using <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html">cutlass::gemm::device::GemmSplitKParallel</a>&lt; ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ &gt;::<a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#a2b8223379b65f5117d4d8dfd4f6cbc13">EpilogueOutputOp</a> = EpilogueOutputOp_</td>
</tr>
</table>
</div><div class="memdoc">
</div>
</div>
<!-- Member entry for the type alias GemmSplitKParallel::GemmKernel. The memtemplate div repeats the class template parameter list with its defaults; the memname table shows the alias declaration: GemmKernel is the nested GemmKernel type of kernel::DefaultGemmSplitKParallel, whose argument list links to this class's own members (ElementA, LayoutA, kAlignmentA, ElementB, LayoutB, kAlignmentB, ElementAccumulator, LayoutC, ElementAccumulator, OperatorClass, ArchTag, ThreadblockShape, WarpShape, InstructionShape, ConvertScaledOp, ThreadblockSwizzle, kStages, Operator). ElementAccumulator appears twice in that list and ElementC does not appear. The declaration cell is wrapped across two source lines in the middle of an anchor tag, which is valid markup. The memdoc div is empty: no description text accompanies this alias. -->
<a class="anchor" id="a4a100b8064c285bdb06fe3e523a07d1c"></a>
<div class="memitem">
<div class="memproto">
<div class="memtemplate">
template&lt;typename ElementA_, typename LayoutA_, typename ElementB_, typename LayoutB_, typename ElementC_, typename LayoutC_, typename ElementAccumulator_ = ElementC_, typename OperatorClass_ = arch::OpClassSimt, typename ArchTag_ = arch::Sm70, typename ThreadblockShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::ThreadblockShape, typename WarpShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::WarpShape, typename InstructionShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::InstructionShape, typename EpilogueOutputOp_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::EpilogueOutputOp, typename ConvertScaledOp_ = cutlass::epilogue::thread::Convert&lt; ElementAccumulator_, DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementAccumulator_, ElementAccumulator_&gt;::EpilogueOutputOp::kCount, ElementAccumulator_&gt;, typename ReductionOp_ = cutlass::reduction::thread::ReduceAdd&lt; ElementAccumulator_, typename EpilogueOutputOp_::ElementAccumulator, EpilogueOutputOp_::kCount&gt;, typename ThreadblockSwizzle_ = threadblock::GemmSplitKHorizontalThreadblockSwizzle, int Stages = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kStages, int kAlignmentA = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentA, int kAlignmentB = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentB, typename Operator_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::Operator&gt; 
</div>
<table class="memname">
<tr>
<td class="memname">using <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html">cutlass::gemm::device::GemmSplitKParallel</a>&lt; ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ &gt;::<a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#a4a100b8064c285bdb06fe3e523a07d1c">GemmKernel</a> = typename <a class="el" href="structcutlass_1_1gemm_1_1kernel_1_1DefaultGemmSplitKParallel.html">kernel::DefaultGemmSplitKParallel</a>&lt; <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#a372091a0c8d52bb9aab75c274039ceb7">ElementA</a>, <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#ae59bd456dc03a51e64a655367382535d">LayoutA</a>, kAlignmentA, <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#a5a1c15eb1246208cf00bd811b8df85ba">ElementB</a>, <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#a08550ee41dd02322c514364035f5d098">LayoutB</a>, kAlignmentB, <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#a56649fb86263ffd12aae9f7ae2e6506e">ElementAccumulator</a>, <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#ab2572b47bae5f0ae0f06647cfe86ad4e">LayoutC</a>, <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#a56649fb86263ffd12aae9f7ae2e6506e">ElementAccumulator</a>, <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#a1bc1686f83acac48e8a1695b71518b16">OperatorClass</a>, <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#a2a48eb6e51e2315e945882d5e70ebb2f">ArchTag</a>, <a class="el" 
href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#ad174d9818e8a4857b65e5da6da7a45b3">ThreadblockShape</a>, <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#a36698ab97ffbc16808b60f041a1e583a">WarpShape</a>, <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#ab2e2468a859f14502fd18013859ec9e6">InstructionShape</a>, <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#affb7a5c96c9e8b04eb94a464e5fdc48b">ConvertScaledOp</a>, <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#aacc6cfe6ebe0b33ec7577c654303f70d">ThreadblockSwizzle</a>, <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#af1f647942f7734bbf01e473118f2512c">kStages</a>, <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#ae436b25ceca72104f23d09442de78f73">Operator</a> &gt;::<a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#a4a100b8064c285bdb06fe3e523a07d1c">GemmKernel</a></td>
</tr>
</table>
</div><div class="memdoc">
</div>
</div>
<!-- Member entry for the type alias GemmSplitKParallel::InstructionShape. The memtemplate div repeats the class template parameter list with its defaults; the memname table shows the alias declaration, InstructionShape = InstructionShape_. The memdoc div is empty: no description text accompanies this alias. -->
<a class="anchor" id="ab2e2468a859f14502fd18013859ec9e6"></a>
<div class="memitem">
<div class="memproto">
<div class="memtemplate">
template&lt;typename ElementA_, typename LayoutA_, typename ElementB_, typename LayoutB_, typename ElementC_, typename LayoutC_, typename ElementAccumulator_ = ElementC_, typename OperatorClass_ = arch::OpClassSimt, typename ArchTag_ = arch::Sm70, typename ThreadblockShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::ThreadblockShape, typename WarpShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::WarpShape, typename InstructionShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::InstructionShape, typename EpilogueOutputOp_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::EpilogueOutputOp, typename ConvertScaledOp_ = cutlass::epilogue::thread::Convert&lt; ElementAccumulator_, DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementAccumulator_, ElementAccumulator_&gt;::EpilogueOutputOp::kCount, ElementAccumulator_&gt;, typename ReductionOp_ = cutlass::reduction::thread::ReduceAdd&lt; ElementAccumulator_, typename EpilogueOutputOp_::ElementAccumulator, EpilogueOutputOp_::kCount&gt;, typename ThreadblockSwizzle_ = threadblock::GemmSplitKHorizontalThreadblockSwizzle, int Stages = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kStages, int kAlignmentA = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentA, int kAlignmentB = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentB, typename Operator_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::Operator&gt; 
</div>
<table class="memname">
<tr>
<td class="memname">using <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html">cutlass::gemm::device::GemmSplitKParallel</a>&lt; ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ &gt;::<a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#ab2e2468a859f14502fd18013859ec9e6">InstructionShape</a> = InstructionShape_</td>
</tr>
</table>
</div><div class="memdoc">
</div>
</div>
<!-- Member entry for the type alias GemmSplitKParallel::LayoutA. The memtemplate div repeats the class template parameter list with its defaults; the memname table shows the alias declaration, LayoutA = LayoutA_. The memdoc div is empty: no description text accompanies this alias. -->
<a class="anchor" id="ae59bd456dc03a51e64a655367382535d"></a>
<div class="memitem">
<div class="memproto">
<div class="memtemplate">
template&lt;typename ElementA_, typename LayoutA_, typename ElementB_, typename LayoutB_, typename ElementC_, typename LayoutC_, typename ElementAccumulator_ = ElementC_, typename OperatorClass_ = arch::OpClassSimt, typename ArchTag_ = arch::Sm70, typename ThreadblockShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::ThreadblockShape, typename WarpShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::WarpShape, typename InstructionShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::InstructionShape, typename EpilogueOutputOp_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::EpilogueOutputOp, typename ConvertScaledOp_ = cutlass::epilogue::thread::Convert&lt; ElementAccumulator_, DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementAccumulator_, ElementAccumulator_&gt;::EpilogueOutputOp::kCount, ElementAccumulator_&gt;, typename ReductionOp_ = cutlass::reduction::thread::ReduceAdd&lt; ElementAccumulator_, typename EpilogueOutputOp_::ElementAccumulator, EpilogueOutputOp_::kCount&gt;, typename ThreadblockSwizzle_ = threadblock::GemmSplitKHorizontalThreadblockSwizzle, int Stages = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kStages, int kAlignmentA = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentA, int kAlignmentB = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentB, typename Operator_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::Operator&gt; 
</div>
<table class="memname">
<tr>
<td class="memname">using <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html">cutlass::gemm::device::GemmSplitKParallel</a>&lt; ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ &gt;::<a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#ae59bd456dc03a51e64a655367382535d">LayoutA</a> = LayoutA_</td>
</tr>
</table>
</div><div class="memdoc">
</div>
</div>
<!-- Member entry for the type alias GemmSplitKParallel::LayoutB. The memtemplate div repeats the class template parameter list with its defaults; the memname table shows the alias declaration, LayoutB = LayoutB_. The memdoc div is empty: no description text accompanies this alias. -->
<a class="anchor" id="a08550ee41dd02322c514364035f5d098"></a>
<div class="memitem">
<div class="memproto">
<div class="memtemplate">
template&lt;typename ElementA_, typename LayoutA_, typename ElementB_, typename LayoutB_, typename ElementC_, typename LayoutC_, typename ElementAccumulator_ = ElementC_, typename OperatorClass_ = arch::OpClassSimt, typename ArchTag_ = arch::Sm70, typename ThreadblockShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::ThreadblockShape, typename WarpShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::WarpShape, typename InstructionShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::InstructionShape, typename EpilogueOutputOp_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::EpilogueOutputOp, typename ConvertScaledOp_ = cutlass::epilogue::thread::Convert&lt; ElementAccumulator_, DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementAccumulator_, ElementAccumulator_&gt;::EpilogueOutputOp::kCount, ElementAccumulator_&gt;, typename ReductionOp_ = cutlass::reduction::thread::ReduceAdd&lt; ElementAccumulator_, typename EpilogueOutputOp_::ElementAccumulator, EpilogueOutputOp_::kCount&gt;, typename ThreadblockSwizzle_ = threadblock::GemmSplitKHorizontalThreadblockSwizzle, int Stages = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kStages, int kAlignmentA = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentA, int kAlignmentB = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentB, typename Operator_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::Operator&gt; 
</div>
<table class="memname">
<tr>
<td class="memname">using <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html">cutlass::gemm::device::GemmSplitKParallel</a>&lt; ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ &gt;::<a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#a08550ee41dd02322c514364035f5d098">LayoutB</a> = LayoutB_</td>
</tr>
</table>
</div><div class="memdoc">
</div>
</div>
<!-- Type alias entry: LayoutC, declared as "using ... LayoutC = LayoutC_". The detail section (memdoc) carries no text. -->
<a class="anchor" id="ab2572b47bae5f0ae0f06647cfe86ad4e"></a>
<div class="memitem">
  <div class="memproto">
    <div class="memtemplate">
      template&lt;typename ElementA_, typename LayoutA_, typename ElementB_, typename LayoutB_, typename ElementC_, typename LayoutC_, typename ElementAccumulator_ = ElementC_, typename OperatorClass_ = arch::OpClassSimt, typename ArchTag_ = arch::Sm70, typename ThreadblockShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::ThreadblockShape, typename WarpShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::WarpShape, typename InstructionShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::InstructionShape, typename EpilogueOutputOp_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::EpilogueOutputOp, typename ConvertScaledOp_ = cutlass::epilogue::thread::Convert&lt; ElementAccumulator_, DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementAccumulator_, ElementAccumulator_&gt;::EpilogueOutputOp::kCount, ElementAccumulator_&gt;, typename ReductionOp_ = cutlass::reduction::thread::ReduceAdd&lt; ElementAccumulator_, typename EpilogueOutputOp_::ElementAccumulator, EpilogueOutputOp_::kCount&gt;, typename ThreadblockSwizzle_ = threadblock::GemmSplitKHorizontalThreadblockSwizzle, int Stages = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kStages, int kAlignmentA = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentA, int kAlignmentB = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentB, typename Operator_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::Operator&gt;
    </div>
    <table class="memname">
      <tr>
        <td class="memname">using <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html">cutlass::gemm::device::GemmSplitKParallel</a>&lt; ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ &gt;::<a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#ab2572b47bae5f0ae0f06647cfe86ad4e">LayoutC</a> = LayoutC_</td>
      </tr>
    </table>
  </div>
  <div class="memdoc">
  </div>
</div>
<!-- Type alias entry: Operator, declared as "using ... Operator = Operator_". The detail section (memdoc) carries no text. -->
<a class="anchor" id="ae436b25ceca72104f23d09442de78f73"></a>
<div class="memitem">
  <div class="memproto">
    <div class="memtemplate">
      template&lt;typename ElementA_, typename LayoutA_, typename ElementB_, typename LayoutB_, typename ElementC_, typename LayoutC_, typename ElementAccumulator_ = ElementC_, typename OperatorClass_ = arch::OpClassSimt, typename ArchTag_ = arch::Sm70, typename ThreadblockShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::ThreadblockShape, typename WarpShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::WarpShape, typename InstructionShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::InstructionShape, typename EpilogueOutputOp_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::EpilogueOutputOp, typename ConvertScaledOp_ = cutlass::epilogue::thread::Convert&lt; ElementAccumulator_, DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementAccumulator_, ElementAccumulator_&gt;::EpilogueOutputOp::kCount, ElementAccumulator_&gt;, typename ReductionOp_ = cutlass::reduction::thread::ReduceAdd&lt; ElementAccumulator_, typename EpilogueOutputOp_::ElementAccumulator, EpilogueOutputOp_::kCount&gt;, typename ThreadblockSwizzle_ = threadblock::GemmSplitKHorizontalThreadblockSwizzle, int Stages = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kStages, int kAlignmentA = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentA, int kAlignmentB = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentB, typename Operator_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::Operator&gt;
    </div>
    <table class="memname">
      <tr>
        <td class="memname">using <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html">cutlass::gemm::device::GemmSplitKParallel</a>&lt; ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ &gt;::<a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#ae436b25ceca72104f23d09442de78f73">Operator</a> = Operator_</td>
      </tr>
    </table>
  </div>
  <div class="memdoc">
  </div>
</div>
<!-- Type alias entry: OperatorClass, declared as "using ... OperatorClass = OperatorClass_". The detail section (memdoc) carries no text. -->
<a class="anchor" id="a1bc1686f83acac48e8a1695b71518b16"></a>
<div class="memitem">
  <div class="memproto">
    <div class="memtemplate">
      template&lt;typename ElementA_, typename LayoutA_, typename ElementB_, typename LayoutB_, typename ElementC_, typename LayoutC_, typename ElementAccumulator_ = ElementC_, typename OperatorClass_ = arch::OpClassSimt, typename ArchTag_ = arch::Sm70, typename ThreadblockShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::ThreadblockShape, typename WarpShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::WarpShape, typename InstructionShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::InstructionShape, typename EpilogueOutputOp_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::EpilogueOutputOp, typename ConvertScaledOp_ = cutlass::epilogue::thread::Convert&lt; ElementAccumulator_, DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementAccumulator_, ElementAccumulator_&gt;::EpilogueOutputOp::kCount, ElementAccumulator_&gt;, typename ReductionOp_ = cutlass::reduction::thread::ReduceAdd&lt; ElementAccumulator_, typename EpilogueOutputOp_::ElementAccumulator, EpilogueOutputOp_::kCount&gt;, typename ThreadblockSwizzle_ = threadblock::GemmSplitKHorizontalThreadblockSwizzle, int Stages = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kStages, int kAlignmentA = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentA, int kAlignmentB = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentB, typename Operator_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::Operator&gt;
    </div>
    <table class="memname">
      <tr>
        <td class="memname">using <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html">cutlass::gemm::device::GemmSplitKParallel</a>&lt; ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ &gt;::<a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#a1bc1686f83acac48e8a1695b71518b16">OperatorClass</a> = OperatorClass_</td>
      </tr>
    </table>
  </div>
  <div class="memdoc">
  </div>
</div>
<!-- Type alias entry: ReductionKernel, declared as an instantiation of cutlass::reduction::kernel::ReduceSplitK over a cutlass::MatrixShape, EpilogueOutputOp and ReductionOp. The detail section (memdoc) carries no text. -->
<a class="anchor" id="aaf83264eb3effceee610d9547ddf32e9"></a>
<div class="memitem">
  <div class="memproto">
    <div class="memtemplate">
      template&lt;typename ElementA_, typename LayoutA_, typename ElementB_, typename LayoutB_, typename ElementC_, typename LayoutC_, typename ElementAccumulator_ = ElementC_, typename OperatorClass_ = arch::OpClassSimt, typename ArchTag_ = arch::Sm70, typename ThreadblockShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::ThreadblockShape, typename WarpShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::WarpShape, typename InstructionShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::InstructionShape, typename EpilogueOutputOp_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::EpilogueOutputOp, typename ConvertScaledOp_ = cutlass::epilogue::thread::Convert&lt; ElementAccumulator_, DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementAccumulator_, ElementAccumulator_&gt;::EpilogueOutputOp::kCount, ElementAccumulator_&gt;, typename ReductionOp_ = cutlass::reduction::thread::ReduceAdd&lt; ElementAccumulator_, typename EpilogueOutputOp_::ElementAccumulator, EpilogueOutputOp_::kCount&gt;, typename ThreadblockSwizzle_ = threadblock::GemmSplitKHorizontalThreadblockSwizzle, int Stages = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kStages, int kAlignmentA = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentA, int kAlignmentB = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentB, typename Operator_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::Operator&gt;
    </div>
    <table class="memname">
      <tr>
        <td class="memname">using <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html">cutlass::gemm::device::GemmSplitKParallel</a>&lt; ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ &gt;::<a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#aaf83264eb3effceee610d9547ddf32e9">ReductionKernel</a> = <a class="el" href="classcutlass_1_1reduction_1_1kernel_1_1ReduceSplitK.html">cutlass::reduction::kernel::ReduceSplitK</a>&lt; <a class="el" href="structcutlass_1_1MatrixShape.html">cutlass::MatrixShape</a>&lt;4, 32 * EpilogueOutputOp::kCount&gt;, <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#a2b8223379b65f5117d4d8dfd4f6cbc13">EpilogueOutputOp</a>, <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#ac82ba3da12b03bc91586a3947ce99fc5">ReductionOp</a> &gt;</td>
      </tr>
    </table>
  </div>
  <div class="memdoc">
  </div>
</div>
<!-- Type alias entry: ReductionOp, declared as "using ... ReductionOp = ReductionOp_". The detail section (memdoc) carries no text. -->
<a class="anchor" id="ac82ba3da12b03bc91586a3947ce99fc5"></a>
<div class="memitem">
  <div class="memproto">
    <div class="memtemplate">
      template&lt;typename ElementA_, typename LayoutA_, typename ElementB_, typename LayoutB_, typename ElementC_, typename LayoutC_, typename ElementAccumulator_ = ElementC_, typename OperatorClass_ = arch::OpClassSimt, typename ArchTag_ = arch::Sm70, typename ThreadblockShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::ThreadblockShape, typename WarpShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::WarpShape, typename InstructionShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::InstructionShape, typename EpilogueOutputOp_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::EpilogueOutputOp, typename ConvertScaledOp_ = cutlass::epilogue::thread::Convert&lt; ElementAccumulator_, DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementAccumulator_, ElementAccumulator_&gt;::EpilogueOutputOp::kCount, ElementAccumulator_&gt;, typename ReductionOp_ = cutlass::reduction::thread::ReduceAdd&lt; ElementAccumulator_, typename EpilogueOutputOp_::ElementAccumulator, EpilogueOutputOp_::kCount&gt;, typename ThreadblockSwizzle_ = threadblock::GemmSplitKHorizontalThreadblockSwizzle, int Stages = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kStages, int kAlignmentA = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentA, int kAlignmentB = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentB, typename Operator_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::Operator&gt;
    </div>
    <table class="memname">
      <tr>
        <td class="memname">using <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html">cutlass::gemm::device::GemmSplitKParallel</a>&lt; ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ &gt;::<a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#ac82ba3da12b03bc91586a3947ce99fc5">ReductionOp</a> = ReductionOp_</td>
      </tr>
    </table>
  </div>
  <div class="memdoc">
  </div>
</div>
<!-- Type alias entry: ThreadblockShape, declared as "using ... ThreadblockShape = ThreadblockShape_". The detail section (memdoc) carries no text. -->
<a class="anchor" id="ad174d9818e8a4857b65e5da6da7a45b3"></a>
<div class="memitem">
  <div class="memproto">
    <div class="memtemplate">
      template&lt;typename ElementA_, typename LayoutA_, typename ElementB_, typename LayoutB_, typename ElementC_, typename LayoutC_, typename ElementAccumulator_ = ElementC_, typename OperatorClass_ = arch::OpClassSimt, typename ArchTag_ = arch::Sm70, typename ThreadblockShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::ThreadblockShape, typename WarpShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::WarpShape, typename InstructionShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::InstructionShape, typename EpilogueOutputOp_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::EpilogueOutputOp, typename ConvertScaledOp_ = cutlass::epilogue::thread::Convert&lt; ElementAccumulator_, DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementAccumulator_, ElementAccumulator_&gt;::EpilogueOutputOp::kCount, ElementAccumulator_&gt;, typename ReductionOp_ = cutlass::reduction::thread::ReduceAdd&lt; ElementAccumulator_, typename EpilogueOutputOp_::ElementAccumulator, EpilogueOutputOp_::kCount&gt;, typename ThreadblockSwizzle_ = threadblock::GemmSplitKHorizontalThreadblockSwizzle, int Stages = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kStages, int kAlignmentA = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentA, int kAlignmentB = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentB, typename Operator_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::Operator&gt;
    </div>
    <table class="memname">
      <tr>
        <td class="memname">using <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html">cutlass::gemm::device::GemmSplitKParallel</a>&lt; ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ &gt;::<a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#ad174d9818e8a4857b65e5da6da7a45b3">ThreadblockShape</a> = ThreadblockShape_</td>
      </tr>
    </table>
  </div>
  <div class="memdoc">
  </div>
</div>
<!-- Type alias entry: ThreadblockSwizzle, declared as "using ... ThreadblockSwizzle = ThreadblockSwizzle_". The detail section (memdoc) carries no text. -->
<a class="anchor" id="aacc6cfe6ebe0b33ec7577c654303f70d"></a>
<div class="memitem">
  <div class="memproto">
    <div class="memtemplate">
      template&lt;typename ElementA_, typename LayoutA_, typename ElementB_, typename LayoutB_, typename ElementC_, typename LayoutC_, typename ElementAccumulator_ = ElementC_, typename OperatorClass_ = arch::OpClassSimt, typename ArchTag_ = arch::Sm70, typename ThreadblockShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::ThreadblockShape, typename WarpShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::WarpShape, typename InstructionShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::InstructionShape, typename EpilogueOutputOp_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::EpilogueOutputOp, typename ConvertScaledOp_ = cutlass::epilogue::thread::Convert&lt; ElementAccumulator_, DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementAccumulator_, ElementAccumulator_&gt;::EpilogueOutputOp::kCount, ElementAccumulator_&gt;, typename ReductionOp_ = cutlass::reduction::thread::ReduceAdd&lt; ElementAccumulator_, typename EpilogueOutputOp_::ElementAccumulator, EpilogueOutputOp_::kCount&gt;, typename ThreadblockSwizzle_ = threadblock::GemmSplitKHorizontalThreadblockSwizzle, int Stages = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kStages, int kAlignmentA = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentA, int kAlignmentB = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentB, typename Operator_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::Operator&gt;
    </div>
    <table class="memname">
      <tr>
        <td class="memname">using <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html">cutlass::gemm::device::GemmSplitKParallel</a>&lt; ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ &gt;::<a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#aacc6cfe6ebe0b33ec7577c654303f70d">ThreadblockSwizzle</a> = ThreadblockSwizzle_</td>
      </tr>
    </table>
  </div>
  <div class="memdoc">
  </div>
</div>
<!-- Type alias entry: WarpShape, declared as "using ... WarpShape = WarpShape_". The detail section (memdoc) carries no text. -->
<a class="anchor" id="a36698ab97ffbc16808b60f041a1e583a"></a>
<div class="memitem">
  <div class="memproto">
    <div class="memtemplate">
      template&lt;typename ElementA_, typename LayoutA_, typename ElementB_, typename LayoutB_, typename ElementC_, typename LayoutC_, typename ElementAccumulator_ = ElementC_, typename OperatorClass_ = arch::OpClassSimt, typename ArchTag_ = arch::Sm70, typename ThreadblockShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::ThreadblockShape, typename WarpShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::WarpShape, typename InstructionShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::InstructionShape, typename EpilogueOutputOp_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::EpilogueOutputOp, typename ConvertScaledOp_ = cutlass::epilogue::thread::Convert&lt; ElementAccumulator_, DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementAccumulator_, ElementAccumulator_&gt;::EpilogueOutputOp::kCount, ElementAccumulator_&gt;, typename ReductionOp_ = cutlass::reduction::thread::ReduceAdd&lt; ElementAccumulator_, typename EpilogueOutputOp_::ElementAccumulator, EpilogueOutputOp_::kCount&gt;, typename ThreadblockSwizzle_ = threadblock::GemmSplitKHorizontalThreadblockSwizzle, int Stages = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kStages, int kAlignmentA = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentA, int kAlignmentB = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentB, typename Operator_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::Operator&gt;
    </div>
    <table class="memname">
      <tr>
        <td class="memname">using <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html">cutlass::gemm::device::GemmSplitKParallel</a>&lt; ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ &gt;::<a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#a36698ab97ffbc16808b60f041a1e583a">WarpShape</a> = WarpShape_</td>
      </tr>
    </table>
  </div>
  <div class="memdoc">
  </div>
</div>
<h2 class="groupheader">Constructor &amp; Destructor Documentation</h2>
<!-- Constructor entry: GemmSplitKParallel() with an empty parameter list, labelled "inline". The detail section (memdoc) carries no text. -->
<a class="anchor" id="abfb1166a9c55270ff8f1b265516a418c"></a>
<div class="memitem">
  <div class="memproto">
    <div class="memtemplate">
      template&lt;typename ElementA_, typename LayoutA_, typename ElementB_, typename LayoutB_, typename ElementC_, typename LayoutC_, typename ElementAccumulator_ = ElementC_, typename OperatorClass_ = arch::OpClassSimt, typename ArchTag_ = arch::Sm70, typename ThreadblockShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::ThreadblockShape, typename WarpShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::WarpShape, typename InstructionShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::InstructionShape, typename EpilogueOutputOp_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::EpilogueOutputOp, typename ConvertScaledOp_ = cutlass::epilogue::thread::Convert&lt; ElementAccumulator_, DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementAccumulator_, ElementAccumulator_&gt;::EpilogueOutputOp::kCount, ElementAccumulator_&gt;, typename ReductionOp_ = cutlass::reduction::thread::ReduceAdd&lt; ElementAccumulator_, typename EpilogueOutputOp_::ElementAccumulator, EpilogueOutputOp_::kCount&gt;, typename ThreadblockSwizzle_ = threadblock::GemmSplitKHorizontalThreadblockSwizzle, int Stages = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kStages, int kAlignmentA = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentA, int kAlignmentB = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentB, typename Operator_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::Operator&gt;
    </div>
    <table class="mlabels">
      <tr>
        <td class="mlabels-left">
          <table class="memname">
            <tr>
              <td class="memname"><a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html">cutlass::gemm::device::GemmSplitKParallel</a>&lt; ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ &gt;::<a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html">GemmSplitKParallel</a> </td>
              <td>(</td>
              <td class="paramname"></td>
              <td>)</td>
              <td></td>
            </tr>
          </table>
        </td>
        <td class="mlabels-right">
          <span class="mlabels"><span class="mlabel">inline</span></span>
        </td>
      </tr>
    </table>
  </div>
  <div class="memdoc">
  </div>
</div>
<h2 class="groupheader">Member Function Documentation</h2>
<!-- Member function entry: static Status can_implement(Arguments const reference named args), labelled "inline" and "static". The detail section (memdoc) carries no text. -->
<a class="anchor" id="a114b122602b425909f9be0df461353a4"></a>
<div class="memitem">
  <div class="memproto">
    <div class="memtemplate">
      template&lt;typename ElementA_, typename LayoutA_, typename ElementB_, typename LayoutB_, typename ElementC_, typename LayoutC_, typename ElementAccumulator_ = ElementC_, typename OperatorClass_ = arch::OpClassSimt, typename ArchTag_ = arch::Sm70, typename ThreadblockShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::ThreadblockShape, typename WarpShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::WarpShape, typename InstructionShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::InstructionShape, typename EpilogueOutputOp_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::EpilogueOutputOp, typename ConvertScaledOp_ = cutlass::epilogue::thread::Convert&lt; ElementAccumulator_, DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementAccumulator_, ElementAccumulator_&gt;::EpilogueOutputOp::kCount, ElementAccumulator_&gt;, typename ReductionOp_ = cutlass::reduction::thread::ReduceAdd&lt; ElementAccumulator_, typename EpilogueOutputOp_::ElementAccumulator, EpilogueOutputOp_::kCount&gt;, typename ThreadblockSwizzle_ = threadblock::GemmSplitKHorizontalThreadblockSwizzle, int Stages = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kStages, int kAlignmentA = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentA, int kAlignmentB = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentB, typename Operator_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::Operator&gt;
    </div>
    <table class="mlabels">
      <tr>
        <td class="mlabels-left">
          <table class="memname">
            <tr>
              <td class="memname">static <a class="el" href="namespacecutlass.html#ac5a88c5840a28a9e0206b9cc7812a18d">Status</a> <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html">cutlass::gemm::device::GemmSplitKParallel</a>&lt; ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ &gt;::can_implement </td>
              <td>(</td>
              <td class="paramtype"><a class="el" href="structcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel_1_1Arguments.html">Arguments</a> const &amp;&#160;</td>
              <td class="paramname"><em>args</em></td>
              <td>)</td>
              <td></td>
            </tr>
          </table>
        </td>
        <td class="mlabels-right">
          <span class="mlabels"><span class="mlabel">inline</span><span class="mlabel">static</span></span>
        </td>
      </tr>
    </table>
  </div>
  <div class="memdoc">
  </div>
</div>
<a class="anchor" id="ae8fb82c40078cf84c211f10f726caaf5"></a>
<div class="memitem">
<!-- Member entry: get_workspace_size.
     Signature shown below: static, return type size_t, one parameter "args"
     of type Arguments (const reference). Labels: inline, static.
     The memtemplate div carries the class template's full parameter list with defaults.
     The memdoc div is empty: no description text was generated for this member. -->
<div class="memproto">
<div class="memtemplate">
template&lt;typename ElementA_, typename LayoutA_, typename ElementB_, typename LayoutB_, typename ElementC_, typename LayoutC_, typename ElementAccumulator_ = ElementC_, typename OperatorClass_ = arch::OpClassSimt, typename ArchTag_ = arch::Sm70, typename ThreadblockShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::ThreadblockShape, typename WarpShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::WarpShape, typename InstructionShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::InstructionShape, typename EpilogueOutputOp_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::EpilogueOutputOp, typename ConvertScaledOp_ = cutlass::epilogue::thread::Convert&lt; ElementAccumulator_, DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementAccumulator_, ElementAccumulator_&gt;::EpilogueOutputOp::kCount, ElementAccumulator_&gt;, typename ReductionOp_ = cutlass::reduction::thread::ReduceAdd&lt; ElementAccumulator_, typename EpilogueOutputOp_::ElementAccumulator, EpilogueOutputOp_::kCount&gt;, typename ThreadblockSwizzle_ = threadblock::GemmSplitKHorizontalThreadblockSwizzle, int Stages = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kStages, int kAlignmentA = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentA, int kAlignmentB = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentB, typename Operator_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::Operator&gt; 
</div>
<table class="mlabels">
<tr>
<td class="mlabels-left">
<table class="memname">
<tr>
<td class="memname">static size_t <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html">cutlass::gemm::device::GemmSplitKParallel</a>&lt; ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ &gt;::get_workspace_size </td>
<td>(</td>
<td class="paramtype"><a class="el" href="structcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel_1_1Arguments.html">Arguments</a> const &amp;&#160;</td>
<td class="paramname"><em>args</em></td><td>)</td>
<td></td>
</tr>
</table>
</td>
<td class="mlabels-right">
<span class="mlabels"><span class="mlabel">inline</span><span class="mlabel">static</span></span> </td>
</tr>
</table>
</div><div class="memdoc">
</div>
</div>
<a class="anchor" id="a7085b7cf85bc1bcd202ea6928656d966"></a>
<div class="memitem">
<!-- Member entry: initialize.
     Signature shown below: return type Status (linked to namespacecutlass.html),
     parameters "args" (Arguments, const reference) and "workspace" (void pointer);
     no default values are shown. Label: inline.
     The memtemplate div carries the class template's full parameter list with defaults.
     The memdoc div is empty: no description text was generated for this member. -->
<div class="memproto">
<div class="memtemplate">
template&lt;typename ElementA_, typename LayoutA_, typename ElementB_, typename LayoutB_, typename ElementC_, typename LayoutC_, typename ElementAccumulator_ = ElementC_, typename OperatorClass_ = arch::OpClassSimt, typename ArchTag_ = arch::Sm70, typename ThreadblockShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::ThreadblockShape, typename WarpShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::WarpShape, typename InstructionShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::InstructionShape, typename EpilogueOutputOp_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::EpilogueOutputOp, typename ConvertScaledOp_ = cutlass::epilogue::thread::Convert&lt; ElementAccumulator_, DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementAccumulator_, ElementAccumulator_&gt;::EpilogueOutputOp::kCount, ElementAccumulator_&gt;, typename ReductionOp_ = cutlass::reduction::thread::ReduceAdd&lt; ElementAccumulator_, typename EpilogueOutputOp_::ElementAccumulator, EpilogueOutputOp_::kCount&gt;, typename ThreadblockSwizzle_ = threadblock::GemmSplitKHorizontalThreadblockSwizzle, int Stages = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kStages, int kAlignmentA = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentA, int kAlignmentB = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentB, typename Operator_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::Operator&gt; 
</div>
<table class="mlabels">
<tr>
<td class="mlabels-left">
<table class="memname">
<tr>
<td class="memname"><a class="el" href="namespacecutlass.html#ac5a88c5840a28a9e0206b9cc7812a18d">Status</a> <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html">cutlass::gemm::device::GemmSplitKParallel</a>&lt; ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ &gt;::initialize </td>
<td>(</td>
<td class="paramtype"><a class="el" href="structcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel_1_1Arguments.html">Arguments</a> const &amp;&#160;</td>
<td class="paramname"><em>args</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">void *&#160;</td>
<td class="paramname"><em>workspace</em>&#160;</td>
</tr>
<tr>
<td></td>
<td>)</td>
<td></td><td></td>
</tr>
</table>
</td>
<td class="mlabels-right">
<span class="mlabels"><span class="mlabel">inline</span></span> </td>
</tr>
</table>
</div><div class="memdoc">
</div>
</div>
<a class="anchor" id="a5d934b66a5ead9df63bea323abf8f83f"></a>
<div class="memitem">
<!-- Member entry: operator() (single-parameter overload).
     Signature shown below: return type Status, one parameter "stream" of type
     cudaStream_t with default value nullptr. Label: inline.
     The memtemplate div carries the class template's full parameter list with defaults.
     The memdoc div is empty: no description text was generated for this member. -->
<div class="memproto">
<div class="memtemplate">
template&lt;typename ElementA_, typename LayoutA_, typename ElementB_, typename LayoutB_, typename ElementC_, typename LayoutC_, typename ElementAccumulator_ = ElementC_, typename OperatorClass_ = arch::OpClassSimt, typename ArchTag_ = arch::Sm70, typename ThreadblockShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::ThreadblockShape, typename WarpShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::WarpShape, typename InstructionShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::InstructionShape, typename EpilogueOutputOp_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::EpilogueOutputOp, typename ConvertScaledOp_ = cutlass::epilogue::thread::Convert&lt; ElementAccumulator_, DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementAccumulator_, ElementAccumulator_&gt;::EpilogueOutputOp::kCount, ElementAccumulator_&gt;, typename ReductionOp_ = cutlass::reduction::thread::ReduceAdd&lt; ElementAccumulator_, typename EpilogueOutputOp_::ElementAccumulator, EpilogueOutputOp_::kCount&gt;, typename ThreadblockSwizzle_ = threadblock::GemmSplitKHorizontalThreadblockSwizzle, int Stages = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kStages, int kAlignmentA = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentA, int kAlignmentB = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentB, typename Operator_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::Operator&gt; 
</div>
<table class="mlabels">
<tr>
<td class="mlabels-left">
<table class="memname">
<tr>
<td class="memname"><a class="el" href="namespacecutlass.html#ac5a88c5840a28a9e0206b9cc7812a18d">Status</a> <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html">cutlass::gemm::device::GemmSplitKParallel</a>&lt; ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ &gt;::operator() </td>
<td>(</td>
<td class="paramtype">cudaStream_t&#160;</td>
<td class="paramname"><em>stream</em> = <code><a class="el" href="platform_8h.html#ab979d9d4b4923f7c54d6caa6e1a61936">nullptr</a></code></td><td>)</td>
<td></td>
</tr>
</table>
</td>
<td class="mlabels-right">
<span class="mlabels"><span class="mlabel">inline</span></span> </td>
</tr>
</table>
</div><div class="memdoc">
</div>
</div>
<a class="anchor" id="af324f9dd7abc0e3ae6007e6dcf186190"></a>
<div class="memitem">
<!-- Member entry: operator() (three-parameter overload).
     Signature shown below: return type Status, parameters "args" (Arguments,
     const reference), "workspace" (void pointer, default nullptr) and
     "stream" (cudaStream_t, default nullptr). Label: inline.
     The memtemplate div carries the class template's full parameter list with defaults.
     The memdoc div is empty: no description text was generated for this member. -->
<div class="memproto">
<div class="memtemplate">
template&lt;typename ElementA_, typename LayoutA_, typename ElementB_, typename LayoutB_, typename ElementC_, typename LayoutC_, typename ElementAccumulator_ = ElementC_, typename OperatorClass_ = arch::OpClassSimt, typename ArchTag_ = arch::Sm70, typename ThreadblockShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::ThreadblockShape, typename WarpShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::WarpShape, typename InstructionShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::InstructionShape, typename EpilogueOutputOp_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::EpilogueOutputOp, typename ConvertScaledOp_ = cutlass::epilogue::thread::Convert&lt; ElementAccumulator_, DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementAccumulator_, ElementAccumulator_&gt;::EpilogueOutputOp::kCount, ElementAccumulator_&gt;, typename ReductionOp_ = cutlass::reduction::thread::ReduceAdd&lt; ElementAccumulator_, typename EpilogueOutputOp_::ElementAccumulator, EpilogueOutputOp_::kCount&gt;, typename ThreadblockSwizzle_ = threadblock::GemmSplitKHorizontalThreadblockSwizzle, int Stages = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kStages, int kAlignmentA = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentA, int kAlignmentB = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentB, typename Operator_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::Operator&gt; 
</div>
<table class="mlabels">
<tr>
<td class="mlabels-left">
<table class="memname">
<tr>
<td class="memname"><a class="el" href="namespacecutlass.html#ac5a88c5840a28a9e0206b9cc7812a18d">Status</a> <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html">cutlass::gemm::device::GemmSplitKParallel</a>&lt; ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ &gt;::operator() </td>
<td>(</td>
<td class="paramtype"><a class="el" href="structcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel_1_1Arguments.html">Arguments</a> const &amp;&#160;</td>
<td class="paramname"><em>args</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">void *&#160;</td>
<td class="paramname"><em>workspace</em> = <code><a class="el" href="platform_8h.html#ab979d9d4b4923f7c54d6caa6e1a61936">nullptr</a></code>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">cudaStream_t&#160;</td>
<td class="paramname"><em>stream</em> = <code><a class="el" href="platform_8h.html#ab979d9d4b4923f7c54d6caa6e1a61936">nullptr</a></code>&#160;</td>
</tr>
<tr>
<td></td>
<td>)</td>
<td></td><td></td>
</tr>
</table>
</td>
<td class="mlabels-right">
<span class="mlabels"><span class="mlabel">inline</span></span> </td>
</tr>
</table>
</div><div class="memdoc">
</div>
</div>
<a class="anchor" id="aeadc76210d06ec22776aca4a58de9930"></a>
<div class="memitem">
<!-- Member entry: run.
     Signature shown below: return type Status, one parameter "stream" of type
     cudaStream_t with default value nullptr. Label: inline.
     The memtemplate div carries the class template's full parameter list with defaults.
     The memdoc div is empty: no description text was generated for this member. -->
<div class="memproto">
<div class="memtemplate">
template&lt;typename ElementA_, typename LayoutA_, typename ElementB_, typename LayoutB_, typename ElementC_, typename LayoutC_, typename ElementAccumulator_ = ElementC_, typename OperatorClass_ = arch::OpClassSimt, typename ArchTag_ = arch::Sm70, typename ThreadblockShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::ThreadblockShape, typename WarpShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::WarpShape, typename InstructionShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::InstructionShape, typename EpilogueOutputOp_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::EpilogueOutputOp, typename ConvertScaledOp_ = cutlass::epilogue::thread::Convert&lt; ElementAccumulator_, DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementAccumulator_, ElementAccumulator_&gt;::EpilogueOutputOp::kCount, ElementAccumulator_&gt;, typename ReductionOp_ = cutlass::reduction::thread::ReduceAdd&lt; ElementAccumulator_, typename EpilogueOutputOp_::ElementAccumulator, EpilogueOutputOp_::kCount&gt;, typename ThreadblockSwizzle_ = threadblock::GemmSplitKHorizontalThreadblockSwizzle, int Stages = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kStages, int kAlignmentA = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentA, int kAlignmentB = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentB, typename Operator_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::Operator&gt; 
</div>
<table class="mlabels">
<tr>
<td class="mlabels-left">
<table class="memname">
<tr>
<td class="memname"><a class="el" href="namespacecutlass.html#ac5a88c5840a28a9e0206b9cc7812a18d">Status</a> <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html">cutlass::gemm::device::GemmSplitKParallel</a>&lt; ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ &gt;::run </td>
<td>(</td>
<td class="paramtype">cudaStream_t&#160;</td>
<td class="paramname"><em>stream</em> = <code><a class="el" href="platform_8h.html#ab979d9d4b4923f7c54d6caa6e1a61936">nullptr</a></code></td><td>)</td>
<td></td>
</tr>
</table>
</td>
<td class="mlabels-right">
<span class="mlabels"><span class="mlabel">inline</span></span> </td>
</tr>
</table>
</div><div class="memdoc">
</div>
</div>
<a class="anchor" id="a07ad725857d7eb191cbfc135df22b781"></a>
<div class="memitem">
<!-- Member entry: update.
     Signature shown below: return type Status, parameters "args" (Arguments,
     const reference) and "workspace" (void pointer, default nullptr). Label: inline.
     The memtemplate div carries the class template's full parameter list with defaults.
     The memdoc div is empty: no description text was generated for this member. -->
<div class="memproto">
<div class="memtemplate">
template&lt;typename ElementA_, typename LayoutA_, typename ElementB_, typename LayoutB_, typename ElementC_, typename LayoutC_, typename ElementAccumulator_ = ElementC_, typename OperatorClass_ = arch::OpClassSimt, typename ArchTag_ = arch::Sm70, typename ThreadblockShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::ThreadblockShape, typename WarpShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::WarpShape, typename InstructionShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::InstructionShape, typename EpilogueOutputOp_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::EpilogueOutputOp, typename ConvertScaledOp_ = cutlass::epilogue::thread::Convert&lt; ElementAccumulator_, DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementAccumulator_, ElementAccumulator_&gt;::EpilogueOutputOp::kCount, ElementAccumulator_&gt;, typename ReductionOp_ = cutlass::reduction::thread::ReduceAdd&lt; ElementAccumulator_, typename EpilogueOutputOp_::ElementAccumulator, EpilogueOutputOp_::kCount&gt;, typename ThreadblockSwizzle_ = threadblock::GemmSplitKHorizontalThreadblockSwizzle, int Stages = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kStages, int kAlignmentA = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentA, int kAlignmentB = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentB, typename Operator_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::Operator&gt; 
</div>
<table class="mlabels">
<tr>
<td class="mlabels-left">
<table class="memname">
<tr>
<td class="memname"><a class="el" href="namespacecutlass.html#ac5a88c5840a28a9e0206b9cc7812a18d">Status</a> <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html">cutlass::gemm::device::GemmSplitKParallel</a>&lt; ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ &gt;::update </td>
<td>(</td>
<td class="paramtype"><a class="el" href="structcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel_1_1Arguments.html">Arguments</a> const &amp;&#160;</td>
<td class="paramname"><em>args</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">void *&#160;</td>
<td class="paramname"><em>workspace</em> = <code><a class="el" href="platform_8h.html#ab979d9d4b4923f7c54d6caa6e1a61936">nullptr</a></code>&#160;</td>
</tr>
<tr>
<td></td>
<td>)</td>
<td></td><td></td>
</tr>
</table>
</td>
<td class="mlabels-right">
<span class="mlabels"><span class="mlabel">inline</span></span> </td>
</tr>
</table>
</div><div class="memdoc">
</div>
</div>
<h2 class="groupheader">Member Data Documentation</h2>
<a class="anchor" id="af1f647942f7734bbf01e473118f2512c"></a>
<div class="memitem">
<!-- Member data entry: kStages.
     Declaration shown below: "int const ... ::kStages = Stages", i.e. the member
     is initialised from the class template's Stages parameter. Label: static.
     The memtemplate div carries the class template's full parameter list with defaults.
     The memdoc div is empty: no description text was generated for this member. -->
<div class="memproto">
<div class="memtemplate">
template&lt;typename ElementA_, typename LayoutA_, typename ElementB_, typename LayoutB_, typename ElementC_, typename LayoutC_, typename ElementAccumulator_ = ElementC_, typename OperatorClass_ = arch::OpClassSimt, typename ArchTag_ = arch::Sm70, typename ThreadblockShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::ThreadblockShape, typename WarpShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::WarpShape, typename InstructionShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::InstructionShape, typename EpilogueOutputOp_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::EpilogueOutputOp, typename ConvertScaledOp_ = cutlass::epilogue::thread::Convert&lt; ElementAccumulator_, DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementAccumulator_, ElementAccumulator_&gt;::EpilogueOutputOp::kCount, ElementAccumulator_&gt;, typename ReductionOp_ = cutlass::reduction::thread::ReduceAdd&lt; ElementAccumulator_, typename EpilogueOutputOp_::ElementAccumulator, EpilogueOutputOp_::kCount&gt;, typename ThreadblockSwizzle_ = threadblock::GemmSplitKHorizontalThreadblockSwizzle, int Stages = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kStages, int kAlignmentA = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentA, int kAlignmentB = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentB, typename Operator_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::Operator&gt; 
</div>
<table class="mlabels">
<tr>
<td class="mlabels-left">
<table class="memname">
<tr>
<td class="memname">int const <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html">cutlass::gemm::device::GemmSplitKParallel</a>&lt; ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ &gt;::kStages = Stages</td>
</tr>
</table>
</td>
<td class="mlabels-right">
<span class="mlabels"><span class="mlabel">static</span></span> </td>
</tr>
</table>
</div><div class="memdoc">
</div>
</div>
<hr/>The documentation for this class was generated from the following file:<ul>
<li><a class="el" href="device_2gemm__splitk__parallel_8h_source.html">device/gemm_splitk_parallel.h</a></li>
</ul>
</div><!-- contents -->
<!-- start footer part -->
<hr class="footer"/><address class="footer"><small>
Generated by &#160;<a href="https://www.doxygen.org/index.html">
<img class="footer" src="doxygen.png" alt="doxygen"/>
</a> 1.8.11
</small></address>
</body>
</html>