749 lines
121 KiB
HTML
749 lines
121 KiB
HTML
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
|
<html xmlns="http://www.w3.org/1999/xhtml">
|
|
<head>
|
|
<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
|
|
<meta http-equiv="X-UA-Compatible" content="IE=9"/>
|
|
<meta name="generator" content="Doxygen 1.8.11"/>
|
|
<title>CUTLASS: cutlass::gemm::device::GemmSplitKParallel&lt; ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ &gt; Class Template Reference</title>
|
|
<link href="tabs.css" rel="stylesheet" type="text/css"/>
|
|
<script type="text/javascript" src="jquery.js"></script>
|
|
<script type="text/javascript" src="dynsections.js"></script>
|
|
<link href="search/search.css" rel="stylesheet" type="text/css"/>
|
|
<script type="text/javascript" src="search/searchdata.js"></script>
|
|
<script type="text/javascript" src="search/search.js"></script>
|
|
<script type="text/javascript">
/* Call init_search() once the document is ready. */
$(document).ready(function() { init_search(); });
</script>
|
|
<!-- MathJax v2 is loaded over HTTPS from jsDelivr; the old http://cdn.mathjax.org host has been retired. -->
<script type="text/x-mathjax-config">
/* MathJax v2 configuration: tex2jax extension, TeX input jax, HTML-CSS output jax. */
MathJax.Hub.Config({
  extensions: ["tex2jax.js"],
  jax: ["input/TeX","output/HTML-CSS"],
});
</script><script type="text/javascript" src="https://cdn.jsdelivr.net/npm/mathjax@2/MathJax.js"></script>
|
|
<link href="doxygen.css" rel="stylesheet" type="text/css" />
|
|
</head>
|
|
<body>
|
|
<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
|
|
<div id="titlearea">
|
|
<table cellspacing="0" cellpadding="0">
|
|
<tbody>
|
|
<tr style="height: 56px;">
|
|
<td id="projectlogo"><img alt="Logo" src="cutlass-logo-small.png"/></td>
|
|
<td id="projectalign" style="padding-left: 0.5em;">
|
|
<div id="projectname">CUTLASS
|
|
</div>
|
|
<div id="projectbrief">CUDA Templates for Linear Algebra Subroutines and Solvers</div>
|
|
</td>
|
|
</tr>
|
|
</tbody>
|
|
</table>
|
|
</div>
|
|
<!-- end header part -->
|
|
<!-- Generated by Doxygen 1.8.11 -->
|
|
<script type="text/javascript">
/* Global search controller; the inline on* handlers of the search widgets further down call its methods. */
var searchBox = new SearchBox("searchBox", "search",false,'Search');
</script>
|
|
<div id="navrow1" class="tabs">
|
|
<ul class="tablist">
|
|
<li><a href="index.html"><span>Main Page</span></a></li>
|
|
<li><a href="modules.html"><span>Modules</span></a></li>
|
|
<li><a href="namespaces.html"><span>Namespaces</span></a></li>
|
|
<li class="current"><a href="annotated.html"><span>Classes</span></a></li>
|
|
<li><a href="files.html"><span>Files</span></a></li>
|
|
<li>
|
|
<div id="MSearchBox" class="MSearchBoxInactive">
|
|
<span class="left">
|
|
<img id="MSearchSelect" src="search/mag_sel.png"
|
|
onmouseover="return searchBox.OnSearchSelectShow()"
|
|
onmouseout="return searchBox.OnSearchSelectHide()"
|
|
alt=""/>
|
|
<input type="text" id="MSearchField" value="Search" accesskey="S"
|
|
onfocus="searchBox.OnSearchFieldFocus(true)"
|
|
onblur="searchBox.OnSearchFieldFocus(false)"
|
|
onkeyup="searchBox.OnSearchFieldChange(event)"/>
|
|
</span><span class="right">
|
|
<a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.png" alt=""/></a>
|
|
</span>
|
|
</div>
|
|
</li>
|
|
</ul>
|
|
</div>
|
|
<div id="navrow2" class="tabs2">
|
|
<ul class="tablist">
|
|
<li><a href="annotated.html"><span>Class List</span></a></li>
|
|
<li><a href="classes.html"><span>Class Index</span></a></li>
|
|
<li><a href="inherits.html"><span>Class Hierarchy</span></a></li>
|
|
<li><a href="functions.html"><span>Class Members</span></a></li>
|
|
</ul>
|
|
</div>
|
|
<!-- window showing the filter options -->
|
|
<div id="MSearchSelectWindow"
|
|
onmouseover="return searchBox.OnSearchSelectShow()"
|
|
onmouseout="return searchBox.OnSearchSelectHide()"
|
|
onkeydown="return searchBox.OnSearchSelectKey(event)">
|
|
</div>
|
|
|
|
<!-- iframe showing the search results (closed by default) -->
|
|
<div id="MSearchResultsWindow">
|
|
<iframe src="javascript:void(0)" frameborder="0"
|
|
name="MSearchResults" id="MSearchResults">
|
|
</iframe>
|
|
</div>
|
|
|
|
<div id="nav-path" class="navpath">
|
|
<ul>
|
|
<li class="navelem"><a class="el" href="namespacecutlass.html">cutlass</a></li><li class="navelem"><a class="el" href="namespacecutlass_1_1gemm.html">gemm</a></li><li class="navelem"><a class="el" href="namespacecutlass_1_1gemm_1_1device.html">device</a></li><li class="navelem"><a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html">GemmSplitKParallel</a></li> </ul>
|
|
</div>
|
|
</div><!-- top -->
|
|
<div class="header">
|
|
<div class="summary">
|
|
<a href="#nested-classes">Classes</a> |
|
|
<a href="#pub-types">Public Types</a> |
|
|
<a href="#pub-methods">Public Member Functions</a> |
|
|
<a href="#pub-static-methods">Static Public Member Functions</a> |
|
|
<a href="#pub-static-attribs">Static Public Attributes</a> |
|
|
<a href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel-members.html">List of all members</a> </div>
|
|
<div class="headertitle">
|
|
<div class="title">cutlass::gemm::device::GemmSplitKParallel&lt; ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ &gt; Class Template Reference</div> </div>
|
|
</div><!--header-->
|
|
<div class="contents">
|
|
|
|
<p><code>#include &lt;<a class="el" href="device_2gemm__splitk__parallel_8h_source.html">gemm_splitk_parallel.h</a>&gt;</code></p>
|
|
<table class="memberdecls">
|
|
<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="nested-classes"></a>
|
|
Classes</h2></td></tr>
|
|
<tr class="memitem:"><td class="memItemLeft" align="right" valign="top">struct  </td><td class="memItemRight" valign="bottom"><a class="el" href="structcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel_1_1Arguments.html">Arguments</a></td></tr>
|
|
<tr class="memdesc:"><td class="mdescLeft"> </td><td class="mdescRight">Argument structure. <a href="structcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel_1_1Arguments.html#details">More...</a><br /></td></tr>
|
|
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
|
|
</table><table class="memberdecls">
|
|
<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="pub-types"></a>
|
|
Public Types</h2></td></tr>
|
|
<tr class="memitem:a372091a0c8d52bb9aab75c274039ceb7"><td class="memItemLeft" align="right" valign="top">using </td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#a372091a0c8d52bb9aab75c274039ceb7">ElementA</a> = ElementA_</td></tr>
|
|
<tr class="separator:a372091a0c8d52bb9aab75c274039ceb7"><td class="memSeparator" colspan="2"> </td></tr>
|
|
<tr class="memitem:ae59bd456dc03a51e64a655367382535d"><td class="memItemLeft" align="right" valign="top">using </td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#ae59bd456dc03a51e64a655367382535d">LayoutA</a> = LayoutA_</td></tr>
|
|
<tr class="separator:ae59bd456dc03a51e64a655367382535d"><td class="memSeparator" colspan="2"> </td></tr>
|
|
<tr class="memitem:a5a1c15eb1246208cf00bd811b8df85ba"><td class="memItemLeft" align="right" valign="top">using </td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#a5a1c15eb1246208cf00bd811b8df85ba">ElementB</a> = ElementB_</td></tr>
|
|
<tr class="separator:a5a1c15eb1246208cf00bd811b8df85ba"><td class="memSeparator" colspan="2"> </td></tr>
|
|
<tr class="memitem:a08550ee41dd02322c514364035f5d098"><td class="memItemLeft" align="right" valign="top">using </td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#a08550ee41dd02322c514364035f5d098">LayoutB</a> = LayoutB_</td></tr>
|
|
<tr class="separator:a08550ee41dd02322c514364035f5d098"><td class="memSeparator" colspan="2"> </td></tr>
|
|
<tr class="memitem:a4026bdedc7cf880b58acbdfa716f7f28"><td class="memItemLeft" align="right" valign="top">using </td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#a4026bdedc7cf880b58acbdfa716f7f28">ElementC</a> = ElementC_</td></tr>
|
|
<tr class="separator:a4026bdedc7cf880b58acbdfa716f7f28"><td class="memSeparator" colspan="2"> </td></tr>
|
|
<tr class="memitem:ab2572b47bae5f0ae0f06647cfe86ad4e"><td class="memItemLeft" align="right" valign="top">using </td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#ab2572b47bae5f0ae0f06647cfe86ad4e">LayoutC</a> = LayoutC_</td></tr>
|
|
<tr class="separator:ab2572b47bae5f0ae0f06647cfe86ad4e"><td class="memSeparator" colspan="2"> </td></tr>
|
|
<tr class="memitem:a56649fb86263ffd12aae9f7ae2e6506e"><td class="memItemLeft" align="right" valign="top">using </td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#a56649fb86263ffd12aae9f7ae2e6506e">ElementAccumulator</a> = ElementAccumulator_</td></tr>
|
|
<tr class="separator:a56649fb86263ffd12aae9f7ae2e6506e"><td class="memSeparator" colspan="2"> </td></tr>
|
|
<tr class="memitem:a1bc1686f83acac48e8a1695b71518b16"><td class="memItemLeft" align="right" valign="top">using </td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#a1bc1686f83acac48e8a1695b71518b16">OperatorClass</a> = OperatorClass_</td></tr>
|
|
<tr class="separator:a1bc1686f83acac48e8a1695b71518b16"><td class="memSeparator" colspan="2"> </td></tr>
|
|
<tr class="memitem:a2a48eb6e51e2315e945882d5e70ebb2f"><td class="memItemLeft" align="right" valign="top">using </td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#a2a48eb6e51e2315e945882d5e70ebb2f">ArchTag</a> = ArchTag_</td></tr>
|
|
<tr class="separator:a2a48eb6e51e2315e945882d5e70ebb2f"><td class="memSeparator" colspan="2"> </td></tr>
|
|
<tr class="memitem:ad174d9818e8a4857b65e5da6da7a45b3"><td class="memItemLeft" align="right" valign="top">using </td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#ad174d9818e8a4857b65e5da6da7a45b3">ThreadblockShape</a> = ThreadblockShape_</td></tr>
|
|
<tr class="separator:ad174d9818e8a4857b65e5da6da7a45b3"><td class="memSeparator" colspan="2"> </td></tr>
|
|
<tr class="memitem:a36698ab97ffbc16808b60f041a1e583a"><td class="memItemLeft" align="right" valign="top">using </td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#a36698ab97ffbc16808b60f041a1e583a">WarpShape</a> = WarpShape_</td></tr>
|
|
<tr class="separator:a36698ab97ffbc16808b60f041a1e583a"><td class="memSeparator" colspan="2"> </td></tr>
|
|
<tr class="memitem:ab2e2468a859f14502fd18013859ec9e6"><td class="memItemLeft" align="right" valign="top">using </td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#ab2e2468a859f14502fd18013859ec9e6">InstructionShape</a> = InstructionShape_</td></tr>
|
|
<tr class="separator:ab2e2468a859f14502fd18013859ec9e6"><td class="memSeparator" colspan="2"> </td></tr>
|
|
<tr class="memitem:affb7a5c96c9e8b04eb94a464e5fdc48b"><td class="memItemLeft" align="right" valign="top">using </td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#affb7a5c96c9e8b04eb94a464e5fdc48b">ConvertScaledOp</a> = ConvertScaledOp_</td></tr>
|
|
<tr class="separator:affb7a5c96c9e8b04eb94a464e5fdc48b"><td class="memSeparator" colspan="2"> </td></tr>
|
|
<tr class="memitem:a2b8223379b65f5117d4d8dfd4f6cbc13"><td class="memItemLeft" align="right" valign="top">using </td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#a2b8223379b65f5117d4d8dfd4f6cbc13">EpilogueOutputOp</a> = EpilogueOutputOp_</td></tr>
|
|
<tr class="separator:a2b8223379b65f5117d4d8dfd4f6cbc13"><td class="memSeparator" colspan="2"> </td></tr>
|
|
<tr class="memitem:ac82ba3da12b03bc91586a3947ce99fc5"><td class="memItemLeft" align="right" valign="top">using </td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#ac82ba3da12b03bc91586a3947ce99fc5">ReductionOp</a> = ReductionOp_</td></tr>
|
|
<tr class="separator:ac82ba3da12b03bc91586a3947ce99fc5"><td class="memSeparator" colspan="2"> </td></tr>
|
|
<tr class="memitem:aacc6cfe6ebe0b33ec7577c654303f70d"><td class="memItemLeft" align="right" valign="top">using </td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#aacc6cfe6ebe0b33ec7577c654303f70d">ThreadblockSwizzle</a> = ThreadblockSwizzle_</td></tr>
|
|
<tr class="separator:aacc6cfe6ebe0b33ec7577c654303f70d"><td class="memSeparator" colspan="2"> </td></tr>
|
|
<tr class="memitem:ae436b25ceca72104f23d09442de78f73"><td class="memItemLeft" align="right" valign="top">using </td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#ae436b25ceca72104f23d09442de78f73">Operator</a> = Operator_</td></tr>
|
|
<tr class="separator:ae436b25ceca72104f23d09442de78f73"><td class="memSeparator" colspan="2"> </td></tr>
|
|
<tr class="memitem:a4a100b8064c285bdb06fe3e523a07d1c"><td class="memItemLeft" align="right" valign="top">using </td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#a4a100b8064c285bdb06fe3e523a07d1c">GemmKernel</a> = typename <a class="el" href="structcutlass_1_1gemm_1_1kernel_1_1DefaultGemmSplitKParallel.html">kernel::DefaultGemmSplitKParallel</a>< <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#a372091a0c8d52bb9aab75c274039ceb7">ElementA</a>, <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#ae59bd456dc03a51e64a655367382535d">LayoutA</a>, kAlignmentA, <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#a5a1c15eb1246208cf00bd811b8df85ba">ElementB</a>, <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#a08550ee41dd02322c514364035f5d098">LayoutB</a>, kAlignmentB, <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#a56649fb86263ffd12aae9f7ae2e6506e">ElementAccumulator</a>, <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#ab2572b47bae5f0ae0f06647cfe86ad4e">LayoutC</a>, <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#a56649fb86263ffd12aae9f7ae2e6506e">ElementAccumulator</a>, <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#a1bc1686f83acac48e8a1695b71518b16">OperatorClass</a>, <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#a2a48eb6e51e2315e945882d5e70ebb2f">ArchTag</a>, <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#ad174d9818e8a4857b65e5da6da7a45b3">ThreadblockShape</a>, <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#a36698ab97ffbc16808b60f041a1e583a">WarpShape</a>, <a class="el" 
href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#ab2e2468a859f14502fd18013859ec9e6">InstructionShape</a>, <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#affb7a5c96c9e8b04eb94a464e5fdc48b">ConvertScaledOp</a>, <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#aacc6cfe6ebe0b33ec7577c654303f70d">ThreadblockSwizzle</a>, <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#af1f647942f7734bbf01e473118f2512c">kStages</a>, <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#ae436b25ceca72104f23d09442de78f73">Operator</a> >::<a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#a4a100b8064c285bdb06fe3e523a07d1c">GemmKernel</a></td></tr>
|
|
<tr class="memdesc:a4a100b8064c285bdb06fe3e523a07d1c"><td class="mdescLeft"> </td><td class="mdescRight">GEMM kernel. <a href="#a4a100b8064c285bdb06fe3e523a07d1c">More...</a><br /></td></tr>
|
|
<tr class="separator:a4a100b8064c285bdb06fe3e523a07d1c"><td class="memSeparator" colspan="2"> </td></tr>
|
|
<tr class="memitem:aaf83264eb3effceee610d9547ddf32e9"><td class="memItemLeft" align="right" valign="top">using </td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#aaf83264eb3effceee610d9547ddf32e9">ReductionKernel</a> = <a class="el" href="classcutlass_1_1reduction_1_1kernel_1_1ReduceSplitK.html">cutlass::reduction::kernel::ReduceSplitK</a>< <a class="el" href="structcutlass_1_1MatrixShape.html">cutlass::MatrixShape</a>< 4, 32 *EpilogueOutputOp::kCount >, <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#a2b8223379b65f5117d4d8dfd4f6cbc13">EpilogueOutputOp</a>, <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#ac82ba3da12b03bc91586a3947ce99fc5">ReductionOp</a> ></td></tr>
|
|
<tr class="memdesc:aaf83264eb3effceee610d9547ddf32e9"><td class="mdescLeft"> </td><td class="mdescRight">Reduction kernel. <a href="#aaf83264eb3effceee610d9547ddf32e9">More...</a><br /></td></tr>
|
|
<tr class="separator:aaf83264eb3effceee610d9547ddf32e9"><td class="memSeparator" colspan="2"> </td></tr>
|
|
</table><table class="memberdecls">
|
|
<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="pub-methods"></a>
|
|
Public Member Functions</h2></td></tr>
|
|
<tr class="memitem:abfb1166a9c55270ff8f1b265516a418c"><td class="memItemLeft" align="right" valign="top"> </td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#abfb1166a9c55270ff8f1b265516a418c">GemmSplitKParallel</a> ()</td></tr>
|
|
<tr class="memdesc:abfb1166a9c55270ff8f1b265516a418c"><td class="mdescLeft"> </td><td class="mdescRight">Constructs the GEMM. <a href="#abfb1166a9c55270ff8f1b265516a418c">More...</a><br /></td></tr>
|
|
<tr class="separator:abfb1166a9c55270ff8f1b265516a418c"><td class="memSeparator" colspan="2"> </td></tr>
|
|
<tr class="memitem:a7085b7cf85bc1bcd202ea6928656d966"><td class="memItemLeft" align="right" valign="top"><a class="el" href="namespacecutlass.html#ac5a88c5840a28a9e0206b9cc7812a18d">Status</a> </td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#a7085b7cf85bc1bcd202ea6928656d966">initialize</a> (<a class="el" href="structcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel_1_1Arguments.html">Arguments</a> const &args, void *workspace)</td></tr>
|
|
<tr class="memdesc:a7085b7cf85bc1bcd202ea6928656d966"><td class="mdescLeft"> </td><td class="mdescRight">Initializes GEMM state from arguments. <a href="#a7085b7cf85bc1bcd202ea6928656d966">More...</a><br /></td></tr>
|
|
<tr class="separator:a7085b7cf85bc1bcd202ea6928656d966"><td class="memSeparator" colspan="2"> </td></tr>
|
|
<tr class="memitem:a07ad725857d7eb191cbfc135df22b781"><td class="memItemLeft" align="right" valign="top"><a class="el" href="namespacecutlass.html#ac5a88c5840a28a9e0206b9cc7812a18d">Status</a> </td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#a07ad725857d7eb191cbfc135df22b781">update</a> (<a class="el" href="structcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel_1_1Arguments.html">Arguments</a> const &args, void *workspace=<a class="el" href="platform_8h.html#ab979d9d4b4923f7c54d6caa6e1a61936">nullptr</a>)</td></tr>
|
|
<tr class="memdesc:a07ad725857d7eb191cbfc135df22b781"><td class="mdescLeft"> </td><td class="mdescRight">Lightweight update given a subset of arguments. <a href="#a07ad725857d7eb191cbfc135df22b781">More...</a><br /></td></tr>
|
|
<tr class="separator:a07ad725857d7eb191cbfc135df22b781"><td class="memSeparator" colspan="2"> </td></tr>
|
|
<tr class="memitem:aeadc76210d06ec22776aca4a58de9930"><td class="memItemLeft" align="right" valign="top"><a class="el" href="namespacecutlass.html#ac5a88c5840a28a9e0206b9cc7812a18d">Status</a> </td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#aeadc76210d06ec22776aca4a58de9930">run</a> (cudaStream_t stream=<a class="el" href="platform_8h.html#ab979d9d4b4923f7c54d6caa6e1a61936">nullptr</a>)</td></tr>
|
|
<tr class="memdesc:aeadc76210d06ec22776aca4a58de9930"><td class="mdescLeft"> </td><td class="mdescRight">Runs the kernel using initialized state. <a href="#aeadc76210d06ec22776aca4a58de9930">More...</a><br /></td></tr>
|
|
<tr class="separator:aeadc76210d06ec22776aca4a58de9930"><td class="memSeparator" colspan="2"> </td></tr>
|
|
<tr class="memitem:a5d934b66a5ead9df63bea323abf8f83f"><td class="memItemLeft" align="right" valign="top"><a class="el" href="namespacecutlass.html#ac5a88c5840a28a9e0206b9cc7812a18d">Status</a> </td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#a5d934b66a5ead9df63bea323abf8f83f">operator()</a> (cudaStream_t stream=<a class="el" href="platform_8h.html#ab979d9d4b4923f7c54d6caa6e1a61936">nullptr</a>)</td></tr>
|
|
<tr class="memdesc:a5d934b66a5ead9df63bea323abf8f83f"><td class="mdescLeft"> </td><td class="mdescRight">Runs the kernel using initialized state. <a href="#a5d934b66a5ead9df63bea323abf8f83f">More...</a><br /></td></tr>
|
|
<tr class="separator:a5d934b66a5ead9df63bea323abf8f83f"><td class="memSeparator" colspan="2"> </td></tr>
|
|
<tr class="memitem:af324f9dd7abc0e3ae6007e6dcf186190"><td class="memItemLeft" align="right" valign="top"><a class="el" href="namespacecutlass.html#ac5a88c5840a28a9e0206b9cc7812a18d">Status</a> </td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#af324f9dd7abc0e3ae6007e6dcf186190">operator()</a> (<a class="el" href="structcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel_1_1Arguments.html">Arguments</a> const &args, void *workspace=<a class="el" href="platform_8h.html#ab979d9d4b4923f7c54d6caa6e1a61936">nullptr</a>, cudaStream_t stream=<a class="el" href="platform_8h.html#ab979d9d4b4923f7c54d6caa6e1a61936">nullptr</a>)</td></tr>
|
|
<tr class="memdesc:af324f9dd7abc0e3ae6007e6dcf186190"><td class="mdescLeft"> </td><td class="mdescRight">Runs the kernel using initialized state. <a href="#af324f9dd7abc0e3ae6007e6dcf186190">More...</a><br /></td></tr>
|
|
<tr class="separator:af324f9dd7abc0e3ae6007e6dcf186190"><td class="memSeparator" colspan="2"> </td></tr>
|
|
</table><table class="memberdecls">
|
|
<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="pub-static-methods"></a>
|
|
Static Public Member Functions</h2></td></tr>
|
|
<tr class="memitem:a114b122602b425909f9be0df461353a4"><td class="memItemLeft" align="right" valign="top">static <a class="el" href="namespacecutlass.html#ac5a88c5840a28a9e0206b9cc7812a18d">Status</a> </td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#a114b122602b425909f9be0df461353a4">can_implement</a> (<a class="el" href="structcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel_1_1Arguments.html">Arguments</a> const &args)</td></tr>
|
|
<tr class="memdesc:a114b122602b425909f9be0df461353a4"><td class="mdescLeft"> </td><td class="mdescRight">Determines whether the GEMM can execute the given problem. <a href="#a114b122602b425909f9be0df461353a4">More...</a><br /></td></tr>
|
|
<tr class="separator:a114b122602b425909f9be0df461353a4"><td class="memSeparator" colspan="2"> </td></tr>
|
|
<tr class="memitem:ae8fb82c40078cf84c211f10f726caaf5"><td class="memItemLeft" align="right" valign="top">static size_t </td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#ae8fb82c40078cf84c211f10f726caaf5">get_workspace_size</a> (<a class="el" href="structcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel_1_1Arguments.html">Arguments</a> const &args)</td></tr>
|
|
<tr class="memdesc:ae8fb82c40078cf84c211f10f726caaf5"><td class="mdescLeft"> </td><td class="mdescRight">Gets the workspace size. <a href="#ae8fb82c40078cf84c211f10f726caaf5">More...</a><br /></td></tr>
|
|
<tr class="separator:ae8fb82c40078cf84c211f10f726caaf5"><td class="memSeparator" colspan="2"> </td></tr>
|
|
</table><table class="memberdecls">
|
|
<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="pub-static-attribs"></a>
|
|
Static Public Attributes</h2></td></tr>
|
|
<tr class="memitem:af1f647942f7734bbf01e473118f2512c"><td class="memItemLeft" align="right" valign="top">static int const </td><td class="memItemRight" valign="bottom"><a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#af1f647942f7734bbf01e473118f2512c">kStages</a> = Stages</td></tr>
|
|
<tr class="separator:af1f647942f7734bbf01e473118f2512c"><td class="memSeparator" colspan="2"> </td></tr>
|
|
</table>
|
|
<a name="details" id="details"></a><h2 class="groupheader">Detailed Description</h2>
|
|
<div class="textblock"><h3>template&lt;typename ElementA_, typename LayoutA_, typename ElementB_, typename LayoutB_, typename ElementC_, typename LayoutC_, typename ElementAccumulator_ = ElementC_, typename OperatorClass_ = arch::OpClassSimt, typename ArchTag_ = arch::Sm70, typename ThreadblockShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::ThreadblockShape, typename WarpShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::WarpShape, typename InstructionShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::InstructionShape, typename EpilogueOutputOp_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::EpilogueOutputOp, typename ConvertScaledOp_ = cutlass::epilogue::thread::Convert&lt; ElementAccumulator_, DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementAccumulator_, ElementAccumulator_&gt;::EpilogueOutputOp::kCount, ElementAccumulator_&gt;, typename ReductionOp_ = cutlass::reduction::thread::ReduceAdd&lt; ElementAccumulator_, typename EpilogueOutputOp_::ElementAccumulator, EpilogueOutputOp_::kCount&gt;, typename ThreadblockSwizzle_ = threadblock::GemmSplitKHorizontalThreadblockSwizzle, int Stages = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kStages, int kAlignmentA = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentA, int kAlignmentB = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentB, typename Operator_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::Operator&gt;<br />
|
|
class cutlass::gemm::device::GemmSplitKParallel&lt; ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ &gt;</h3>
|
|
|
|
<p><a class="el" href="classcutlass_1_1gemm_1_1device_1_1Gemm.html">Gemm</a> device-level operator performing parallel reduction over the K partition. </p>
|
|
</div><h2 class="groupheader">Member Typedef Documentation</h2>
|
|
<a class="anchor" id="a2a48eb6e51e2315e945882d5e70ebb2f"></a>
|
|
<div class="memitem">
|
|
<div class="memproto">
|
|
<div class="memtemplate">
|
|
template&lt;typename ElementA_, typename LayoutA_, typename ElementB_, typename LayoutB_, typename ElementC_, typename LayoutC_, typename ElementAccumulator_ = ElementC_, typename OperatorClass_ = arch::OpClassSimt, typename ArchTag_ = arch::Sm70, typename ThreadblockShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::ThreadblockShape, typename WarpShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::WarpShape, typename InstructionShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::InstructionShape, typename EpilogueOutputOp_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::EpilogueOutputOp, typename ConvertScaledOp_ = cutlass::epilogue::thread::Convert&lt; ElementAccumulator_, DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementAccumulator_, ElementAccumulator_&gt;::EpilogueOutputOp::kCount, ElementAccumulator_&gt;, typename ReductionOp_ = cutlass::reduction::thread::ReduceAdd&lt; ElementAccumulator_, typename EpilogueOutputOp_::ElementAccumulator, EpilogueOutputOp_::kCount&gt;, typename ThreadblockSwizzle_ = threadblock::GemmSplitKHorizontalThreadblockSwizzle, int Stages = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kStages, int kAlignmentA = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentA, int kAlignmentB = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentB, typename Operator_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::Operator&gt; </div>
|
|
<table class="memname">
|
|
<tr>
|
|
<td class="memname">using <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html">cutlass::gemm::device::GemmSplitKParallel</a>&lt; ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ &gt;::<a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#a2a48eb6e51e2315e945882d5e70ebb2f">ArchTag</a> = ArchTag_</td>
|
|
</tr>
|
|
</table>
|
|
</div><div class="memdoc">
|
|
|
|
</div>
|
|
</div>
|
|
<a class="anchor" id="affb7a5c96c9e8b04eb94a464e5fdc48b"></a>
|
|
<div class="memitem">
|
|
<div class="memproto">
|
|
<div class="memtemplate">
|
|
template&lt;typename ElementA_, typename LayoutA_, typename ElementB_, typename LayoutB_, typename ElementC_, typename LayoutC_, typename ElementAccumulator_ = ElementC_, typename OperatorClass_ = arch::OpClassSimt, typename ArchTag_ = arch::Sm70, typename ThreadblockShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::ThreadblockShape, typename WarpShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::WarpShape, typename InstructionShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::InstructionShape, typename EpilogueOutputOp_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::EpilogueOutputOp, typename ConvertScaledOp_ = cutlass::epilogue::thread::Convert&lt; ElementAccumulator_, DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementAccumulator_, ElementAccumulator_&gt;::EpilogueOutputOp::kCount, ElementAccumulator_&gt;, typename ReductionOp_ = cutlass::reduction::thread::ReduceAdd&lt; ElementAccumulator_, typename EpilogueOutputOp_::ElementAccumulator, EpilogueOutputOp_::kCount&gt;, typename ThreadblockSwizzle_ = threadblock::GemmSplitKHorizontalThreadblockSwizzle, int Stages = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kStages, int kAlignmentA = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentA, int kAlignmentB = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentB, typename Operator_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::Operator&gt; </div>
|
|
<table class="memname">
|
|
<tr>
|
|
<td class="memname">using <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html">cutlass::gemm::device::GemmSplitKParallel</a>&lt; ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ &gt;::<a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#affb7a5c96c9e8b04eb94a464e5fdc48b">ConvertScaledOp</a> = ConvertScaledOp_</td>
|
|
</tr>
|
|
</table>
|
|
</div><div class="memdoc">
|
|
|
|
</div>
|
|
</div>
|
|
<a class="anchor" id="a372091a0c8d52bb9aab75c274039ceb7"></a>
|
|
<div class="memitem">
|
|
<div class="memproto">
|
|
<div class="memtemplate">
|
|
template&lt;typename ElementA_, typename LayoutA_, typename ElementB_, typename LayoutB_, typename ElementC_, typename LayoutC_, typename ElementAccumulator_ = ElementC_, typename OperatorClass_ = arch::OpClassSimt, typename ArchTag_ = arch::Sm70, typename ThreadblockShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::ThreadblockShape, typename WarpShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::WarpShape, typename InstructionShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::InstructionShape, typename EpilogueOutputOp_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::EpilogueOutputOp, typename ConvertScaledOp_ = cutlass::epilogue::thread::Convert&lt; ElementAccumulator_, DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementAccumulator_, ElementAccumulator_&gt;::EpilogueOutputOp::kCount, ElementAccumulator_&gt;, typename ReductionOp_ = cutlass::reduction::thread::ReduceAdd&lt; ElementAccumulator_, typename EpilogueOutputOp_::ElementAccumulator, EpilogueOutputOp_::kCount&gt;, typename ThreadblockSwizzle_ = threadblock::GemmSplitKHorizontalThreadblockSwizzle, int Stages = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kStages, int kAlignmentA = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentA, int kAlignmentB = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentB, typename Operator_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::Operator&gt; </div>
|
|
<table class="memname">
|
|
<tr>
|
|
<td class="memname">using <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html">cutlass::gemm::device::GemmSplitKParallel</a>&lt; ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ &gt;::<a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#a372091a0c8d52bb9aab75c274039ceb7">ElementA</a> = ElementA_</td>
|
|
</tr>
|
|
</table>
|
|
</div><div class="memdoc">
|
|
|
|
</div>
|
|
</div>
|
|
<a class="anchor" id="a56649fb86263ffd12aae9f7ae2e6506e"></a>
|
|
<div class="memitem">
|
|
<div class="memproto">
|
|
<div class="memtemplate">
|
|
template&lt;typename ElementA_, typename LayoutA_, typename ElementB_, typename LayoutB_, typename ElementC_, typename LayoutC_, typename ElementAccumulator_ = ElementC_, typename OperatorClass_ = arch::OpClassSimt, typename ArchTag_ = arch::Sm70, typename ThreadblockShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::ThreadblockShape, typename WarpShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::WarpShape, typename InstructionShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::InstructionShape, typename EpilogueOutputOp_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::EpilogueOutputOp, typename ConvertScaledOp_ = cutlass::epilogue::thread::Convert&lt; ElementAccumulator_, DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementAccumulator_, ElementAccumulator_&gt;::EpilogueOutputOp::kCount, ElementAccumulator_&gt;, typename ReductionOp_ = cutlass::reduction::thread::ReduceAdd&lt; ElementAccumulator_, typename EpilogueOutputOp_::ElementAccumulator, EpilogueOutputOp_::kCount&gt;, typename ThreadblockSwizzle_ = threadblock::GemmSplitKHorizontalThreadblockSwizzle, int Stages = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kStages, int kAlignmentA = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentA, int kAlignmentB = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentB, typename Operator_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::Operator&gt; </div>
|
|
<table class="memname">
|
|
<tr>
|
|
<td class="memname">using <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html">cutlass::gemm::device::GemmSplitKParallel</a>&lt; ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ &gt;::<a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#a56649fb86263ffd12aae9f7ae2e6506e">ElementAccumulator</a> = ElementAccumulator_</td>
|
|
</tr>
|
|
</table>
|
|
</div><div class="memdoc">
|
|
|
|
</div>
|
|
</div>
|
|
<a class="anchor" id="a5a1c15eb1246208cf00bd811b8df85ba"></a>
|
|
<div class="memitem">
|
|
<div class="memproto">
|
|
<div class="memtemplate">
|
|
template&lt;typename ElementA_, typename LayoutA_, typename ElementB_, typename LayoutB_, typename ElementC_, typename LayoutC_, typename ElementAccumulator_ = ElementC_, typename OperatorClass_ = arch::OpClassSimt, typename ArchTag_ = arch::Sm70, typename ThreadblockShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::ThreadblockShape, typename WarpShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::WarpShape, typename InstructionShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::InstructionShape, typename EpilogueOutputOp_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::EpilogueOutputOp, typename ConvertScaledOp_ = cutlass::epilogue::thread::Convert&lt; ElementAccumulator_, DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementAccumulator_, ElementAccumulator_&gt;::EpilogueOutputOp::kCount, ElementAccumulator_&gt;, typename ReductionOp_ = cutlass::reduction::thread::ReduceAdd&lt; ElementAccumulator_, typename EpilogueOutputOp_::ElementAccumulator, EpilogueOutputOp_::kCount&gt;, typename ThreadblockSwizzle_ = threadblock::GemmSplitKHorizontalThreadblockSwizzle, int Stages = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kStages, int kAlignmentA = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentA, int kAlignmentB = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentB, typename Operator_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::Operator&gt; </div>
|
|
<table class="memname">
|
|
<tr>
|
|
<td class="memname">using <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html">cutlass::gemm::device::GemmSplitKParallel</a>&lt; ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ &gt;::<a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#a5a1c15eb1246208cf00bd811b8df85ba">ElementB</a> = ElementB_</td>
|
|
</tr>
|
|
</table>
|
|
</div><div class="memdoc">
|
|
|
|
</div>
|
|
</div>
|
|
<a class="anchor" id="a4026bdedc7cf880b58acbdfa716f7f28"></a>
|
|
<div class="memitem">
|
|
<div class="memproto">
|
|
<div class="memtemplate">
|
|
template&lt;typename ElementA_, typename LayoutA_, typename ElementB_, typename LayoutB_, typename ElementC_, typename LayoutC_, typename ElementAccumulator_ = ElementC_, typename OperatorClass_ = arch::OpClassSimt, typename ArchTag_ = arch::Sm70, typename ThreadblockShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::ThreadblockShape, typename WarpShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::WarpShape, typename InstructionShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::InstructionShape, typename EpilogueOutputOp_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::EpilogueOutputOp, typename ConvertScaledOp_ = cutlass::epilogue::thread::Convert&lt; ElementAccumulator_, DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementAccumulator_, ElementAccumulator_&gt;::EpilogueOutputOp::kCount, ElementAccumulator_&gt;, typename ReductionOp_ = cutlass::reduction::thread::ReduceAdd&lt; ElementAccumulator_, typename EpilogueOutputOp_::ElementAccumulator, EpilogueOutputOp_::kCount&gt;, typename ThreadblockSwizzle_ = threadblock::GemmSplitKHorizontalThreadblockSwizzle, int Stages = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kStages, int kAlignmentA = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentA, int kAlignmentB = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentB, typename Operator_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::Operator&gt; </div>
|
|
<table class="memname">
|
|
<tr>
|
|
<td class="memname">using <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html">cutlass::gemm::device::GemmSplitKParallel</a>&lt; ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ &gt;::<a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#a4026bdedc7cf880b58acbdfa716f7f28">ElementC</a> = ElementC_</td>
|
|
</tr>
|
|
</table>
|
|
</div><div class="memdoc">
|
|
|
|
</div>
|
|
</div>
|
|
<a class="anchor" id="a2b8223379b65f5117d4d8dfd4f6cbc13"></a>
|
|
<div class="memitem">
|
|
<div class="memproto">
|
|
<div class="memtemplate">
|
|
template&lt;typename ElementA_, typename LayoutA_, typename ElementB_, typename LayoutB_, typename ElementC_, typename LayoutC_, typename ElementAccumulator_ = ElementC_, typename OperatorClass_ = arch::OpClassSimt, typename ArchTag_ = arch::Sm70, typename ThreadblockShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::ThreadblockShape, typename WarpShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::WarpShape, typename InstructionShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::InstructionShape, typename EpilogueOutputOp_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::EpilogueOutputOp, typename ConvertScaledOp_ = cutlass::epilogue::thread::Convert&lt; ElementAccumulator_, DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementAccumulator_, ElementAccumulator_&gt;::EpilogueOutputOp::kCount, ElementAccumulator_&gt;, typename ReductionOp_ = cutlass::reduction::thread::ReduceAdd&lt; ElementAccumulator_, typename EpilogueOutputOp_::ElementAccumulator, EpilogueOutputOp_::kCount&gt;, typename ThreadblockSwizzle_ = threadblock::GemmSplitKHorizontalThreadblockSwizzle, int Stages = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kStages, int kAlignmentA = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentA, int kAlignmentB = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentB, typename Operator_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::Operator&gt; </div>
|
|
<table class="memname">
|
|
<tr>
|
|
<td class="memname">using <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html">cutlass::gemm::device::GemmSplitKParallel</a>&lt; ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ &gt;::<a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#a2b8223379b65f5117d4d8dfd4f6cbc13">EpilogueOutputOp</a> = EpilogueOutputOp_</td>
|
|
</tr>
|
|
</table>
|
|
</div><div class="memdoc">
|
|
|
|
</div>
|
|
</div>
|
|
<a class="anchor" id="a4a100b8064c285bdb06fe3e523a07d1c"></a>
|
|
<div class="memitem">
|
|
<div class="memproto">
|
|
<div class="memtemplate">
|
|
template&lt;typename ElementA_, typename LayoutA_, typename ElementB_, typename LayoutB_, typename ElementC_, typename LayoutC_, typename ElementAccumulator_ = ElementC_, typename OperatorClass_ = arch::OpClassSimt, typename ArchTag_ = arch::Sm70, typename ThreadblockShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::ThreadblockShape, typename WarpShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::WarpShape, typename InstructionShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::InstructionShape, typename EpilogueOutputOp_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::EpilogueOutputOp, typename ConvertScaledOp_ = cutlass::epilogue::thread::Convert&lt; ElementAccumulator_, DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementAccumulator_, ElementAccumulator_&gt;::EpilogueOutputOp::kCount, ElementAccumulator_&gt;, typename ReductionOp_ = cutlass::reduction::thread::ReduceAdd&lt; ElementAccumulator_, typename EpilogueOutputOp_::ElementAccumulator, EpilogueOutputOp_::kCount&gt;, typename ThreadblockSwizzle_ = threadblock::GemmSplitKHorizontalThreadblockSwizzle, int Stages = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kStages, int kAlignmentA = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentA, int kAlignmentB = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentB, typename Operator_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::Operator&gt; </div>
|
|
<table class="memname">
|
|
<tr>
|
|
<td class="memname">using <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html">cutlass::gemm::device::GemmSplitKParallel</a>&lt; ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ &gt;::<a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#a4a100b8064c285bdb06fe3e523a07d1c">GemmKernel</a> = typename <a class="el" href="structcutlass_1_1gemm_1_1kernel_1_1DefaultGemmSplitKParallel.html">kernel::DefaultGemmSplitKParallel</a>&lt; <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#a372091a0c8d52bb9aab75c274039ceb7">ElementA</a>, <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#ae59bd456dc03a51e64a655367382535d">LayoutA</a>, kAlignmentA, <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#a5a1c15eb1246208cf00bd811b8df85ba">ElementB</a>, <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#a08550ee41dd02322c514364035f5d098">LayoutB</a>, kAlignmentB, <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#a56649fb86263ffd12aae9f7ae2e6506e">ElementAccumulator</a>, <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#ab2572b47bae5f0ae0f06647cfe86ad4e">LayoutC</a>, <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#a56649fb86263ffd12aae9f7ae2e6506e">ElementAccumulator</a>, <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#a1bc1686f83acac48e8a1695b71518b16">OperatorClass</a>, <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#a2a48eb6e51e2315e945882d5e70ebb2f">ArchTag</a>, <a class="el" 
href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#ad174d9818e8a4857b65e5da6da7a45b3">ThreadblockShape</a>, <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#a36698ab97ffbc16808b60f041a1e583a">WarpShape</a>, <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#ab2e2468a859f14502fd18013859ec9e6">InstructionShape</a>, <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#affb7a5c96c9e8b04eb94a464e5fdc48b">ConvertScaledOp</a>, <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#aacc6cfe6ebe0b33ec7577c654303f70d">ThreadblockSwizzle</a>, <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#af1f647942f7734bbf01e473118f2512c">kStages</a>, <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#ae436b25ceca72104f23d09442de78f73">Operator</a> &gt;::<a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#a4a100b8064c285bdb06fe3e523a07d1c">GemmKernel</a></td>
|
|
</tr>
|
|
</table>
|
|
</div><div class="memdoc">
|
|
|
|
</div>
|
|
</div>
|
|
<a class="anchor" id="ab2e2468a859f14502fd18013859ec9e6"></a>
|
|
<div class="memitem">
|
|
<div class="memproto">
|
|
<div class="memtemplate">
|
|
template&lt;typename ElementA_, typename LayoutA_, typename ElementB_, typename LayoutB_, typename ElementC_, typename LayoutC_, typename ElementAccumulator_ = ElementC_, typename OperatorClass_ = arch::OpClassSimt, typename ArchTag_ = arch::Sm70, typename ThreadblockShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::ThreadblockShape, typename WarpShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::WarpShape, typename InstructionShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::InstructionShape, typename EpilogueOutputOp_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::EpilogueOutputOp, typename ConvertScaledOp_ = cutlass::epilogue::thread::Convert&lt; ElementAccumulator_, DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementAccumulator_, ElementAccumulator_&gt;::EpilogueOutputOp::kCount, ElementAccumulator_&gt;, typename ReductionOp_ = cutlass::reduction::thread::ReduceAdd&lt; ElementAccumulator_, typename EpilogueOutputOp_::ElementAccumulator, EpilogueOutputOp_::kCount&gt;, typename ThreadblockSwizzle_ = threadblock::GemmSplitKHorizontalThreadblockSwizzle, int Stages = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kStages, int kAlignmentA = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentA, int kAlignmentB = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentB, typename Operator_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::Operator&gt; </div>
|
|
<table class="memname">
|
|
<tr>
|
|
<td class="memname">using <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html">cutlass::gemm::device::GemmSplitKParallel</a>&lt; ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ &gt;::<a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#ab2e2468a859f14502fd18013859ec9e6">InstructionShape</a> = InstructionShape_</td>
|
|
</tr>
|
|
</table>
|
|
</div><div class="memdoc">
|
|
|
|
</div>
|
|
</div>
|
|
<a class="anchor" id="ae59bd456dc03a51e64a655367382535d"></a>
|
|
<div class="memitem">
|
|
<div class="memproto">
|
|
<div class="memtemplate">
|
|
template&lt;typename ElementA_, typename LayoutA_, typename ElementB_, typename LayoutB_, typename ElementC_, typename LayoutC_, typename ElementAccumulator_ = ElementC_, typename OperatorClass_ = arch::OpClassSimt, typename ArchTag_ = arch::Sm70, typename ThreadblockShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::ThreadblockShape, typename WarpShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::WarpShape, typename InstructionShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::InstructionShape, typename EpilogueOutputOp_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::EpilogueOutputOp, typename ConvertScaledOp_ = cutlass::epilogue::thread::Convert&lt; ElementAccumulator_, DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementAccumulator_, ElementAccumulator_&gt;::EpilogueOutputOp::kCount, ElementAccumulator_&gt;, typename ReductionOp_ = cutlass::reduction::thread::ReduceAdd&lt; ElementAccumulator_, typename EpilogueOutputOp_::ElementAccumulator, EpilogueOutputOp_::kCount&gt;, typename ThreadblockSwizzle_ = threadblock::GemmSplitKHorizontalThreadblockSwizzle, int Stages = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kStages, int kAlignmentA = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentA, int kAlignmentB = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentB, typename Operator_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::Operator&gt; </div>
|
|
<table class="memname">
|
|
<tr>
|
|
<td class="memname">using <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html">cutlass::gemm::device::GemmSplitKParallel</a>&lt; ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ &gt;::<a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#ae59bd456dc03a51e64a655367382535d">LayoutA</a> = LayoutA_</td>
|
|
</tr>
|
|
</table>
|
|
</div><div class="memdoc">
|
|
|
|
</div>
|
|
</div>
|
|
<a class="anchor" id="a08550ee41dd02322c514364035f5d098"></a>
|
|
<div class="memitem">
|
|
<div class="memproto">
|
|
<div class="memtemplate">
|
|
template&lt;typename ElementA_, typename LayoutA_, typename ElementB_, typename LayoutB_, typename ElementC_, typename LayoutC_, typename ElementAccumulator_ = ElementC_, typename OperatorClass_ = arch::OpClassSimt, typename ArchTag_ = arch::Sm70, typename ThreadblockShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::ThreadblockShape, typename WarpShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::WarpShape, typename InstructionShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::InstructionShape, typename EpilogueOutputOp_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::EpilogueOutputOp, typename ConvertScaledOp_ = cutlass::epilogue::thread::Convert&lt; ElementAccumulator_, DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementAccumulator_, ElementAccumulator_&gt;::EpilogueOutputOp::kCount, ElementAccumulator_&gt;, typename ReductionOp_ = cutlass::reduction::thread::ReduceAdd&lt; ElementAccumulator_, typename EpilogueOutputOp_::ElementAccumulator, EpilogueOutputOp_::kCount&gt;, typename ThreadblockSwizzle_ = threadblock::GemmSplitKHorizontalThreadblockSwizzle, int Stages = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kStages, int kAlignmentA = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentA, int kAlignmentB = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentB, typename Operator_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::Operator&gt; </div>
|
|
<table class="memname">
|
|
<tr>
|
|
<td class="memname">using <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html">cutlass::gemm::device::GemmSplitKParallel</a>&lt; ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ &gt;::<a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#a08550ee41dd02322c514364035f5d098">LayoutB</a> = LayoutB_</td>
|
|
</tr>
|
|
</table>
|
|
</div><div class="memdoc">
|
|
|
|
</div>
|
|
</div>
|
|
<a class="anchor" id="ab2572b47bae5f0ae0f06647cfe86ad4e"></a>
|
|
<div class="memitem">
|
|
<div class="memproto">
|
|
<div class="memtemplate">
|
|
template&lt;typename ElementA_, typename LayoutA_, typename ElementB_, typename LayoutB_, typename ElementC_, typename LayoutC_, typename ElementAccumulator_ = ElementC_, typename OperatorClass_ = arch::OpClassSimt, typename ArchTag_ = arch::Sm70, typename ThreadblockShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::ThreadblockShape, typename WarpShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::WarpShape, typename InstructionShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::InstructionShape, typename EpilogueOutputOp_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::EpilogueOutputOp, typename ConvertScaledOp_ = cutlass::epilogue::thread::Convert&lt; ElementAccumulator_, DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementAccumulator_, ElementAccumulator_&gt;::EpilogueOutputOp::kCount, ElementAccumulator_&gt;, typename ReductionOp_ = cutlass::reduction::thread::ReduceAdd&lt; ElementAccumulator_, typename EpilogueOutputOp_::ElementAccumulator, EpilogueOutputOp_::kCount&gt;, typename ThreadblockSwizzle_ = threadblock::GemmSplitKHorizontalThreadblockSwizzle, int Stages = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kStages, int kAlignmentA = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentA, int kAlignmentB = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentB, typename Operator_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::Operator&gt; </div>
|
|
<table class="memname">
|
|
<tr>
|
|
<td class="memname">using <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html">cutlass::gemm::device::GemmSplitKParallel</a>&lt; ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ &gt;::<a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#ab2572b47bae5f0ae0f06647cfe86ad4e">LayoutC</a> = LayoutC_</td>
|
|
</tr>
|
|
</table>
|
|
</div><div class="memdoc">
|
|
|
|
</div>
|
|
</div>
|
|
<a class="anchor" id="ae436b25ceca72104f23d09442de78f73"></a>
|
|
<div class="memitem">
|
|
<div class="memproto">
|
|
<div class="memtemplate">
|
|
template&lt;typename ElementA_, typename LayoutA_, typename ElementB_, typename LayoutB_, typename ElementC_, typename LayoutC_, typename ElementAccumulator_ = ElementC_, typename OperatorClass_ = arch::OpClassSimt, typename ArchTag_ = arch::Sm70, typename ThreadblockShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::ThreadblockShape, typename WarpShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::WarpShape, typename InstructionShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::InstructionShape, typename EpilogueOutputOp_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::EpilogueOutputOp, typename ConvertScaledOp_ = cutlass::epilogue::thread::Convert&lt; ElementAccumulator_, DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementAccumulator_, ElementAccumulator_&gt;::EpilogueOutputOp::kCount, ElementAccumulator_&gt;, typename ReductionOp_ = cutlass::reduction::thread::ReduceAdd&lt; ElementAccumulator_, typename EpilogueOutputOp_::ElementAccumulator, EpilogueOutputOp_::kCount&gt;, typename ThreadblockSwizzle_ = threadblock::GemmSplitKHorizontalThreadblockSwizzle, int Stages = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kStages, int kAlignmentA = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentA, int kAlignmentB = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentB, typename Operator_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::Operator&gt; </div>
|
|
<table class="memname">
|
|
<tr>
|
|
<td class="memname">using <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html">cutlass::gemm::device::GemmSplitKParallel</a>&lt; ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ &gt;::<a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#ae436b25ceca72104f23d09442de78f73">Operator</a> = Operator_</td>
|
|
</tr>
|
|
</table>
|
|
</div><div class="memdoc">
|
|
|
|
</div>
|
|
</div>
|
|
<a class="anchor" id="a1bc1686f83acac48e8a1695b71518b16"></a>
|
|
<div class="memitem">
|
|
<div class="memproto">
|
|
<div class="memtemplate">
|
|
template&lt;typename ElementA_, typename LayoutA_, typename ElementB_, typename LayoutB_, typename ElementC_, typename LayoutC_, typename ElementAccumulator_ = ElementC_, typename OperatorClass_ = arch::OpClassSimt, typename ArchTag_ = arch::Sm70, typename ThreadblockShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::ThreadblockShape, typename WarpShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::WarpShape, typename InstructionShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::InstructionShape, typename EpilogueOutputOp_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::EpilogueOutputOp, typename ConvertScaledOp_ = cutlass::epilogue::thread::Convert&lt; ElementAccumulator_, DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementAccumulator_, ElementAccumulator_&gt;::EpilogueOutputOp::kCount, ElementAccumulator_&gt;, typename ReductionOp_ = cutlass::reduction::thread::ReduceAdd&lt; ElementAccumulator_, typename EpilogueOutputOp_::ElementAccumulator, EpilogueOutputOp_::kCount&gt;, typename ThreadblockSwizzle_ = threadblock::GemmSplitKHorizontalThreadblockSwizzle, int Stages = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kStages, int kAlignmentA = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentA, int kAlignmentB = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentB, typename Operator_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::Operator&gt; </div>
|
|
<table class="memname">
|
|
<tr>
|
|
<td class="memname">using <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html">cutlass::gemm::device::GemmSplitKParallel</a>&lt; ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ &gt;::<a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#a1bc1686f83acac48e8a1695b71518b16">OperatorClass</a> = OperatorClass_</td>
|
|
</tr>
|
|
</table>
|
|
</div><div class="memdoc">
|
|
|
|
</div>
|
|
</div>
|
|
<a class="anchor" id="aaf83264eb3effceee610d9547ddf32e9"></a>
|
|
<div class="memitem">
|
|
<div class="memproto">
|
|
<div class="memtemplate">
|
|
template&lt;typename ElementA_, typename LayoutA_, typename ElementB_, typename LayoutB_, typename ElementC_, typename LayoutC_, typename ElementAccumulator_ = ElementC_, typename OperatorClass_ = arch::OpClassSimt, typename ArchTag_ = arch::Sm70, typename ThreadblockShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::ThreadblockShape, typename WarpShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::WarpShape, typename InstructionShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::InstructionShape, typename EpilogueOutputOp_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::EpilogueOutputOp, typename ConvertScaledOp_ = cutlass::epilogue::thread::Convert&lt; ElementAccumulator_, DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementAccumulator_, ElementAccumulator_&gt;::EpilogueOutputOp::kCount, ElementAccumulator_&gt;, typename ReductionOp_ = cutlass::reduction::thread::ReduceAdd&lt; ElementAccumulator_, typename EpilogueOutputOp_::ElementAccumulator, EpilogueOutputOp_::kCount&gt;, typename ThreadblockSwizzle_ = threadblock::GemmSplitKHorizontalThreadblockSwizzle, int Stages = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kStages, int kAlignmentA = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentA, int kAlignmentB = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentB, typename Operator_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::Operator&gt; </div>
|
|
<table class="memname">
|
|
<tr>
|
|
<td class="memname">using <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html">cutlass::gemm::device::GemmSplitKParallel</a>&lt; ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ &gt;::<a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#aaf83264eb3effceee610d9547ddf32e9">ReductionKernel</a> = <a class="el" href="classcutlass_1_1reduction_1_1kernel_1_1ReduceSplitK.html">cutlass::reduction::kernel::ReduceSplitK</a>&lt; <a class="el" href="structcutlass_1_1MatrixShape.html">cutlass::MatrixShape</a>&lt;4, 32 * EpilogueOutputOp::kCount&gt;, <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#a2b8223379b65f5117d4d8dfd4f6cbc13">EpilogueOutputOp</a>, <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#ac82ba3da12b03bc91586a3947ce99fc5">ReductionOp</a> &gt;</td>
|
|
</tr>
|
|
</table>
|
|
</div><div class="memdoc">
|
|
|
|
</div>
|
|
</div>
|
|
<a class="anchor" id="ac82ba3da12b03bc91586a3947ce99fc5"></a>
|
|
<div class="memitem">
|
|
<div class="memproto">
|
|
<div class="memtemplate">
|
|
template&lt;typename ElementA_, typename LayoutA_, typename ElementB_, typename LayoutB_, typename ElementC_, typename LayoutC_, typename ElementAccumulator_ = ElementC_, typename OperatorClass_ = arch::OpClassSimt, typename ArchTag_ = arch::Sm70, typename ThreadblockShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::ThreadblockShape, typename WarpShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::WarpShape, typename InstructionShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::InstructionShape, typename EpilogueOutputOp_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::EpilogueOutputOp, typename ConvertScaledOp_ = cutlass::epilogue::thread::Convert&lt; ElementAccumulator_, DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementAccumulator_, ElementAccumulator_&gt;::EpilogueOutputOp::kCount, ElementAccumulator_&gt;, typename ReductionOp_ = cutlass::reduction::thread::ReduceAdd&lt; ElementAccumulator_, typename EpilogueOutputOp_::ElementAccumulator, EpilogueOutputOp_::kCount&gt;, typename ThreadblockSwizzle_ = threadblock::GemmSplitKHorizontalThreadblockSwizzle, int Stages = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kStages, int kAlignmentA = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentA, int kAlignmentB = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentB, typename Operator_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::Operator&gt; </div>
|
|
<table class="memname">
|
|
<tr>
|
|
<td class="memname">using <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html">cutlass::gemm::device::GemmSplitKParallel</a>&lt; ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ &gt;::<a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#ac82ba3da12b03bc91586a3947ce99fc5">ReductionOp</a> = ReductionOp_</td>
|
|
</tr>
|
|
</table>
|
|
</div><div class="memdoc">
|
|
|
|
</div>
|
|
</div>
|
|
<a class="anchor" id="ad174d9818e8a4857b65e5da6da7a45b3"></a>
|
|
<div class="memitem">
|
|
<div class="memproto">
|
|
<div class="memtemplate">
|
|
template&lt;typename ElementA_, typename LayoutA_, typename ElementB_, typename LayoutB_, typename ElementC_, typename LayoutC_, typename ElementAccumulator_ = ElementC_, typename OperatorClass_ = arch::OpClassSimt, typename ArchTag_ = arch::Sm70, typename ThreadblockShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::ThreadblockShape, typename WarpShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::WarpShape, typename InstructionShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::InstructionShape, typename EpilogueOutputOp_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::EpilogueOutputOp, typename ConvertScaledOp_ = cutlass::epilogue::thread::Convert&lt; ElementAccumulator_, DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementAccumulator_, ElementAccumulator_&gt;::EpilogueOutputOp::kCount, ElementAccumulator_&gt;, typename ReductionOp_ = cutlass::reduction::thread::ReduceAdd&lt; ElementAccumulator_, typename EpilogueOutputOp_::ElementAccumulator, EpilogueOutputOp_::kCount&gt;, typename ThreadblockSwizzle_ = threadblock::GemmSplitKHorizontalThreadblockSwizzle, int Stages = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kStages, int kAlignmentA = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentA, int kAlignmentB = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentB, typename Operator_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::Operator&gt; </div>
|
|
<table class="memname">
|
|
<tr>
|
|
<td class="memname">using <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html">cutlass::gemm::device::GemmSplitKParallel</a>&lt; ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ &gt;::<a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#ad174d9818e8a4857b65e5da6da7a45b3">ThreadblockShape</a> = ThreadblockShape_</td>
|
|
</tr>
|
|
</table>
|
|
</div><div class="memdoc">
|
|
|
|
</div>
|
|
</div>
|
|
<a class="anchor" id="aacc6cfe6ebe0b33ec7577c654303f70d"></a>
|
|
<div class="memitem">
|
|
<div class="memproto">
|
|
<div class="memtemplate">
|
|
template&lt;typename ElementA_, typename LayoutA_, typename ElementB_, typename LayoutB_, typename ElementC_, typename LayoutC_, typename ElementAccumulator_ = ElementC_, typename OperatorClass_ = arch::OpClassSimt, typename ArchTag_ = arch::Sm70, typename ThreadblockShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::ThreadblockShape, typename WarpShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::WarpShape, typename InstructionShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::InstructionShape, typename EpilogueOutputOp_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::EpilogueOutputOp, typename ConvertScaledOp_ = cutlass::epilogue::thread::Convert&lt; ElementAccumulator_, DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementAccumulator_, ElementAccumulator_&gt;::EpilogueOutputOp::kCount, ElementAccumulator_&gt;, typename ReductionOp_ = cutlass::reduction::thread::ReduceAdd&lt; ElementAccumulator_, typename EpilogueOutputOp_::ElementAccumulator, EpilogueOutputOp_::kCount&gt;, typename ThreadblockSwizzle_ = threadblock::GemmSplitKHorizontalThreadblockSwizzle, int Stages = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kStages, int kAlignmentA = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentA, int kAlignmentB = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentB, typename Operator_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::Operator&gt; </div>
|
|
<table class="memname">
|
|
<tr>
|
|
<td class="memname">using <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html">cutlass::gemm::device::GemmSplitKParallel</a>&lt; ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ &gt;::<a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#aacc6cfe6ebe0b33ec7577c654303f70d">ThreadblockSwizzle</a> = ThreadblockSwizzle_</td>
|
|
</tr>
|
|
</table>
|
|
</div><div class="memdoc">
|
|
|
|
</div>
|
|
</div>
|
|
<a class="anchor" id="a36698ab97ffbc16808b60f041a1e583a"></a>
|
|
<div class="memitem">
|
|
<div class="memproto">
|
|
<div class="memtemplate">
|
|
template&lt;typename ElementA_, typename LayoutA_, typename ElementB_, typename LayoutB_, typename ElementC_, typename LayoutC_, typename ElementAccumulator_ = ElementC_, typename OperatorClass_ = arch::OpClassSimt, typename ArchTag_ = arch::Sm70, typename ThreadblockShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::ThreadblockShape, typename WarpShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::WarpShape, typename InstructionShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::InstructionShape, typename EpilogueOutputOp_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::EpilogueOutputOp, typename ConvertScaledOp_ = cutlass::epilogue::thread::Convert&lt; ElementAccumulator_, DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementAccumulator_, ElementAccumulator_&gt;::EpilogueOutputOp::kCount, ElementAccumulator_&gt;, typename ReductionOp_ = cutlass::reduction::thread::ReduceAdd&lt; ElementAccumulator_, typename EpilogueOutputOp_::ElementAccumulator, EpilogueOutputOp_::kCount&gt;, typename ThreadblockSwizzle_ = threadblock::GemmSplitKHorizontalThreadblockSwizzle, int Stages = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kStages, int kAlignmentA = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentA, int kAlignmentB = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentB, typename Operator_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::Operator&gt; </div>
|
|
<table class="memname">
|
|
<tr>
|
|
<td class="memname">using <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html">cutlass::gemm::device::GemmSplitKParallel</a>&lt; ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ &gt;::<a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html#a36698ab97ffbc16808b60f041a1e583a">WarpShape</a> = WarpShape_</td>
|
|
</tr>
|
|
</table>
|
|
</div><div class="memdoc">
|
|
|
|
</div>
|
|
</div>
|
|
<h2 class="groupheader">Constructor &amp; Destructor Documentation</h2>
|
|
<a class="anchor" id="abfb1166a9c55270ff8f1b265516a418c"></a>
|
|
<div class="memitem">
|
|
<div class="memproto">
|
|
<div class="memtemplate">
|
|
template&lt;typename ElementA_, typename LayoutA_, typename ElementB_, typename LayoutB_, typename ElementC_, typename LayoutC_, typename ElementAccumulator_ = ElementC_, typename OperatorClass_ = arch::OpClassSimt, typename ArchTag_ = arch::Sm70, typename ThreadblockShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::ThreadblockShape, typename WarpShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::WarpShape, typename InstructionShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::InstructionShape, typename EpilogueOutputOp_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::EpilogueOutputOp, typename ConvertScaledOp_ = cutlass::epilogue::thread::Convert&lt; ElementAccumulator_, DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementAccumulator_, ElementAccumulator_&gt;::EpilogueOutputOp::kCount, ElementAccumulator_&gt;, typename ReductionOp_ = cutlass::reduction::thread::ReduceAdd&lt; ElementAccumulator_, typename EpilogueOutputOp_::ElementAccumulator, EpilogueOutputOp_::kCount&gt;, typename ThreadblockSwizzle_ = threadblock::GemmSplitKHorizontalThreadblockSwizzle, int Stages = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kStages, int kAlignmentA = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentA, int kAlignmentB = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentB, typename Operator_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::Operator&gt; </div>
|
|
<table class="mlabels">
|
|
<tr>
|
|
<td class="mlabels-left">
|
|
<table class="memname">
|
|
<tr>
|
|
<td class="memname"><a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html">cutlass::gemm::device::GemmSplitKParallel</a>&lt; ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ &gt;::<a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html">GemmSplitKParallel</a> </td>
|
|
<td>(</td>
|
|
<td class="paramname"></td><td>)</td>
|
|
<td></td>
|
|
</tr>
|
|
</table>
|
|
</td>
|
|
<td class="mlabels-right">
|
|
<span class="mlabels"><span class="mlabel">inline</span></span> </td>
|
|
</tr>
|
|
</table>
|
|
</div><div class="memdoc">
|
|
|
|
</div>
|
|
</div>
|
|
<h2 class="groupheader">Member Function Documentation</h2>
|
|
<a class="anchor" id="a114b122602b425909f9be0df461353a4"></a>
|
|
<div class="memitem">
|
|
<div class="memproto">
|
|
<div class="memtemplate">
|
|
template&lt;typename ElementA_, typename LayoutA_, typename ElementB_, typename LayoutB_, typename ElementC_, typename LayoutC_, typename ElementAccumulator_ = ElementC_, typename OperatorClass_ = arch::OpClassSimt, typename ArchTag_ = arch::Sm70, typename ThreadblockShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::ThreadblockShape, typename WarpShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::WarpShape, typename InstructionShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::InstructionShape, typename EpilogueOutputOp_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::EpilogueOutputOp, typename ConvertScaledOp_ = cutlass::epilogue::thread::Convert&lt; ElementAccumulator_, DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementAccumulator_, ElementAccumulator_&gt;::EpilogueOutputOp::kCount, ElementAccumulator_&gt;, typename ReductionOp_ = cutlass::reduction::thread::ReduceAdd&lt; ElementAccumulator_, typename EpilogueOutputOp_::ElementAccumulator, EpilogueOutputOp_::kCount&gt;, typename ThreadblockSwizzle_ = threadblock::GemmSplitKHorizontalThreadblockSwizzle, int Stages = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kStages, int kAlignmentA = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentA, int kAlignmentB = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentB, typename Operator_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::Operator&gt; </div>
|
|
<table class="mlabels">
|
|
<tr>
|
|
<td class="mlabels-left">
|
|
<table class="memname">
|
|
<tr>
|
|
<td class="memname">static <a class="el" href="namespacecutlass.html#ac5a88c5840a28a9e0206b9cc7812a18d">Status</a> <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html">cutlass::gemm::device::GemmSplitKParallel</a>&lt; ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ &gt;::can_implement </td>
|
|
<td>(</td>
|
|
<td class="paramtype"><a class="el" href="structcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel_1_1Arguments.html">Arguments</a> const &amp; </td>
|
|
<td class="paramname"><em>args</em></td><td>)</td>
|
|
<td></td>
|
|
</tr>
|
|
</table>
|
|
</td>
|
|
<td class="mlabels-right">
|
|
<span class="mlabels"><span class="mlabel">inline</span><span class="mlabel">static</span></span> </td>
|
|
</tr>
|
|
</table>
|
|
</div><div class="memdoc">
|
|
|
|
</div>
|
|
</div>
|
|
<a class="anchor" id="ae8fb82c40078cf84c211f10f726caaf5"></a>
|
|
<div class="memitem">
|
|
<div class="memproto">
|
|
<div class="memtemplate">
|
|
template&lt;typename ElementA_, typename LayoutA_, typename ElementB_, typename LayoutB_, typename ElementC_, typename LayoutC_, typename ElementAccumulator_ = ElementC_, typename OperatorClass_ = arch::OpClassSimt, typename ArchTag_ = arch::Sm70, typename ThreadblockShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::ThreadblockShape, typename WarpShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::WarpShape, typename InstructionShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::InstructionShape, typename EpilogueOutputOp_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::EpilogueOutputOp, typename ConvertScaledOp_ = cutlass::epilogue::thread::Convert&lt; ElementAccumulator_, DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementAccumulator_, ElementAccumulator_&gt;::EpilogueOutputOp::kCount, ElementAccumulator_&gt;, typename ReductionOp_ = cutlass::reduction::thread::ReduceAdd&lt; ElementAccumulator_, typename EpilogueOutputOp_::ElementAccumulator, EpilogueOutputOp_::kCount&gt;, typename ThreadblockSwizzle_ = threadblock::GemmSplitKHorizontalThreadblockSwizzle, int Stages = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kStages, int kAlignmentA = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentA, int kAlignmentB = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentB, typename Operator_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::Operator&gt; </div>
|
|
<table class="mlabels">
|
|
<tr>
|
|
<td class="mlabels-left">
|
|
<table class="memname">
|
|
<tr>
|
|
<td class="memname">static size_t <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html">cutlass::gemm::device::GemmSplitKParallel</a>&lt; ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ &gt;::get_workspace_size </td>
|
|
<td>(</td>
|
|
<td class="paramtype"><a class="el" href="structcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel_1_1Arguments.html">Arguments</a> const &amp; </td>
|
|
<td class="paramname"><em>args</em></td><td>)</td>
|
|
<td></td>
|
|
</tr>
|
|
</table>
|
|
</td>
|
|
<td class="mlabels-right">
|
|
<span class="mlabels"><span class="mlabel">inline</span><span class="mlabel">static</span></span> </td>
|
|
</tr>
|
|
</table>
|
|
</div><div class="memdoc">
|
|
|
|
</div>
|
|
</div>
|
|
<a class="anchor" id="a7085b7cf85bc1bcd202ea6928656d966"></a>
|
|
<div class="memitem">
|
|
<div class="memproto">
|
|
<div class="memtemplate">
|
|
template&lt;typename ElementA_, typename LayoutA_, typename ElementB_, typename LayoutB_, typename ElementC_, typename LayoutC_, typename ElementAccumulator_ = ElementC_, typename OperatorClass_ = arch::OpClassSimt, typename ArchTag_ = arch::Sm70, typename ThreadblockShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::ThreadblockShape, typename WarpShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::WarpShape, typename InstructionShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::InstructionShape, typename EpilogueOutputOp_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::EpilogueOutputOp, typename ConvertScaledOp_ = cutlass::epilogue::thread::Convert&lt; ElementAccumulator_, DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementAccumulator_, ElementAccumulator_&gt;::EpilogueOutputOp::kCount, ElementAccumulator_&gt;, typename ReductionOp_ = cutlass::reduction::thread::ReduceAdd&lt; ElementAccumulator_, typename EpilogueOutputOp_::ElementAccumulator, EpilogueOutputOp_::kCount&gt;, typename ThreadblockSwizzle_ = threadblock::GemmSplitKHorizontalThreadblockSwizzle, int Stages = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kStages, int kAlignmentA = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentA, int kAlignmentB = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentB, typename Operator_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::Operator&gt; </div>
|
|
<table class="mlabels">
|
|
<tr>
|
|
<td class="mlabels-left">
|
|
<table class="memname">
|
|
<tr>
|
|
<td class="memname"><a class="el" href="namespacecutlass.html#ac5a88c5840a28a9e0206b9cc7812a18d">Status</a> <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html">cutlass::gemm::device::GemmSplitKParallel</a>&lt; ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ &gt;::initialize </td>
|
|
<td>(</td>
|
|
<td class="paramtype"><a class="el" href="structcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel_1_1Arguments.html">Arguments</a> const &amp; </td>
|
|
<td class="paramname"><em>args</em>, </td>
|
|
</tr>
|
|
<tr>
|
|
<td class="paramkey"></td>
|
|
<td></td>
|
|
<td class="paramtype">void * </td>
|
|
<td class="paramname"><em>workspace</em> </td>
|
|
</tr>
|
|
<tr>
|
|
<td></td>
|
|
<td>)</td>
|
|
<td></td><td></td>
|
|
</tr>
|
|
</table>
|
|
</td>
|
|
<td class="mlabels-right">
|
|
<span class="mlabels"><span class="mlabel">inline</span></span> </td>
|
|
</tr>
|
|
</table>
|
|
</div><div class="memdoc">
|
|
|
|
</div>
|
|
</div>
|
|
<a class="anchor" id="a5d934b66a5ead9df63bea323abf8f83f"></a>
|
|
<div class="memitem">
|
|
<div class="memproto">
|
|
<div class="memtemplate">
|
|
template&lt;typename ElementA_, typename LayoutA_, typename ElementB_, typename LayoutB_, typename ElementC_, typename LayoutC_, typename ElementAccumulator_ = ElementC_, typename OperatorClass_ = arch::OpClassSimt, typename ArchTag_ = arch::Sm70, typename ThreadblockShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::ThreadblockShape, typename WarpShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::WarpShape, typename InstructionShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::InstructionShape, typename EpilogueOutputOp_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::EpilogueOutputOp, typename ConvertScaledOp_ = cutlass::epilogue::thread::Convert&lt; ElementAccumulator_, DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementAccumulator_, ElementAccumulator_&gt;::EpilogueOutputOp::kCount, ElementAccumulator_&gt;, typename ReductionOp_ = cutlass::reduction::thread::ReduceAdd&lt; ElementAccumulator_, typename EpilogueOutputOp_::ElementAccumulator, EpilogueOutputOp_::kCount&gt;, typename ThreadblockSwizzle_ = threadblock::GemmSplitKHorizontalThreadblockSwizzle, int Stages = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kStages, int kAlignmentA = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentA, int kAlignmentB = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentB, typename Operator_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::Operator&gt; </div>
|
|
<table class="mlabels">
|
|
<tr>
|
|
<td class="mlabels-left">
|
|
<table class="memname">
|
|
<tr>
|
|
<td class="memname"><a class="el" href="namespacecutlass.html#ac5a88c5840a28a9e0206b9cc7812a18d">Status</a> <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html">cutlass::gemm::device::GemmSplitKParallel</a>&lt; ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ &gt;::operator() </td>
|
|
<td>(</td>
|
|
<td class="paramtype">cudaStream_t </td>
|
|
<td class="paramname"><em>stream</em> = <code><a class="el" href="platform_8h.html#ab979d9d4b4923f7c54d6caa6e1a61936">nullptr</a></code></td><td>)</td>
|
|
<td></td>
|
|
</tr>
|
|
</table>
|
|
</td>
|
|
<td class="mlabels-right">
|
|
<span class="mlabels"><span class="mlabel">inline</span></span> </td>
|
|
</tr>
|
|
</table>
|
|
</div><div class="memdoc">
|
|
|
|
</div>
|
|
</div>
|
|
<a class="anchor" id="af324f9dd7abc0e3ae6007e6dcf186190"></a>
|
|
<div class="memitem">
|
|
<div class="memproto">
|
|
<div class="memtemplate">
|
|
template&lt;typename ElementA_, typename LayoutA_, typename ElementB_, typename LayoutB_, typename ElementC_, typename LayoutC_, typename ElementAccumulator_ = ElementC_, typename OperatorClass_ = arch::OpClassSimt, typename ArchTag_ = arch::Sm70, typename ThreadblockShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::ThreadblockShape, typename WarpShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::WarpShape, typename InstructionShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::InstructionShape, typename EpilogueOutputOp_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::EpilogueOutputOp, typename ConvertScaledOp_ = cutlass::epilogue::thread::Convert&lt; ElementAccumulator_, DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementAccumulator_, ElementAccumulator_&gt;::EpilogueOutputOp::kCount, ElementAccumulator_&gt;, typename ReductionOp_ = cutlass::reduction::thread::ReduceAdd&lt; ElementAccumulator_, typename EpilogueOutputOp_::ElementAccumulator, EpilogueOutputOp_::kCount&gt;, typename ThreadblockSwizzle_ = threadblock::GemmSplitKHorizontalThreadblockSwizzle, int Stages = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kStages, int kAlignmentA = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentA, int kAlignmentB = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentB, typename Operator_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::Operator&gt; </div>
|
|
<table class="mlabels">
|
|
<tr>
|
|
<td class="mlabels-left">
|
|
<table class="memname">
|
|
<tr>
|
|
<td class="memname"><a class="el" href="namespacecutlass.html#ac5a88c5840a28a9e0206b9cc7812a18d">Status</a> <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html">cutlass::gemm::device::GemmSplitKParallel</a>&lt; ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ &gt;::operator() </td>
|
|
<td>(</td>
|
|
<td class="paramtype"><a class="el" href="structcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel_1_1Arguments.html">Arguments</a> const &amp; </td>
|
|
<td class="paramname"><em>args</em>, </td>
|
|
</tr>
|
|
<tr>
|
|
<td class="paramkey"></td>
|
|
<td></td>
|
|
<td class="paramtype">void * </td>
|
|
<td class="paramname"><em>workspace</em> = <code><a class="el" href="platform_8h.html#ab979d9d4b4923f7c54d6caa6e1a61936">nullptr</a></code>, </td>
|
|
</tr>
|
|
<tr>
|
|
<td class="paramkey"></td>
|
|
<td></td>
|
|
<td class="paramtype">cudaStream_t </td>
|
|
<td class="paramname"><em>stream</em> = <code><a class="el" href="platform_8h.html#ab979d9d4b4923f7c54d6caa6e1a61936">nullptr</a></code> </td>
|
|
</tr>
|
|
<tr>
|
|
<td></td>
|
|
<td>)</td>
|
|
<td></td><td></td>
|
|
</tr>
|
|
</table>
|
|
</td>
|
|
<td class="mlabels-right">
|
|
<span class="mlabels"><span class="mlabel">inline</span></span> </td>
|
|
</tr>
|
|
</table>
|
|
</div><div class="memdoc">
|
|
|
|
</div>
|
|
</div>
|
|
<a class="anchor" id="aeadc76210d06ec22776aca4a58de9930"></a>
|
|
<div class="memitem">
|
|
<div class="memproto">
|
|
<div class="memtemplate">
|
|
template&lt;typename ElementA_, typename LayoutA_, typename ElementB_, typename LayoutB_, typename ElementC_, typename LayoutC_, typename ElementAccumulator_ = ElementC_, typename OperatorClass_ = arch::OpClassSimt, typename ArchTag_ = arch::Sm70, typename ThreadblockShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::ThreadblockShape, typename WarpShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::WarpShape, typename InstructionShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::InstructionShape, typename EpilogueOutputOp_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::EpilogueOutputOp, typename ConvertScaledOp_ = cutlass::epilogue::thread::Convert&lt; ElementAccumulator_, DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementAccumulator_, ElementAccumulator_&gt;::EpilogueOutputOp::kCount, ElementAccumulator_&gt;, typename ReductionOp_ = cutlass::reduction::thread::ReduceAdd&lt; ElementAccumulator_, typename EpilogueOutputOp_::ElementAccumulator, EpilogueOutputOp_::kCount&gt;, typename ThreadblockSwizzle_ = threadblock::GemmSplitKHorizontalThreadblockSwizzle, int Stages = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kStages, int kAlignmentA = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentA, int kAlignmentB = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentB, typename Operator_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::Operator&gt; </div>
|
|
<table class="mlabels">
|
|
<tr>
|
|
<td class="mlabels-left">
|
|
<table class="memname">
|
|
<tr>
|
|
<td class="memname"><a class="el" href="namespacecutlass.html#ac5a88c5840a28a9e0206b9cc7812a18d">Status</a> <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html">cutlass::gemm::device::GemmSplitKParallel</a>&lt; ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ &gt;::run </td>
|
|
<td>(</td>
|
|
<td class="paramtype">cudaStream_t </td>
|
|
<td class="paramname"><em>stream</em> = <code><a class="el" href="platform_8h.html#ab979d9d4b4923f7c54d6caa6e1a61936">nullptr</a></code></td><td>)</td>
|
|
<td></td>
|
|
</tr>
|
|
</table>
|
|
</td>
|
|
<td class="mlabels-right">
|
|
<span class="mlabels"><span class="mlabel">inline</span></span> </td>
|
|
</tr>
|
|
</table>
|
|
</div><div class="memdoc">
|
|
|
|
</div>
|
|
</div>
|
|
<a class="anchor" id="a07ad725857d7eb191cbfc135df22b781"></a>
|
|
<div class="memitem">
|
|
<div class="memproto">
|
|
<div class="memtemplate">
|
|
template&lt;typename ElementA_, typename LayoutA_, typename ElementB_, typename LayoutB_, typename ElementC_, typename LayoutC_, typename ElementAccumulator_ = ElementC_, typename OperatorClass_ = arch::OpClassSimt, typename ArchTag_ = arch::Sm70, typename ThreadblockShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::ThreadblockShape, typename WarpShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::WarpShape, typename InstructionShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::InstructionShape, typename EpilogueOutputOp_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::EpilogueOutputOp, typename ConvertScaledOp_ = cutlass::epilogue::thread::Convert&lt; ElementAccumulator_, DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementAccumulator_, ElementAccumulator_&gt;::EpilogueOutputOp::kCount, ElementAccumulator_&gt;, typename ReductionOp_ = cutlass::reduction::thread::ReduceAdd&lt; ElementAccumulator_, typename EpilogueOutputOp_::ElementAccumulator, EpilogueOutputOp_::kCount&gt;, typename ThreadblockSwizzle_ = threadblock::GemmSplitKHorizontalThreadblockSwizzle, int Stages = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kStages, int kAlignmentA = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentA, int kAlignmentB = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentB, typename Operator_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::Operator&gt; </div>
|
|
<table class="mlabels">
|
|
<tr>
|
|
<td class="mlabels-left">
|
|
<table class="memname">
|
|
<tr>
|
|
<td class="memname"><a class="el" href="namespacecutlass.html#ac5a88c5840a28a9e0206b9cc7812a18d">Status</a> <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html">cutlass::gemm::device::GemmSplitKParallel</a>&lt; ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ &gt;::update </td>
|
|
<td>(</td>
|
|
<td class="paramtype"><a class="el" href="structcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel_1_1Arguments.html">Arguments</a> const &amp; </td>
|
|
<td class="paramname"><em>args</em>, </td>
|
|
</tr>
|
|
<tr>
|
|
<td class="paramkey"></td>
|
|
<td></td>
|
|
<td class="paramtype">void * </td>
|
|
<td class="paramname"><em>workspace</em> = <code><a class="el" href="platform_8h.html#ab979d9d4b4923f7c54d6caa6e1a61936">nullptr</a></code> </td>
|
|
</tr>
|
|
<tr>
|
|
<td></td>
|
|
<td>)</td>
|
|
<td></td><td></td>
|
|
</tr>
|
|
</table>
|
|
</td>
|
|
<td class="mlabels-right">
|
|
<span class="mlabels"><span class="mlabel">inline</span></span> </td>
|
|
</tr>
|
|
</table>
|
|
</div><div class="memdoc">
|
|
|
|
</div>
|
|
</div>
|
|
<h2 class="groupheader">Member Data Documentation</h2>
|
|
<a class="anchor" id="af1f647942f7734bbf01e473118f2512c"></a>
|
|
<div class="memitem">
|
|
<div class="memproto">
|
|
<div class="memtemplate">
|
|
template&lt;typename ElementA_, typename LayoutA_, typename ElementB_, typename LayoutB_, typename ElementC_, typename LayoutC_, typename ElementAccumulator_ = ElementC_, typename OperatorClass_ = arch::OpClassSimt, typename ArchTag_ = arch::Sm70, typename ThreadblockShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::ThreadblockShape, typename WarpShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::WarpShape, typename InstructionShape_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::InstructionShape, typename EpilogueOutputOp_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::EpilogueOutputOp, typename ConvertScaledOp_ = cutlass::epilogue::thread::Convert&lt; ElementAccumulator_, DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementAccumulator_, ElementAccumulator_&gt;::EpilogueOutputOp::kCount, ElementAccumulator_&gt;, typename ReductionOp_ = cutlass::reduction::thread::ReduceAdd&lt; ElementAccumulator_, typename EpilogueOutputOp_::ElementAccumulator, EpilogueOutputOp_::kCount&gt;, typename ThreadblockSwizzle_ = threadblock::GemmSplitKHorizontalThreadblockSwizzle, int Stages = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kStages, int kAlignmentA = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentA, int kAlignmentB = DefaultGemmConfiguration&lt;OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::kAlignmentB, typename Operator_ = typename DefaultGemmConfiguration&lt; OperatorClass_, ArchTag_, ElementA_, ElementB_, ElementC_, ElementAccumulator_&gt;::Operator&gt; </div>
|
|
<table class="mlabels">
|
|
<tr>
|
|
<td class="mlabels-left">
|
|
<table class="memname">
|
|
<tr>
|
|
<td class="memname">int const <a class="el" href="classcutlass_1_1gemm_1_1device_1_1GemmSplitKParallel.html">cutlass::gemm::device::GemmSplitKParallel</a>&lt; ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ &gt;::kStages = Stages</td>
|
|
</tr>
|
|
</table>
|
|
</td>
|
|
<td class="mlabels-right">
|
|
<span class="mlabels"><span class="mlabel">static</span></span> </td>
|
|
</tr>
|
|
</table>
|
|
</div><div class="memdoc">
|
|
|
|
</div>
|
|
</div>
|
|
<hr/>The documentation for this class was generated from the following file:<ul>
|
|
<li><a class="el" href="device_2gemm__splitk__parallel_8h_source.html">device/gemm_splitk_parallel.h</a></li>
|
|
</ul>
|
|
</div><!-- contents -->
|
|
<!-- start footer part -->
|
|
<hr class="footer"/><address class="footer"><small>
|
|
Generated by  <a href="http://www.doxygen.org/index.html">
|
|
<img class="footer" src="doxygen.png" alt="doxygen"/>
|
|
</a> 1.8.11
|
|
</small></address>
|
|
</body>
|
|
</html>
|