<!-- cutlass/docs/files.html — Doxygen-generated file list (293 lines, 71 KiB, HTML) -->
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
<meta http-equiv="X-UA-Compatible" content="IE=9"/>
<meta name="generator" content="Doxygen 1.8.11"/>
<title>CUTLASS: File List</title>
<link href="tabs.css" rel="stylesheet" type="text/css"/>
<script type="text/javascript" src="jquery.js"></script>
<script type="text/javascript" src="dynsections.js"></script>
<link href="search/search.css" rel="stylesheet" type="text/css"/>
<script type="text/javascript" src="search/searchdata.js"></script>
<script type="text/javascript" src="search/search.js"></script>
<script type="text/javascript">
$(document).ready(function() { init_search(); });
</script>
<script type="text/x-mathjax-config">
MathJax.Hub.Config({
extensions: ["tex2jax.js"],
jax: ["input/TeX","output/HTML-CSS"],
});
</script><script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js"></script>
<link href="doxygen.css" rel="stylesheet" type="text/css" />
</head>
<body>
<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
<div id="titlearea">
<table cellspacing="0" cellpadding="0">
<tbody>
<tr style="height: 56px;">
<td id="projectlogo"><img alt="Logo" src="cutlass-logo-small.png"/></td>
<td id="projectalign" style="padding-left: 0.5em;">
<div id="projectname">CUTLASS
</div>
<div id="projectbrief">CUDA Templates for Linear Algebra Subroutines and Solvers</div>
</td>
</tr>
</tbody>
</table>
</div>
<!-- end header part -->
<!-- Generated by Doxygen 1.8.11 -->
<script type="text/javascript">
var searchBox = new SearchBox("searchBox", "search",false,'Search');
</script>
<div id="navrow1" class="tabs">
<ul class="tablist">
<li><a href="index.html"><span>Main&#160;Page</span></a></li>
<li><a href="modules.html"><span>Modules</span></a></li>
<li><a href="namespaces.html"><span>Namespaces</span></a></li>
<li><a href="annotated.html"><span>Classes</span></a></li>
<li class="current"><a href="files.html"><span>Files</span></a></li>
<li>
<div id="MSearchBox" class="MSearchBoxInactive">
<span class="left">
<img id="MSearchSelect" src="search/mag_sel.png"
onmouseover="return searchBox.OnSearchSelectShow()"
onmouseout="return searchBox.OnSearchSelectHide()"
alt=""/>
<input type="text" id="MSearchField" value="Search" accesskey="S"
onfocus="searchBox.OnSearchFieldFocus(true)"
onblur="searchBox.OnSearchFieldFocus(false)"
onkeyup="searchBox.OnSearchFieldChange(event)"/>
</span><span class="right">
<a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.png" alt=""/></a>
</span>
</div>
</li>
</ul>
</div>
<div id="navrow2" class="tabs2">
<ul class="tablist">
<li class="current"><a href="files.html"><span>File&#160;List</span></a></li>
<li><a href="globals.html"><span>File&#160;Members</span></a></li>
</ul>
</div>
</div><!-- top -->
<!-- window showing the filter options -->
<div id="MSearchSelectWindow"
onmouseover="return searchBox.OnSearchSelectShow()"
onmouseout="return searchBox.OnSearchSelectHide()"
onkeydown="return searchBox.OnSearchSelectKey(event)">
</div>
<!-- iframe showing the search results (closed by default) -->
<div id="MSearchResultsWindow">
<iframe src="javascript:void(0)" frameborder="0"
name="MSearchResults" id="MSearchResults">
</iframe>
</div>
<div class="header">
<div class="headertitle">
<div class="title">File List</div> </div>
</div><!--header-->
<div class="contents">
<div class="textblock">Here is a list of all files with brief descriptions:</div><div class="directory">
<table class="directory">
<tr id="row_0_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="aligned__buffer_8h_source.html"><span class="icondoc"></span></a><a class="el" href="aligned__buffer_8h.html" target="_self">aligned_buffer.h</a></td><td class="desc">AlignedBuffer is a container for trivially copyable elements suitable for use in unions and shared memory </td></tr>
<tr id="row_1_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="arch_8h_source.html"><span class="icondoc"></span></a><a class="el" href="arch_8h.html" target="_self">arch.h</a></td><td class="desc">Defines tags for architecture-specific configurations </td></tr>
<tr id="row_2_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="array_8h_source.html"><span class="icondoc"></span></a><a class="el" href="array_8h.html" target="_self">array.h</a></td><td class="desc">Statically sized array of elements that accommodates all CUTLASS-supported numeric types and is safe to use in a union </td></tr>
<tr id="row_3_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="array__subbyte_8h_source.html"><span class="icondoc"></span></a><a class="el" href="array__subbyte_8h.html" target="_self">array_subbyte.h</a></td><td class="desc">Statically sized array of elements that accommodates all CUTLASS-supported numeric types and is safe to use in a union </td></tr>
<tr id="row_4_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="batched__reduction_8h_source.html"><span class="icondoc"></span></a><a class="el" href="batched__reduction_8h.html" target="_self">batched_reduction.h</a></td><td class="desc">Implements a software-pipelined efficient batched reduction. D = alpha * Reduction(A) + beta * C </td></tr>
<tr id="row_5_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="batched__reduction__traits_8h_source.html"><span class="icondoc"></span></a><a class="el" href="batched__reduction__traits_8h.html" target="_self">batched_reduction_traits.h</a></td><td class="desc">Defines structural properties of complete batched reduction. D = alpha * Reduction(A) + beta * C </td></tr>
<tr id="row_6_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="command__line_8h_source.html"><span class="icondoc"></span></a><a class="el" href="command__line_8h.html" target="_self">command_line.h</a></td><td class="desc"></td></tr>
<tr id="row_7_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="complex_8h_source.html"><span class="icondoc"></span></a><a class="el" href="complex_8h.html" target="_self">complex.h</a></td><td class="desc"></td></tr>
<tr id="row_8_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="conversion__op_8h_source.html"><span class="icondoc"></span></a><a class="el" href="conversion__op_8h.html" target="_self">conversion_op.h</a></td><td class="desc">Functor performing conversion operations used by epilogues </td></tr>
<tr id="row_9_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="coord_8h_source.html"><span class="icondoc"></span></a><a class="el" href="coord_8h.html" target="_self">coord.h</a></td><td class="desc">A Coord is a coordinate of arbitrary rank into a tensor or matrix </td></tr>
<tr id="row_10_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="core__io_8h_source.html"><span class="icondoc"></span></a><a class="el" href="core__io_8h.html" target="_self">core_io.h</a></td><td class="desc">Helpers for printing cutlass/core objects </td></tr>
<tr id="row_11_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="cutlass_8h_source.html"><span class="icondoc"></span></a><a class="el" href="cutlass_8h.html" target="_self">cutlass.h</a></td><td class="desc">Basic include for CUTLASS </td></tr>
<tr id="row_12_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="include_2cutlass_2util_2debug_8h_source.html"><span class="icondoc"></span></a><a class="el" href="include_2cutlass_2util_2debug_8h.html" target="_self">include/cutlass/util/debug.h</a></td><td class="desc">Debugging and logging functionality </td></tr>
<tr id="row_13_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="tools_2util_2include_2cutlass_2util_2debug_8h_source.html"><span class="icondoc"></span></a><a class="el" href="tools_2util_2include_2cutlass_2util_2debug_8h.html" target="_self">tools/util/include/cutlass/util/debug.h</a></td><td class="desc">Contains code for debugging cutlass code </td></tr>
<tr id="row_14_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="default__epilogue__complex__tensor__op_8h_source.html"><span class="icondoc"></span></a><a class="el" href="default__epilogue__complex__tensor__op_8h.html" target="_self">default_epilogue_complex_tensor_op.h</a></td><td class="desc">Epilogue for threadblock scoped complex GEMMs using Tensor Ops </td></tr>
<tr id="row_15_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="default__epilogue__simt_8h_source.html"><span class="icondoc"></span></a><a class="el" href="default__epilogue__simt_8h.html" target="_self">default_epilogue_simt.h</a></td><td class="desc">Epilogue for threadblock scoped GEMMs using SIMT </td></tr>
<tr id="row_16_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="default__epilogue__tensor__op_8h_source.html"><span class="icondoc"></span></a><a class="el" href="default__epilogue__tensor__op_8h.html" target="_self">default_epilogue_tensor_op.h</a></td><td class="desc">Epilogue for threadblock scoped GEMMs using Tensor Ops </td></tr>
<tr id="row_17_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="default__epilogue__volta__tensor__op_8h_source.html"><span class="icondoc"></span></a><a class="el" href="default__epilogue__volta__tensor__op_8h.html" target="_self">default_epilogue_volta_tensor_op.h</a></td><td class="desc">Epilogue for threadblock scoped GEMMs using Tensor Ops on Volta </td></tr>
<tr id="row_18_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="default__epilogue__wmma__tensor__op_8h_source.html"><span class="icondoc"></span></a><a class="el" href="default__epilogue__wmma__tensor__op_8h.html" target="_self">default_epilogue_wmma_tensor_op.h</a></td><td class="desc">Epilogue for threadblock scoped GEMMs using Tensor Ops </td></tr>
<tr id="row_19_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="default__gemm_8h_source.html"><span class="icondoc"></span></a><a class="el" href="default__gemm_8h.html" target="_self">default_gemm.h</a></td><td class="desc">Default kernel-level GEMM definitions combine threadblock-scoped matrix multiply-add with the appropriate threadblock-scoped epilogue </td></tr>
<tr id="row_20_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="default__gemm__configuration_8h_source.html"><span class="icondoc"></span></a><a class="el" href="default__gemm__configuration_8h.html" target="_self">default_gemm_configuration.h</a></td><td class="desc">Definitions for GEMM structures </td></tr>
<tr id="row_21_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="default__gemm__splitk__parallel_8h_source.html"><span class="icondoc"></span></a><a class="el" href="default__gemm__splitk__parallel_8h.html" target="_self">default_gemm_splitk_parallel.h</a></td><td class="desc">Default kernel-level GEMM definitions combine threadblock-scoped matrix multiply-add with the appropriate threadblock-scoped epilogue </td></tr>
<tr id="row_22_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="default__gemv_8h_source.html"><span class="icondoc"></span></a><a class="el" href="default__gemv_8h.html" target="_self">default_gemv.h</a></td><td class="desc"></td></tr>
<tr id="row_23_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="default__gemv__core_8h_source.html"><span class="icondoc"></span></a><a class="el" href="default__gemv__core_8h.html" target="_self">default_gemv_core.h</a></td><td class="desc">Defines basic properties needed by CTA-level batched GEMV assuming expectations about data layout of the global memory fragments, data types, and internal tile sizes </td></tr>
<tr id="row_24_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="default__mma_8h_source.html"><span class="icondoc"></span></a><a class="el" href="default__mma_8h.html" target="_self">default_mma.h</a></td><td class="desc">Template for a pipelined GEMM kernel. Does not compute batching or support split-K </td></tr>
<tr id="row_25_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="default__mma__core_8h_source.html"><span class="icondoc"></span></a><a class="el" href="default__mma__core_8h.html" target="_self">default_mma_core.h</a></td><td class="desc">Defines basic properties needed by CTA-level GEMMs assuming expectations about data layout of the global memory fragments, data types, and internal tile sizes </td></tr>
<tr id="row_26_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="default__mma__core__simt_8h_source.html"><span class="icondoc"></span></a><a class="el" href="default__mma__core__simt_8h.html" target="_self">default_mma_core_simt.h</a></td><td class="desc">Defines basic properties needed by CTA-level GEMMs assuming expectations about data layout of the global memory fragments, data types, and internal tile sizes </td></tr>
<tr id="row_27_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="default__mma__core__sm50_8h_source.html"><span class="icondoc"></span></a><a class="el" href="default__mma__core__sm50_8h.html" target="_self">default_mma_core_sm50.h</a></td><td class="desc">Defines basic properties needed by CTA-level GEMMs assuming expectations about data layout of the global memory fragments, data types, and internal tile sizes </td></tr>
<tr id="row_28_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="default__mma__core__sm70_8h_source.html"><span class="icondoc"></span></a><a class="el" href="default__mma__core__sm70_8h.html" target="_self">default_mma_core_sm70.h</a></td><td class="desc">Defines basic properties needed by CTA-level GEMMs assuming expectations about data layout of the global memory fragments, data types, and internal tile sizes </td></tr>
<tr id="row_29_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="default__mma__core__sm75_8h_source.html"><span class="icondoc"></span></a><a class="el" href="default__mma__core__sm75_8h.html" target="_self">default_mma_core_sm75.h</a></td><td class="desc">Defines basic properties needed by CTA-level GEMMs assuming expectations about data layout of the global memory fragments, data types, and internal tile sizes </td></tr>
<tr id="row_30_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="default__mma__core__wmma_8h_source.html"><span class="icondoc"></span></a><a class="el" href="default__mma__core__wmma_8h.html" target="_self">default_mma_core_wmma.h</a></td><td class="desc">Defines basic properties needed by CTA-level GEMMs assuming expectations about data layout of the global memory fragments, data types, and internal tile sizes </td></tr>
<tr id="row_31_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="default__mma__tensor__op_8h_source.html"><span class="icondoc"></span></a><a class="el" href="default__mma__tensor__op_8h.html" target="_self">default_mma_tensor_op.h</a></td><td class="desc">Default warp-level GEMM operators selected by data type, size, and layouts of operands </td></tr>
<tr id="row_32_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="default__mma__wmma__tensor__op_8h_source.html"><span class="icondoc"></span></a><a class="el" href="default__mma__wmma__tensor__op_8h.html" target="_self">default_mma_wmma_tensor_op.h</a></td><td class="desc">Default warp-level GEMM operators selected by data type, size, and layouts of operands </td></tr>
<tr id="row_33_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="default__thread__map__simt_8h_source.html"><span class="icondoc"></span></a><a class="el" href="default__thread__map__simt_8h.html" target="_self">default_thread_map_simt.h</a></td><td class="desc"></td></tr>
<tr id="row_34_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="default__thread__map__tensor__op_8h_source.html"><span class="icondoc"></span></a><a class="el" href="default__thread__map__tensor__op_8h.html" target="_self">default_thread_map_tensor_op.h</a></td><td class="desc"></td></tr>
<tr id="row_35_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="default__thread__map__volta__tensor__op_8h_source.html"><span class="icondoc"></span></a><a class="el" href="default__thread__map__volta__tensor__op_8h.html" target="_self">default_thread_map_volta_tensor_op.h</a></td><td class="desc"></td></tr>
<tr id="row_36_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="default__thread__map__wmma__tensor__op_8h_source.html"><span class="icondoc"></span></a><a class="el" href="default__thread__map__wmma__tensor__op_8h.html" target="_self">default_thread_map_wmma_tensor_op.h</a></td><td class="desc"></td></tr>
<tr id="row_37_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="device__dump_8h_source.html"><span class="icondoc"></span></a><a class="el" href="device__dump_8h.html" target="_self">device_dump.h</a></td><td class="desc">C++ interface to dump fragments and shared memory contents for debugging </td></tr>
<tr id="row_38_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="device__kernel_8h_source.html"><span class="icondoc"></span></a><a class="el" href="device__kernel_8h.html" target="_self">device_kernel.h</a></td><td class="desc">Template for generic CUTLASS kernel </td></tr>
<tr id="row_39_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="device__memory_8h_source.html"><span class="icondoc"></span></a><a class="el" href="device__memory_8h.html" target="_self">device_memory.h</a></td><td class="desc">C++ interface to CUDA device memory management functions </td></tr>
<tr id="row_40_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="direct__epilogue__tensor__op_8h_source.html"><span class="icondoc"></span></a><a class="el" href="direct__epilogue__tensor__op_8h.html" target="_self">direct_epilogue_tensor_op.h</a></td><td class="desc">Epilogue for tensor operations </td></tr>
<tr id="row_41_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="distribution_8h_source.html"><span class="icondoc"></span></a><a class="el" href="distribution_8h.html" target="_self">distribution.h</a></td><td class="desc">This header contains a class to parametrize a statistical distribution function </td></tr>
<tr id="row_42_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="epilogue_8h_source.html"><span class="icondoc"></span></a><a class="el" href="epilogue_8h.html" target="_self">epilogue.h</a></td><td class="desc">Epilogue for threadblock scoped GEMMs using Tensor Ops </td></tr>
<tr id="row_43_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="epilogue__base_8h_source.html"><span class="icondoc"></span></a><a class="el" href="epilogue__base_8h.html" target="_self">epilogue_base.h</a></td><td class="desc">Epilogue for threadblock scoped GEMMs using Tensor Ops </td></tr>
<tr id="row_44_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="epilogue__workspace_8h_source.html"><span class="icondoc"></span></a><a class="el" href="epilogue__workspace_8h.html" target="_self">epilogue_workspace.h</a></td><td class="desc">Epilogue for threadblock scoped GEMMs </td></tr>
<tr id="row_45_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="exceptions_8h_source.html"><span class="icondoc"></span></a><a class="el" href="exceptions_8h.html" target="_self">exceptions.h</a></td><td class="desc">C++ exception semantics for CUDA error codes </td></tr>
<tr id="row_46_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="fast__math_8h_source.html"><span class="icondoc"></span></a><a class="el" href="fast__math_8h.html" target="_self">fast_math.h</a></td><td class="desc">Math utilities </td></tr>
<tr id="row_47_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="fragment__iterator__complex__tensor__op_8h_source.html"><span class="icondoc"></span></a><a class="el" href="fragment__iterator__complex__tensor__op_8h.html" target="_self">fragment_iterator_complex_tensor_op.h</a></td><td class="desc">This defines a "fragment" iterator for visiting the fragments of an accumulator tile that participate in one warp-level store operation </td></tr>
<tr id="row_48_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="fragment__iterator__simt_8h_source.html"><span class="icondoc"></span></a><a class="el" href="fragment__iterator__simt_8h.html" target="_self">fragment_iterator_simt.h</a></td><td class="desc">This defines a "fragment" iterator for visiting the fragments of an accumulator tile that participate in one warp-level store operation </td></tr>
<tr id="row_49_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="fragment__iterator__tensor__op_8h_source.html"><span class="icondoc"></span></a><a class="el" href="fragment__iterator__tensor__op_8h.html" target="_self">fragment_iterator_tensor_op.h</a></td><td class="desc">This defines a "fragment" iterator for visiting the fragments of an accumulator tile that participate in one warp-level store operation </td></tr>
<tr id="row_50_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="fragment__iterator__volta__tensor__op_8h_source.html"><span class="icondoc"></span></a><a class="el" href="fragment__iterator__volta__tensor__op_8h.html" target="_self">fragment_iterator_volta_tensor_op.h</a></td><td class="desc">This defines a "fragment" iterator for visiting the fragments of an accumulator tile that participate in one warp-level store operation </td></tr>
<tr id="row_51_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="fragment__iterator__wmma__tensor__op_8h_source.html"><span class="icondoc"></span></a><a class="el" href="fragment__iterator__wmma__tensor__op_8h.html" target="_self">fragment_iterator_wmma_tensor_op.h</a></td><td class="desc">This defines a "fragment" iterator for visiting the fragments of an accumulator tile that participate in one warp-level store operation </td></tr>
<tr id="row_52_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="functional_8h_source.html"><span class="icondoc"></span></a><a class="el" href="functional_8h.html" target="_self">functional.h</a></td><td class="desc">Define basic numeric operators with specializations for Array&lt;T, N&gt;. SIMD-ize where possible </td></tr>
<tr id="row_53_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="include_2cutlass_2gemm_2device_2gemm_8h_source.html"><span class="icondoc"></span></a><a class="el" href="include_2cutlass_2gemm_2device_2gemm_8h.html" target="_self">include/cutlass/gemm/device/gemm.h</a></td><td class="desc">Template for a pipelined GEMM kernel. Does not compute batching or support split-K </td></tr>
<tr id="row_54_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="include_2cutlass_2gemm_2gemm_8h_source.html"><span class="icondoc"></span></a><a class="el" href="include_2cutlass_2gemm_2gemm_8h.html" target="_self">include/cutlass/gemm/gemm.h</a></td><td class="desc">Defines common types used for all GEMM-like operators </td></tr>
<tr id="row_55_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="include_2cutlass_2gemm_2kernel_2gemm_8h_source.html"><span class="icondoc"></span></a><a class="el" href="include_2cutlass_2gemm_2kernel_2gemm_8h.html" target="_self">include/cutlass/gemm/kernel/gemm.h</a></td><td class="desc">Template for a pipelined GEMM kernel. Does not compute batching or support split-K </td></tr>
<tr id="row_56_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="tools_2util_2include_2cutlass_2util_2reference_2device_2gemm_8h_source.html"><span class="icondoc"></span></a><a class="el" href="tools_2util_2include_2cutlass_2util_2reference_2device_2gemm_8h.html" target="_self">tools/util/include/cutlass/util/reference/device/gemm.h</a></td><td class="desc">Reference implementation for GEMM in device-side code </td></tr>
<tr id="row_57_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="tools_2util_2include_2cutlass_2util_2reference_2device_2kernel_2gemm_8h_source.html"><span class="icondoc"></span></a><a class="el" href="tools_2util_2include_2cutlass_2util_2reference_2device_2kernel_2gemm_8h.html" target="_self">tools/util/include/cutlass/util/reference/device/kernel/gemm.h</a></td><td class="desc">Reference implementation for GEMM in device-side code </td></tr>
<tr id="row_58_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="tools_2util_2include_2cutlass_2util_2reference_2device_2thread_2gemm_8h_source.html"><span class="icondoc"></span></a><a class="el" href="tools_2util_2include_2cutlass_2util_2reference_2device_2thread_2gemm_8h.html" target="_self">tools/util/include/cutlass/util/reference/device/thread/gemm.h</a></td><td class="desc">Reference implementation for GEMM in device-side code </td></tr>
<tr id="row_59_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="tools_2util_2include_2cutlass_2util_2reference_2host_2gemm_8h_source.html"><span class="icondoc"></span></a><a class="el" href="tools_2util_2include_2cutlass_2util_2reference_2host_2gemm_8h.html" target="_self">tools/util/include/cutlass/util/reference/host/gemm.h</a></td><td class="desc">Reference implementation for GEMM in host-side code </td></tr>
<tr id="row_60_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="device_2gemm__batched_8h_source.html"><span class="icondoc"></span></a><a class="el" href="device_2gemm__batched_8h.html" target="_self">device/gemm_batched.h</a></td><td class="desc">Template for a pipelined batched GEMM kernel. Does not support split-K </td></tr>
<tr id="row_61_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="kernel_2gemm__batched_8h_source.html"><span class="icondoc"></span></a><a class="el" href="kernel_2gemm__batched_8h.html" target="_self">kernel/gemm_batched.h</a></td><td class="desc">Template for a pipelined batched GEMM kernel. Does not support split-K </td></tr>
<tr id="row_62_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="include_2cutlass_2gemm_2device_2gemm__complex_8h_source.html"><span class="icondoc"></span></a><a class="el" href="include_2cutlass_2gemm_2device_2gemm__complex_8h.html" target="_self">include/cutlass/gemm/device/gemm_complex.h</a></td><td class="desc">Template for a pipelined GEMM kernel. Does not compute batching or support split-K </td></tr>
<tr id="row_63_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="tools_2util_2include_2cutlass_2util_2reference_2host_2gemm__complex_8h_source.html"><span class="icondoc"></span></a><a class="el" href="tools_2util_2include_2cutlass_2util_2reference_2host_2gemm__complex_8h.html" target="_self">tools/util/include/cutlass/util/reference/host/gemm_complex.h</a></td><td class="desc">Reference implementation for complex-valued GEMM in host-side code </td></tr>
<tr id="row_64_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="gemm__pipelined_8h_source.html"><span class="icondoc"></span></a><a class="el" href="gemm__pipelined_8h.html" target="_self">gemm_pipelined.h</a></td><td class="desc">Template for a pipelined GEMM kernel. Does not compute batching or support split-K </td></tr>
<tr id="row_65_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="device_2gemm__splitk__parallel_8h_source.html"><span class="icondoc"></span></a><a class="el" href="device_2gemm__splitk__parallel_8h.html" target="_self">device/gemm_splitk_parallel.h</a></td><td class="desc">Template for GEMM performing a reduction over K partitions in parallel </td></tr>
<tr id="row_66_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="kernel_2gemm__splitk__parallel_8h_source.html"><span class="icondoc"></span></a><a class="el" href="kernel_2gemm__splitk__parallel_8h.html" target="_self">kernel/gemm_splitk_parallel.h</a></td><td class="desc">Template for GEMM performing a reduction over K partitions in parallel </td></tr>
<tr id="row_67_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="gemv_8h_source.html"><span class="icondoc"></span></a><a class="el" href="gemv_8h.html" target="_self">gemv.h</a></td><td class="desc">Template for a threadblock-scoped GEMV kernel </td></tr>
<tr id="row_68_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="gemv__batched__strided_8h_source.html"><span class="icondoc"></span></a><a class="el" href="gemv__batched__strided_8h.html" target="_self">gemv_batched_strided.h</a></td><td class="desc"></td></tr>
<tr id="row_69_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="half_8h_source.html"><span class="icondoc"></span></a><a class="el" href="half_8h.html" target="_self">half.h</a></td><td class="desc">Defines a class for using IEEE half-precision floating-point types in host or device code </td></tr>
<tr id="row_70_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="host__reorder_8h_source.html"><span class="icondoc"></span></a><a class="el" href="host__reorder_8h.html" target="_self">host_reorder.h</a></td><td class="desc">Reorder data from the host side </td></tr>
<tr id="row_71_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="host__tensor_8h_source.html"><span class="icondoc"></span></a><a class="el" href="host__tensor_8h.html" target="_self">host_tensor.h</a></td><td class="desc">HostTensor contributes management for both host and device memory </td></tr>
<tr id="row_72_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="inner__product_8h_source.html"><span class="icondoc"></span></a><a class="el" href="inner__product_8h.html" target="_self">inner_product.h</a></td><td class="desc">Reference implementation for GEMM in host-side code </td></tr>
<tr id="row_73_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="integer__subbyte_8h_source.html"><span class="icondoc"></span></a><a class="el" href="integer__subbyte_8h.html" target="_self">integer_subbyte.h</a></td><td class="desc">Defines a class for using integer types smaller than one byte in host or device code </td></tr>
<tr id="row_74_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="interleaved__epilogue_8h_source.html"><span class="icondoc"></span></a><a class="el" href="interleaved__epilogue_8h.html" target="_self">interleaved_epilogue.h</a></td><td class="desc">Epilogue for threadblock scoped GEMMs using Tensor Ops </td></tr>
<tr id="row_75_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="kernel__launch_8h_source.html"><span class="icondoc"></span></a><a class="el" href="kernel__launch_8h.html" target="_self">kernel_launch.h</a></td><td class="desc">Defines structures and helpers to launch CUDA kernels within CUTLASS </td></tr>
<tr id="row_76_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="layout_8h_source.html"><span class="icondoc"></span></a><a class="el" href="layout_8h.html" target="_self">layout.h</a></td><td class="desc">Defines layout functions used by TensorRef and derived classes </td></tr>
<tr id="row_77_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="library_8h_source.html"><span class="icondoc"></span></a><a class="el" href="library_8h.html" target="_self">library.h</a></td><td class="desc">CUTLASS Library is an object-oriented approach to managing operations implemented by CUTLASS </td></tr>
<tr id="row_78_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="linear__combination_8h_source.html"><span class="icondoc"></span></a><a class="el" href="linear__combination_8h.html" target="_self">linear_combination.h</a></td><td class="desc">Functor performing linear combination operations used by epilogues </td></tr>
<tr id="row_79_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="linear__combination__clamp_8h_source.html"><span class="icondoc"></span></a><a class="el" href="linear__combination__clamp_8h.html" target="_self">linear_combination_clamp.h</a></td><td class="desc">Functor performing linear scaling operations used by epilogues. Values are clamped before converting to the output element type </td></tr>
<tr id="row_80_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="linear__combination__relu_8h_source.html"><span class="icondoc"></span></a><a class="el" href="linear__combination__relu_8h.html" target="_self">linear_combination_relu.h</a></td><td class="desc">Functor performing linear combination operations used by epilogues. Values are clamped before converting to the output element type </td></tr>
<tr id="row_81_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="manifest_8h_source.html"><span class="icondoc"></span></a><a class="el" href="manifest_8h.html" target="_self">manifest.h</a></td><td class="desc">Manifest of CUTLASS Library </td></tr>
<tr id="row_82_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="layout_2matrix_8h_source.html"><span class="icondoc"></span></a><a class="el" href="layout_2matrix_8h.html" target="_self">layout/matrix.h</a></td><td class="desc">Defines layout functions used by TensorRef and derived classes </td></tr>
<tr id="row_83_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="thread_2matrix_8h_source.html"><span class="icondoc"></span></a><a class="el" href="thread_2matrix_8h.html" target="_self">thread/matrix.h</a></td><td class="desc">Defines a matrix object intended for storing data in registers and operations within a CUDA thread </td></tr>
<tr id="row_84_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="matrix__coord_8h_source.html"><span class="icondoc"></span></a><a class="el" href="matrix__coord_8h.html" target="_self">matrix_coord.h</a></td><td class="desc">Defines a canonical coordinate for rank=2 matrices offering named indices </td></tr>
<tr id="row_85_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="matrix__shape_8h_source.html"><span class="icondoc"></span></a><a class="el" href="matrix__shape_8h.html" target="_self">matrix_shape.h</a></td><td class="desc">Defines a Shape template for matrix tiles </td></tr>
<tr id="row_86_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="matrix__traits_8h_source.html"><span class="icondoc"></span></a><a class="el" href="matrix__traits_8h.html" target="_self">matrix_traits.h</a></td><td class="desc">Defines properties of matrices used to denote layout and operands to GEMM kernels </td></tr>
<tr id="row_87_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="memory_8h_source.html"><span class="icondoc"></span></a><a class="el" href="memory_8h.html" target="_self">memory.h</a></td><td class="desc">Architecture-specific operators on memory </td></tr>
<tr id="row_88_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="memory__sm75_8h_source.html"><span class="icondoc"></span></a><a class="el" href="memory__sm75_8h.html" target="_self">memory_sm75.h</a></td><td class="desc">Architecture-specific operators on memory added for SM75 </td></tr>
<tr id="row_89_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="arch_2mma_8h_source.html"><span class="icondoc"></span></a><a class="el" href="arch_2mma_8h.html" target="_self">arch/mma.h</a></td><td class="desc">Templates exposing architecture support for multiply-add operations </td></tr>
<tr id="row_90_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="gemm_2thread_2mma_8h_source.html"><span class="icondoc"></span></a><a class="el" href="gemm_2thread_2mma_8h.html" target="_self">gemm/thread/mma.h</a></td><td class="desc">Templates exposing architecture support for warp-level multiply-add operations </td></tr>
<tr id="row_91_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="gemm_2warp_2mma_8h_source.html"><span class="icondoc"></span></a><a class="el" href="gemm_2warp_2mma_8h.html" target="_self">gemm/warp/mma.h</a></td><td class="desc">Templates exposing architecture support for warp-level multiply-add operations </td></tr>
<tr id="row_92_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="mma__base_8h_source.html"><span class="icondoc"></span></a><a class="el" href="mma__base_8h.html" target="_self">mma_base.h</a></td><td class="desc">Template for a double-buffered threadblock-scoped GEMM kernel </td></tr>
<tr id="row_93_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="mma__complex__tensor__op_8h_source.html"><span class="icondoc"></span></a><a class="el" href="mma__complex__tensor__op_8h.html" target="_self">mma_complex_tensor_op.h</a></td><td class="desc">Templates implementing warp-level matrix multiply-accumulate operations targeting Tensor Cores </td></tr>
<tr id="row_94_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="mma__pipelined_8h_source.html"><span class="icondoc"></span></a><a class="el" href="mma__pipelined_8h.html" target="_self">mma_pipelined.h</a></td><td class="desc">Template for a double-buffered threadblock-scoped GEMM kernel </td></tr>
<tr id="row_95_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="mma__simt_8h_source.html"><span class="icondoc"></span></a><a class="el" href="mma__simt_8h.html" target="_self">mma_simt.h</a></td><td class="desc">Templates implementing warp-level matrix multiply-accumulate operations </td></tr>
<tr id="row_96_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="mma__simt__policy_8h_source.html"><span class="icondoc"></span></a><a class="el" href="mma__simt__policy_8h.html" target="_self">mma_simt_policy.h</a></td><td class="desc">Describes the lane policy used by warp-level matrix multiply operators targeting SIMT instructions </td></tr>
<tr id="row_97_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="mma__simt__tile__iterator_8h_source.html"><span class="icondoc"></span></a><a class="el" href="mma__simt__tile__iterator_8h.html" target="_self">mma_simt_tile_iterator.h</a></td><td class="desc">Defines iterators used by warp-level matrix multiply operations targeting SIMT instructions </td></tr>
<tr id="row_98_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="mma__singlestage_8h_source.html"><span class="icondoc"></span></a><a class="el" href="mma__singlestage_8h.html" target="_self">mma_singlestage.h</a></td><td class="desc">Template for a double-buffered threadblock-scoped GEMM kernel </td></tr>
<tr id="row_99_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="arch_2mma__sm50_8h_source.html"><span class="icondoc"></span></a><a class="el" href="arch_2mma__sm50_8h.html" target="_self">arch/mma_sm50.h</a></td><td class="desc">Matrix multiply </td></tr>
<tr id="row_100_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="gemm_2thread_2mma__sm50_8h_source.html"><span class="icondoc"></span></a><a class="el" href="gemm_2thread_2mma__sm50_8h.html" target="_self">gemm/thread/mma_sm50.h</a></td><td class="desc">Templates exposing architecture support for multiply-add operations </td></tr>
<tr id="row_101_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="arch_2mma__sm60_8h_source.html"><span class="icondoc"></span></a><a class="el" href="arch_2mma__sm60_8h.html" target="_self">arch/mma_sm60.h</a></td><td class="desc">Matrix multiply </td></tr>
<tr id="row_102_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="gemm_2thread_2mma__sm60_8h_source.html"><span class="icondoc"></span></a><a class="el" href="gemm_2thread_2mma__sm60_8h.html" target="_self">gemm/thread/mma_sm60.h</a></td><td class="desc">Templates exposing architecture support for multiply-add operations </td></tr>
<tr id="row_103_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="arch_2mma__sm61_8h_source.html"><span class="icondoc"></span></a><a class="el" href="arch_2mma__sm61_8h.html" target="_self">arch/mma_sm61.h</a></td><td class="desc">Matrix multiply </td></tr>
<tr id="row_104_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="gemm_2thread_2mma__sm61_8h_source.html"><span class="icondoc"></span></a><a class="el" href="gemm_2thread_2mma__sm61_8h.html" target="_self">gemm/thread/mma_sm61.h</a></td><td class="desc">Templates exposing architecture support for multiply-add operations </td></tr>
<tr id="row_105_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="mma__sm70_8h_source.html"><span class="icondoc"></span></a><a class="el" href="mma__sm70_8h.html" target="_self">mma_sm70.h</a></td><td class="desc">Matrix multiply </td></tr>
<tr id="row_106_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="mma__sm75_8h_source.html"><span class="icondoc"></span></a><a class="el" href="mma__sm75_8h.html" target="_self">mma_sm75.h</a></td><td class="desc">Matrix multiply for SM75 </td></tr>
<tr id="row_107_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="mma__tensor__op_8h_source.html"><span class="icondoc"></span></a><a class="el" href="mma__tensor__op_8h.html" target="_self">mma_tensor_op.h</a></td><td class="desc">Templates implementing warp-level matrix multiply-accumulate operations targeting Tensor Cores </td></tr>
<tr id="row_108_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="mma__tensor__op__policy_8h_source.html"><span class="icondoc"></span></a><a class="el" href="mma__tensor__op__policy_8h.html" target="_self">mma_tensor_op_policy.h</a></td><td class="desc">Policy describing implementation details of warp-level GEMM targeting Tensor Cores </td></tr>
<tr id="row_109_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="mma__tensor__op__sm70_8h_source.html"><span class="icondoc"></span></a><a class="el" href="mma__tensor__op__sm70_8h.html" target="_self">mma_tensor_op_sm70.h</a></td><td class="desc">Templates implementing warp-level matrix multiply-accumulate operations targeting Tensor Cores </td></tr>
<tr id="row_110_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="mma__tensor__op__tile__iterator_8h_source.html"><span class="icondoc"></span></a><a class="el" href="mma__tensor__op__tile__iterator_8h.html" target="_self">mma_tensor_op_tile_iterator.h</a></td><td class="desc">Defines iterators used by warp-level matrix multiply operations targeting Tensor Cores </td></tr>
<tr id="row_111_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="mma__tensor__op__tile__iterator__sm70_8h_source.html"><span class="icondoc"></span></a><a class="el" href="mma__tensor__op__tile__iterator__sm70_8h.html" target="_self">mma_tensor_op_tile_iterator_sm70.h</a></td><td class="desc">Defines iterators used by warp-level matrix multiply operations targeting Tensor Cores </td></tr>
<tr id="row_112_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="mma__tensor__op__tile__iterator__wmma_8h_source.html"><span class="icondoc"></span></a><a class="el" href="mma__tensor__op__tile__iterator__wmma_8h.html" target="_self">mma_tensor_op_tile_iterator_wmma.h</a></td><td class="desc">Defines iterators used by warp-level matrix multiply operations targeting Tensor Cores </td></tr>
<tr id="row_113_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="mma__tensor__op__wmma_8h_source.html"><span class="icondoc"></span></a><a class="el" href="mma__tensor__op__wmma_8h.html" target="_self">mma_tensor_op_wmma.h</a></td><td class="desc">Templates implementing warp-level matrix multiply-accumulate operations targeting Tensor Cores </td></tr>
<tr id="row_114_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="numeric__conversion_8h_source.html"><span class="icondoc"></span></a><a class="el" href="numeric__conversion_8h.html" target="_self">numeric_conversion.h</a></td><td class="desc">Boost-like numeric conversion operator for CUTLASS numeric types </td></tr>
<tr id="row_115_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="numeric__types_8h_source.html"><span class="icondoc"></span></a><a class="el" href="numeric__types_8h.html" target="_self">numeric_types.h</a></td><td class="desc">Top-level include for all CUTLASS numeric types </td></tr>
<tr id="row_116_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="output__tile__thread__map_8h_source.html"><span class="icondoc"></span></a><a class="el" href="output__tile__thread__map_8h.html" target="_self">output_tile_thread_map.h</a></td><td class="desc">Metaprogram for determining the mapping of output elements to threads for epilogue tiles </td></tr>
<tr id="row_117_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="pitch__linear_8h_source.html"><span class="icondoc"></span></a><a class="el" href="pitch__linear_8h.html" target="_self">pitch_linear.h</a></td><td class="desc">Defines layout functions used by TensorRef and derived classes for pitch-linear memory </td></tr>
<tr id="row_118_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="pitch__linear__thread__map_8h_source.html"><span class="icondoc"></span></a><a class="el" href="pitch__linear__thread__map_8h.html" target="_self">pitch_linear_thread_map.h</a></td><td class="desc">Templates implementing how threads are mapped to a given tile </td></tr>
<tr id="row_119_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="platform_8h_source.html"><span class="icondoc"></span></a><a class="el" href="platform_8h.html" target="_self">platform.h</a></td><td class="desc">C++ features that may be otherwise unimplemented for CUDA device functions </td></tr>
<tr id="row_120_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="predicate__vector_8h_source.html"><span class="icondoc"></span></a><a class="el" href="predicate__vector_8h.html" target="_self">predicate_vector.h</a></td><td class="desc">Defines container classes and iterators for managing a statically sized vector of boolean predicates </td></tr>
<tr id="row_121_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="predicated__tile__access__iterator_8h_source.html"><span class="icondoc"></span></a><a class="el" href="predicated__tile__access__iterator_8h.html" target="_self">predicated_tile_access_iterator.h</a></td><td class="desc">Templates calculating the address and predicates to the load of tiles from pitch-linear rank=2 tensors </td></tr>
<tr id="row_122_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="predicated__tile__access__iterator__2dthreadtile_8h_source.html"><span class="icondoc"></span></a><a class="el" href="predicated__tile__access__iterator__2dthreadtile_8h.html" target="_self">predicated_tile_access_iterator_2dthreadtile.h</a></td><td class="desc">Templates calculating the address and predicates to the load of tiles from pitch-linear rank=2 tensors </td></tr>
<tr id="row_123_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="epilogue_2threadblock_2predicated__tile__iterator_8h_source.html"><span class="icondoc"></span></a><a class="el" href="epilogue_2threadblock_2predicated__tile__iterator_8h.html" target="_self">epilogue/threadblock/predicated_tile_iterator.h</a></td><td class="desc">Epilogue for threadblock scoped GEMMs using Tensor Ops </td></tr>
<tr id="row_124_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="transform_2threadblock_2predicated__tile__iterator_8h_source.html"><span class="icondoc"></span></a><a class="el" href="transform_2threadblock_2predicated__tile__iterator_8h.html" target="_self">transform/threadblock/predicated_tile_iterator.h</a></td><td class="desc">Templates implementing loading of tiles from pitch-linear rank=2 tensors </td></tr>
<tr id="row_125_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="predicated__tile__iterator__2dthreadtile_8h_source.html"><span class="icondoc"></span></a><a class="el" href="predicated__tile__iterator__2dthreadtile_8h.html" target="_self">predicated_tile_iterator_2dthreadtile.h</a></td><td class="desc">Templates implementing loading of tiles from pitch-linear rank=2 tensors </td></tr>
<tr id="row_126_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="real_8h_source.html"><span class="icondoc"></span></a><a class="el" href="real_8h.html" target="_self">real.h</a></td><td class="desc"></td></tr>
<tr id="row_127_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="reduce_8h_source.html"><span class="icondoc"></span></a><a class="el" href="reduce_8h.html" target="_self">reduce.h</a></td><td class="desc">Defines basic thread level reduction with specializations for Array&lt;T, N&gt; </td></tr>
<tr id="row_128_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="reduce__split__k_8h_source.html"><span class="icondoc"></span></a><a class="el" href="reduce__split__k_8h.html" target="_self">reduce_split_k.h</a></td><td class="desc">Kernel performing a reduction over densely packed tensors in global memory </td></tr>
<tr id="row_129_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="reduction__op_8h_source.html"><span class="icondoc"></span></a><a class="el" href="reduction__op_8h.html" target="_self">reduction_op.h</a></td><td class="desc">Functor performing reduction operations used by epilogues </td></tr>
<tr id="row_130_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="reduction__operators_8h_source.html"><span class="icondoc"></span></a><a class="el" href="reduction__operators_8h.html" target="_self">reduction_operators.h</a></td><td class="desc">Kernel performing a reduction over densely packed tensors in global memory </td></tr>
<tr id="row_131_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="regular__tile__access__iterator_8h_source.html"><span class="icondoc"></span></a><a class="el" href="regular__tile__access__iterator_8h.html" target="_self">regular_tile_access_iterator.h</a></td><td class="desc">Templates implementing the address computation of storing of tiles from pitch-linear rank=2 tensors </td></tr>
<tr id="row_132_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="regular__tile__access__iterator__pitch__linear_8h_source.html"><span class="icondoc"></span></a><a class="el" href="regular__tile__access__iterator__pitch__linear_8h.html" target="_self">regular_tile_access_iterator_pitch_linear.h</a></td><td class="desc">Templates implementing computing the addresses of storing of tiles from pitch-linear rank=2 tensors </td></tr>
<tr id="row_133_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="regular__tile__access__iterator__tensor__op_8h_source.html"><span class="icondoc"></span></a><a class="el" href="regular__tile__access__iterator__tensor__op_8h.html" target="_self">regular_tile_access_iterator_tensor_op.h</a></td><td class="desc">Templates implementing computing the addresses of storing of tiles from pitch-linear rank=2 tensors </td></tr>
<tr id="row_134_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="regular__tile__iterator_8h_source.html"><span class="icondoc"></span></a><a class="el" href="regular__tile__iterator_8h.html" target="_self">regular_tile_iterator.h</a></td><td class="desc">Templates implementing storing of tiles from pitch-linear rank=2 tensors </td></tr>
<tr id="row_135_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="regular__tile__iterator__pitch__linear_8h_source.html"><span class="icondoc"></span></a><a class="el" href="regular__tile__iterator__pitch__linear_8h.html" target="_self">regular_tile_iterator_pitch_linear.h</a></td><td class="desc">Templates implementing loading of tiles from pitch-linear rank=2 tensors </td></tr>
<tr id="row_136_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="regular__tile__iterator__pitch__linear__2dthreadtile_8h_source.html"><span class="icondoc"></span></a><a class="el" href="regular__tile__iterator__pitch__linear__2dthreadtile_8h.html" target="_self">regular_tile_iterator_pitch_linear_2dthreadtile.h</a></td><td class="desc">Templates implementing loading of tiles from pitch-linear rank=2 tensors </td></tr>
<tr id="row_137_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="regular__tile__iterator__tensor__op_8h_source.html"><span class="icondoc"></span></a><a class="el" href="regular__tile__iterator__tensor__op_8h.html" target="_self">regular_tile_iterator_tensor_op.h</a></td><td class="desc">Templates implementing storing of tiles from pitch-linear rank=2 tensors </td></tr>
<tr id="row_138_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="regular__tile__iterator__tensor__op__sm70_8h_source.html"><span class="icondoc"></span></a><a class="el" href="regular__tile__iterator__tensor__op__sm70_8h.html" target="_self">regular_tile_iterator_tensor_op_sm70.h</a></td><td class="desc">Templates implementing loading of tiles from pitch-linear rank=2 tensors </td></tr>
<tr id="row_139_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="relatively__equal_8h_source.html"><span class="icondoc"></span></a><a class="el" href="relatively__equal_8h.html" target="_self">relatively_equal.h</a></td><td class="desc"></td></tr>
<tr id="row_140_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="semaphore_8h_source.html"><span class="icondoc"></span></a><a class="el" href="semaphore_8h.html" target="_self">semaphore.h</a></td><td class="desc">Implementation of a CTA-wide semaphore for inter-CTA synchronization </td></tr>
<tr id="row_141_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="shared__load__iterator_8h_source.html"><span class="icondoc"></span></a><a class="el" href="shared__load__iterator_8h.html" target="_self">shared_load_iterator.h</a></td><td class="desc">Epilogue for threadblock scoped GEMMs using Tensor Ops </td></tr>
<tr id="row_142_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="simd_8h_source.html"><span class="icondoc"></span></a><a class="el" href="simd_8h.html" target="_self">simd.h</a></td><td class="desc">Templates exposing SIMD operators </td></tr>
<tr id="row_143_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="simd__sm60_8h_source.html"><span class="icondoc"></span></a><a class="el" href="simd__sm60_8h.html" target="_self">simd_sm60.h</a></td><td class="desc">Templates exposing SIMD operators for SM60 </td></tr>
<tr id="row_144_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="simd__sm61_8h_source.html"><span class="icondoc"></span></a><a class="el" href="simd__sm61_8h.html" target="_self">simd_sm61.h</a></td><td class="desc">Templates exposing SIMD operators for SM61 </td></tr>
<tr id="row_145_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="simt__policy_8h_source.html"><span class="icondoc"></span></a><a class="el" href="simt__policy_8h.html" target="_self">simt_policy.h</a></td><td class="desc">Defines basic structures needed for implementing the warp-scoped phase of the epilogue. These quantities assume a 'column-major' arrangement of SimtOp instructions, of which a row-oriented slice is visible per iteration </td></tr>
<tr id="row_146_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="subbyte__reference_8h_source.html"><span class="icondoc"></span></a><a class="el" href="subbyte__reference_8h.html" target="_self">subbyte_reference.h</a></td><td class="desc">Provides a mechanism for packing and unpacking elements smaller than one byte </td></tr>
<tr id="row_147_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="tensor_8h_source.html"><span class="icondoc"></span></a><a class="el" href="tensor_8h.html" target="_self">tensor.h</a></td><td class="desc">Defines layout functions used by TensorRef and derived classes for common 4-D and 5-D tensor formats </td></tr>
<tr id="row_148_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="device_2tensor__compare_8h_source.html"><span class="icondoc"></span></a><a class="el" href="device_2tensor__compare_8h.html" target="_self">device/tensor_compare.h</a></td><td class="desc"></td></tr>
<tr id="row_149_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="host_2tensor__compare_8h_source.html"><span class="icondoc"></span></a><a class="el" href="host_2tensor__compare_8h.html" target="_self">host/tensor_compare.h</a></td><td class="desc"></td></tr>
<tr id="row_150_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="tensor__coord_8h_source.html"><span class="icondoc"></span></a><a class="el" href="tensor__coord_8h.html" target="_self">tensor_coord.h</a></td><td class="desc">Defines a canonical coordinate for rank=4 tensors offering named indices </td></tr>
<tr id="row_151_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="tensor__copy_8h_source.html"><span class="icondoc"></span></a><a class="el" href="tensor__copy_8h.html" target="_self">tensor_copy.h</a></td><td class="desc"></td></tr>
<tr id="row_152_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="device_2kernel_2tensor__elementwise_8h_source.html"><span class="icondoc"></span></a><a class="el" href="device_2kernel_2tensor__elementwise_8h.html" target="_self">device/kernel/tensor_elementwise.h</a></td><td class="desc"></td></tr>
<tr id="row_153_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="host_2tensor__elementwise_8h_source.html"><span class="icondoc"></span></a><a class="el" href="host_2tensor__elementwise_8h.html" target="_self">host/tensor_elementwise.h</a></td><td class="desc"></td></tr>
<tr id="row_154_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="device_2tensor__fill_8h_source.html"><span class="icondoc"></span></a><a class="el" href="device_2tensor__fill_8h.html" target="_self">device/tensor_fill.h</a></td><td class="desc"></td></tr>
<tr id="row_155_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="host_2tensor__fill_8h_source.html"><span class="icondoc"></span></a><a class="el" href="host_2tensor__fill_8h.html" target="_self">host/tensor_fill.h</a></td><td class="desc"></td></tr>
<tr id="row_156_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="device_2kernel_2tensor__foreach_8h_source.html"><span class="icondoc"></span></a><a class="el" href="device_2kernel_2tensor__foreach_8h.html" target="_self">device/kernel/tensor_foreach.h</a></td><td class="desc"></td></tr>
<tr id="row_157_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="device_2tensor__foreach_8h_source.html"><span class="icondoc"></span></a><a class="el" href="device_2tensor__foreach_8h.html" target="_self">device/tensor_foreach.h</a></td><td class="desc"></td></tr>
<tr id="row_158_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="host_2tensor__foreach_8h_source.html"><span class="icondoc"></span></a><a class="el" href="host_2tensor__foreach_8h.html" target="_self">host/tensor_foreach.h</a></td><td class="desc"></td></tr>
<tr id="row_159_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="tensor__norm_8h_source.html"><span class="icondoc"></span></a><a class="el" href="tensor__norm_8h.html" target="_self">tensor_norm.h</a></td><td class="desc"></td></tr>
<tr id="row_160_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="tensor__op__multiplicand__sm70_8h_source.html"><span class="icondoc"></span></a><a class="el" href="tensor__op__multiplicand__sm70_8h.html" target="_self">tensor_op_multiplicand_sm70.h</a></td><td class="desc"></td></tr>
<tr id="row_161_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="tensor__op__multiplicand__sm75_8h_source.html"><span class="icondoc"></span></a><a class="el" href="tensor__op__multiplicand__sm75_8h.html" target="_self">tensor_op_multiplicand_sm75.h</a></td><td class="desc"></td></tr>
<tr id="row_162_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="tensor__op__policy_8h_source.html"><span class="icondoc"></span></a><a class="el" href="tensor__op__policy_8h.html" target="_self">tensor_op_policy.h</a></td><td class="desc">Defines basic structures needed for implementing the warp-scoped phase of the epilogue. These quantities assume a 'column-major' arrangement of TensorOp instructions, of which a row-oriented slice is visible per iteration </td></tr>
<tr id="row_163_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="tensor__ref_8h_source.html"><span class="icondoc"></span></a><a class="el" href="tensor__ref_8h.html" target="_self">tensor_ref.h</a></td><td class="desc">Defines a structure containing strides, bounds, and a pointer to tensor data </td></tr>
<tr id="row_164_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="tensor__view_8h_source.html"><span class="icondoc"></span></a><a class="el" href="tensor__view_8h.html" target="_self">tensor_view.h</a></td><td class="desc">Defines a structure containing strides and a pointer to tensor data </td></tr>
<tr id="row_165_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="tensor__view__io_8h_source.html"><span class="icondoc"></span></a><a class="el" href="tensor__view__io_8h.html" target="_self">tensor_view_io.h</a></td><td class="desc"></td></tr>
<tr id="row_166_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="gemm_2threadblock_2threadblock__swizzle_8h_source.html"><span class="icondoc"></span></a><a class="el" href="gemm_2threadblock_2threadblock__swizzle_8h.html" target="_self">gemm/threadblock/threadblock_swizzle.h</a></td><td class="desc">Implements several possible threadblock-swizzling functions mapping blockIdx to GEMM problems </td></tr>
<tr id="row_167_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="reduction_2threadblock__swizzle_8h_source.html"><span class="icondoc"></span></a><a class="el" href="reduction_2threadblock__swizzle_8h.html" target="_self">reduction/threadblock_swizzle.h</a></td><td class="desc">Defines functors for mapping blockIdx to partitions of the batched reduction computation </td></tr>
<tr id="row_168_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="tile__iterator__simt_8h_source.html"><span class="icondoc"></span></a><a class="el" href="tile__iterator__simt_8h.html" target="_self">tile_iterator_simt.h</a></td><td class="desc"></td></tr>
<tr id="row_169_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="tile__iterator__tensor__op_8h_source.html"><span class="icondoc"></span></a><a class="el" href="tile__iterator__tensor__op_8h.html" target="_self">tile_iterator_tensor_op.h</a></td><td class="desc"></td></tr>
<tr id="row_170_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="tile__iterator__volta__tensor__op_8h_source.html"><span class="icondoc"></span></a><a class="el" href="tile__iterator__volta__tensor__op_8h.html" target="_self">tile_iterator_volta_tensor_op.h</a></td><td class="desc"></td></tr>
<tr id="row_171_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="tile__iterator__wmma__tensor__op_8h_source.html"><span class="icondoc"></span></a><a class="el" href="tile__iterator__wmma__tensor__op_8h.html" target="_self">tile_iterator_wmma_tensor_op.h</a></td><td class="desc"></td></tr>
<tr id="row_172_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="transpose_8h_source.html"><span class="icondoc"></span></a><a class="el" href="transpose_8h.html" target="_self">transpose.h</a></td><td class="desc">Basic copy routines for tensor views </td></tr>
<tr id="row_173_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="type__traits_8h_source.html"><span class="icondoc"></span></a><a class="el" href="type__traits_8h.html" target="_self">type_traits.h</a></td><td class="desc">Type traits for common CUDA types </td></tr>
<tr id="row_174_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="vector_8h_source.html"><span class="icondoc"></span></a><a class="el" href="vector_8h.html" target="_self">vector.h</a></td><td class="desc">Defines layout functions used for rank=1 vectors </td></tr>
<tr id="row_175_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="volta__tensor__op__policy_8h_source.html"><span class="icondoc"></span></a><a class="el" href="volta__tensor__op__policy_8h.html" target="_self">volta_tensor_op_policy.h</a></td><td class="desc">Defines basic structures needed for implementing the warp-scoped phase of the epilogue. These quantities assume a 'column-major' arrangement of TensorOp instructions, of which a row-oriented slice is visible per iteration </td></tr>
<tr id="row_176_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="wmma_8h_source.html"><span class="icondoc"></span></a><a class="el" href="wmma_8h.html" target="_self">wmma.h</a></td><td class="desc">Templates exposing architecture support for warp matrix multiply-add (WMMA) operations </td></tr>
<tr id="row_177_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="wmma__array_8h_source.html"><span class="icondoc"></span></a><a class="el" href="wmma__array_8h.html" target="_self">wmma_array.h</a></td><td class="desc">Statically sized array of elements that accommodates all CUTLASS-supported numeric types and is safe to use in a union </td></tr>
<tr id="row_178_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="wmma__ptx_8h_source.html"><span class="icondoc"></span></a><a class="el" href="wmma__ptx_8h.html" target="_self">wmma_ptx.h</a></td><td class="desc">Templates exposing warp matrix multiply-add (WMMA) operations </td></tr>
<tr id="row_179_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="wmma__sm70_8h_source.html"><span class="icondoc"></span></a><a class="el" href="wmma__sm70_8h.html" target="_self">wmma_sm70.h</a></td><td class="desc">Matrix multiply </td></tr>
<tr id="row_180_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="wmma__sm72_8h_source.html"><span class="icondoc"></span></a><a class="el" href="wmma__sm72_8h.html" target="_self">wmma_sm72.h</a></td><td class="desc">Matrix multiply </td></tr>
<tr id="row_181_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="wmma__sm75_8h_source.html"><span class="icondoc"></span></a><a class="el" href="wmma__sm75_8h.html" target="_self">wmma_sm75.h</a></td><td class="desc">Matrix multiply </td></tr>
<tr id="row_182_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><a href="wmma__tensor__op__policy_8h_source.html"><span class="icondoc"></span></a><a class="el" href="wmma__tensor__op__policy_8h.html" target="_self">wmma_tensor_op_policy.h</a></td><td class="desc">Defines basic structures needed for implementing the warp-scoped phase of the epilogue. These quantities assume a 'column-major' arrangement of TensorOp instructions, of which a row-oriented slice is visible per iteration </td></tr>
</table>
</div><!-- directory -->
</div><!-- contents -->
<!-- start footer part -->
<hr class="footer"/><address class="footer"><small>
Generated by &#160;<a href="http://www.doxygen.org/index.html">
<img class="footer" src="doxygen.png" alt="doxygen"/>
</a> 1.8.11
</small></address>
</body>
</html>