/* Copyright 1993-2016 NVIDIA Corporation. All rights reserved. * * NOTICE TO LICENSEE: * * The source code and/or documentation ("Licensed Deliverables") are * subject to NVIDIA intellectual property rights under U.S. and * international Copyright laws. * * The Licensed Deliverables contained herein are PROPRIETARY and * CONFIDENTIAL to NVIDIA and are being provided under the terms and * conditions of a form of NVIDIA software license agreement by and * between NVIDIA and Licensee ("License Agreement") or electronically * accepted by Licensee. Notwithstanding any terms or conditions to * the contrary in the License Agreement, reproduction or disclosure * of the Licensed Deliverables to any third party without the express * written consent of NVIDIA is prohibited. * * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. THEY ARE * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND. * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY, * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE * OF THESE LICENSED DELIVERABLES. * * U.S. Government End Users. These Licensed Deliverables are a * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT * 1995), consisting of "commercial computer software" and "commercial * computer software documentation" as such terms are used in 48 * C.F.R. 12.212 (SEPT 1995) and are provided to the U.S. Government * only as a commercial end item. Consistent with 48 C.F.R.12.212 and * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all * U.S. Government End Users acquire the Licensed Deliverables with * only those rights set forth herein. * * Any use of the Licensed Deliverables in individual and commercial * software must include, in the user documentation and internal * comments to the code, the above Disclaimer and U.S. Government End * Users Notice. */ #ifndef _CG_REDUCE_H_ #define _CG_REDUCE_H_ #include "info.h" #include "helpers.h" #include "coalesced_reduce.h" #include "functional.h" #include "cooperative_groups.h" _CG_BEGIN_NAMESPACE namespace details { template using _redux_is_add_supported = _CG_STL_NAMESPACE::integral_constant< bool, _CG_STL_NAMESPACE::is_integral::value && (sizeof(Ty) <= 4)>; template using redux_is_add_supported = _redux_is_add_supported; // A specialization for 64 bit logical operations is possible // but for now only accelerate 32 bit bitwise ops template using redux_is_logical_supported = redux_is_add_supported; // Base operator support case template struct _redux_op_supported : public _CG_STL_NAMESPACE::false_type {}; template struct _redux_op_supported, Ty> : public redux_is_add_supported {}; template struct _redux_op_supported, Ty> : public redux_is_add_supported {}; template struct _redux_op_supported, Ty> : public redux_is_add_supported {}; template struct _redux_op_supported, Ty> : public redux_is_logical_supported {}; template struct _redux_op_supported, Ty> : public redux_is_logical_supported {}; template struct _redux_op_supported, Ty> : public redux_is_logical_supported {}; template class TyOp> using redux_op_supported = _redux_op_supported< typename details::remove_qual>, Ty>; // Groups smaller than 16 actually have worse performance characteristics when used with redux // tiles of size 16 and 32 perform the same or better and have better code generation profiles template struct _redux_group_optimized : public _CG_STL_NAMESPACE::false_type {}; template struct _redux_group_optimized> : public _CG_STL_NAMESPACE::integral_constant< bool, (Sz >= 16)> {}; template struct _redux_group_optimized> : public _CG_STL_NAMESPACE::integral_constant< bool, (Sz >= 16)> {}; template <> struct _redux_group_optimized : public _CG_STL_NAMESPACE::true_type {}; template using redux_group_optimized = _redux_group_optimized>; template