/*
* Copyright 2022 NVIDIA Corporation. All rights reserved.
*
* NOTICE TO LICENSEE:
*
* This source code and/or documentation ("Licensed Deliverables") are
* subject to NVIDIA intellectual property rights under U.S. and
* international Copyright laws.
*
 * These Licensed Deliverables contained herein are PROPRIETARY and
 * CONFIDENTIAL to NVIDIA and are being provided under the terms and
* conditions of a form of NVIDIA software license agreement by and
* between NVIDIA and Licensee ("License Agreement") or electronically
* accepted by Licensee. Notwithstanding any terms or conditions to
* the contrary in the License Agreement, reproduction or disclosure
* of the Licensed Deliverables to any third party without the express
* written consent of NVIDIA is prohibited.
*
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
* OF THESE LICENSED DELIVERABLES.
*
* U.S. Government End Users. These Licensed Deliverables are a
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
* 1995), consisting of "commercial computer software" and "commercial
* computer software documentation" as such terms are used in 48
 * C.F.R. 12.212 (SEPT 1995) and are provided to the U.S. Government
 * only as a commercial end item. Consistent with 48 C.F.R. 12.212 and
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
* U.S. Government End Users acquire the Licensed Deliverables with
* only those rights set forth herein.
*
* Any use of the Licensed Deliverables in individual and commercial
* software must include, in the user documentation and internal
* comments to the code, the above Disclaimer and U.S. Government End
* Users Notice.
*/
#if !defined(__CUDA_FP8_HPP__)
#define __CUDA_FP8_HPP__
#if !defined(__CUDA_FP8_H__)
#error "Do not include this file directly. Instead, include cuda_fp8.h."
#endif
/* C++ header for std::memcpy (used for type punning in host-side
 * implementations). When compiling as a CUDA source file, memcpy is provided
* implicitly. !defined(__CUDACC__) implies !defined(__CUDACC_RTC__).
*/
#if defined(__cplusplus) && !defined(__CUDACC__)
#include <cstring>
#elif !defined(__cplusplus) && !defined(__CUDACC__)
#include <string.h>
#endif /* defined(__cplusplus) && !defined(__CUDACC__) */
/* Set up structure-alignment attribute */
#if !(defined __CUDA_ALIGN__)
#if defined(__CUDACC__)
#define __CUDA_ALIGN__(align) __align__(align)
#else
/* Define alignment macro based on compiler type (cannot assume C11 "_Alignas"
* is available) */
#if __cplusplus >= 201103L
#define __CUDA_ALIGN__(n) \
alignas(n) /* C++11 kindly gives us a keyword for this */
#else /* !(__cplusplus >= 201103L) */
#if defined(__GNUC__)
#define __CUDA_ALIGN__(n) __attribute__((aligned(n)))
#elif defined(_MSC_VER)
#define __CUDA_ALIGN__(n) __declspec(align(n))
#else
#define __CUDA_ALIGN__(n)
#endif /* defined(__GNUC__) */
#endif /* __cplusplus >= 201103L */
#endif /* defined(__CUDACC__) */
#endif /* !(defined __CUDA_ALIGN__) */
#if !(defined __CPP_VERSION_AT_LEAST_11_FP8)
/* need C++11 for explicit conversion operators */
#define __CUDA_NO_FP8_CONVERSION_OPERATORS__
#endif
__CUDA_HOSTDEVICE_FP8_DECL__ __nv_fp8_storage_t
__nv_cvt_double_to_fp8(const double x, const __nv_saturation_t saturate,
const __nv_fp8_interpretation_t fp8_interpretation) {
unsigned char res;
unsigned long long int xbits;
#if defined(__CUDACC__) || (!defined __cplusplus)
(void)memcpy(&xbits, &x, sizeof(x));
#else
(void)std::memcpy(&xbits, &x, sizeof(x));
#endif
unsigned char FP8_MAXNORM;
unsigned char FP8_MANTISSA_MASK;
unsigned short int FP8_EXP_BIAS;
unsigned long long int FP8_SIGNIFICAND_BITS;
const unsigned long long int DP_INF_BITS = 0x7FF0000000000000ULL;
unsigned long long int FP8_MINDENORM_O2;
unsigned long long int FP8_OVERFLOW_THRESHOLD;
unsigned long long int FP8_MINNORM;
if (fp8_interpretation == __NV_E4M3) {
FP8_EXP_BIAS = 7U;
FP8_SIGNIFICAND_BITS = 4ULL;
FP8_MANTISSA_MASK = 0x7U;
FP8_MINDENORM_O2 = 0x3F50000000000000ULL; // mindenorm/2 = 2^-10
FP8_OVERFLOW_THRESHOLD =
0x407D000000000000ULL; // maxnorm + 1/2ulp = 0x1.Cp+8 + 0x1p+4
FP8_MAXNORM = 0x7EU;
FP8_MINNORM = 0x3F90000000000000ULL; // minnorm = 2^-6
} else { //__NV_E5M2
FP8_EXP_BIAS = 15U;
FP8_SIGNIFICAND_BITS = 3ULL;
FP8_MANTISSA_MASK = 0x3U;
FP8_MINDENORM_O2 = 0x3EE0000000000000ULL; // mindenorm/2 = 2^-17
FP8_OVERFLOW_THRESHOLD =
0x40EE000000000000ULL -
1ULL; // maxnorm + 1/2ulp = 0x1.Ep+15, and -1 to have common code
FP8_MAXNORM = 0x7BU;
FP8_MINNORM = 0x3F10000000000000ULL; // minnorm = 2^-14
}
// 1/2 LSB of the target format, positioned within the double-precision mantissa;
// used to detect midpoints during the round-to-nearest-even step
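// (for E4M3 this is bit 48, i.e. 1ULL << 48; for E5M2 it is bit 49, 1ULL << 49)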
const unsigned long long int FP8_DP_HALF_ULP =
(unsigned long long int)1ULL << (53ULL - FP8_SIGNIFICAND_BITS - 1ULL);
// prepare sign bit in target format
unsigned char sign = (unsigned char)((xbits >> 63ULL) << 7U);
// prepare exponent field in target format
unsigned char exp =
(unsigned char)((((unsigned short int)(xbits >> 52ULL)) & 0x7FFU) -
1023U + FP8_EXP_BIAS);
// round mantissa to target format width, rounding towards zero
unsigned char mantissa =
(unsigned char)(xbits >> (53ULL - FP8_SIGNIFICAND_BITS)) &
FP8_MANTISSA_MASK;
unsigned long long int absx = xbits & 0x7FFFFFFFFFFFFFFFULL;
if (absx <= FP8_MINDENORM_O2) {
// zero or underflow
res = 0U;
} else if (absx > DP_INF_BITS) {
// NaN
if (fp8_interpretation == __NV_E4M3) {
res = 0x7FU;
} else {
// NaN --> QNaN
res = 0x7EU | mantissa;
}
} else if (absx > FP8_OVERFLOW_THRESHOLD) {
if (saturate == __NV_SATFINITE) {
res = FP8_MAXNORM;
} else {
// __NV_NOSAT
if (fp8_interpretation == __NV_E4M3) {
// no Inf in E4M3
res = 0x7FU; // NaN
} else {
res = 0x7CU; // Inf in E5M2
}
}
} else if (absx >= FP8_MINNORM) {
res = (unsigned char)((exp << (FP8_SIGNIFICAND_BITS - 1U)) | mantissa);
// rounded-off bits
unsigned long long int round =
xbits & ((FP8_DP_HALF_ULP << 1ULL) - 1ULL);
// round-to-nearest-even adjustment
if ((round > FP8_DP_HALF_ULP) ||
((round == FP8_DP_HALF_ULP) && (mantissa & 1U))) {
res = (unsigned char)(res + 1U);
}
} else { // Denormal range
unsigned char shift = (unsigned char)(1U - exp);
// add implicit leading bit
mantissa |= (unsigned char)(1U << (FP8_SIGNIFICAND_BITS - 1U));
// additional round-off due to denormalization
res = (unsigned char)(mantissa >> shift);
// rounded-off bits, including implicit leading bit
unsigned long long int round =
(xbits | ((unsigned long long int)1ULL << (53ULL - 1ULL))) &
((FP8_DP_HALF_ULP << (shift + 1ULL)) - 1ULL);
// round-to-nearest-even adjustment
if ((round > (FP8_DP_HALF_ULP << shift)) ||
((round == (FP8_DP_HALF_ULP << shift)) && (res & 1U))) {
res = (unsigned char)(res + 1U);
}
}
res |= sign;
return (__nv_fp8_storage_t)res;
}
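/*
 * Illustrative reference values for the conversion above (derived from the
 * encodings; for orientation only):
 *   __nv_cvt_double_to_fp8(1.0, __NV_SATFINITE, __NV_E4M3) yields 0x38
 *   __nv_cvt_double_to_fp8(1.0, __NV_SATFINITE, __NV_E5M2) yields 0x3C
 *   __nv_cvt_double_to_fp8(1.0e6, __NV_SATFINITE, __NV_E4M3) yields 0x7E,
 *   i.e. the E4M3 maximum normal 448.0, because the input overflows and
 *   __NV_SATFINITE clamps it to the largest finite value.
 */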
__CUDA_HOSTDEVICE_FP8_DECL__ __nv_fp8x2_storage_t
__nv_cvt_double2_to_fp8x2(const double2 x, const __nv_saturation_t saturate,
const __nv_fp8_interpretation_t fp8_interpretation) {
__nv_fp8x2_storage_t storage = (__nv_fp8x2_storage_t)__nv_cvt_double_to_fp8(
x.y, saturate, fp8_interpretation);
storage = (__nv_fp8x2_storage_t)(storage << 8U);
storage = (__nv_fp8x2_storage_t)(storage |
__nv_cvt_double_to_fp8(
x.x, saturate, fp8_interpretation));
return storage;
}
__CUDA_HOSTDEVICE_FP8_DECL__ __nv_fp8_storage_t
__nv_cvt_float_to_fp8(const float x, const __nv_saturation_t saturate,
const __nv_fp8_interpretation_t fp8_interpretation) {
__nv_fp8_storage_t res = 0U;
#if (defined __CUDA_ARCH__) && (__CUDA_ARCH__ >= 890)
if (saturate == __NV_SATFINITE) {
__nv_fp8x2_storage_t storage;
if (fp8_interpretation == __NV_E5M2) {
asm("{cvt.rn.satfinite.e5m2x2.f32 %0, %2, %1;}\n"
: "=h"(storage)
: "f"(x), "f"(0.0f));
} else {
asm("{cvt.rn.satfinite.e4m3x2.f32 %0, %2, %1;}\n"
: "=h"(storage)
: "f"(x), "f"(0.0f));
}
res = (__nv_fp8_storage_t)storage;
} else
#endif
{
unsigned int xbits;
#if defined(__CUDACC__) || (!defined __cplusplus)
(void)memcpy(&xbits, &x, sizeof(x));
#else
(void)std::memcpy(&xbits, &x, sizeof(x));
#endif
// isnan
if ((xbits & 0x7FFFFFFFU) > 0x7F800000U) {
// Canonical NaN
xbits = 0x7FFFFFFFU;
}
float fx;
#if defined(__CUDACC__) || (!defined __cplusplus)
(void)memcpy(&fx, &xbits, sizeof(xbits));
#else
(void)std::memcpy(&fx, &xbits, sizeof(xbits));
#endif
const double dx = (double)fx;
res = __nv_cvt_double_to_fp8(dx, saturate, fp8_interpretation);
}
return res;
}
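/*
 * Illustrative saturation behavior (for orientation only): 448.0f, the E4M3
 * maximum normal, converts to 0x7E; any larger finite input also yields 0x7E
 * under __NV_SATFINITE, while __NV_NOSAT maps it to NaN (0x7F) because the
 * E4M3 encoding has no Infinity.
 */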
__CUDA_HOSTDEVICE_FP8_DECL__ __nv_fp8x2_storage_t
__nv_cvt_float2_to_fp8x2(const float2 x, const __nv_saturation_t saturate,
const __nv_fp8_interpretation_t fp8_interpretation) {
__nv_fp8x2_storage_t storage;
#if (defined __CUDA_ARCH__) && (__CUDA_ARCH__ >= 890)
if (saturate == __NV_SATFINITE) {
if (fp8_interpretation == __NV_E5M2) {
asm("{cvt.rn.satfinite.e5m2x2.f32 %0, %2, %1;}\n"
: "=h"(storage)
: "f"(x.x), "f"(x.y));
} else {
asm("{cvt.rn.satfinite.e4m3x2.f32 %0, %2, %1;}\n"
: "=h"(storage)
: "f"(x.x), "f"(x.y));
}
} else
#endif
{
storage = (__nv_fp8x2_storage_t)__nv_cvt_float_to_fp8(
x.y, saturate, fp8_interpretation);
storage = (__nv_fp8x2_storage_t)(storage << 8U);
storage = (__nv_fp8x2_storage_t)(storage | __nv_cvt_float_to_fp8(
x.x, saturate,
fp8_interpretation));
}
return storage;
}
__CUDA_HOSTDEVICE_FP8_DECL__ float
__internal_halfraw_to_float(const __half_raw x) {
float f;
#if (defined __CUDA_ARCH__) && (__CUDA_ARCH__ >= 530)
asm("{cvt.f32.f16 %0, %1;}\n" : "=f"(f) : "h"(x.x));
#else
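/* Software path: widen the fp16 bit pattern to fp32; the exponent is
 * re-biased from 15 to 127, hence the +0x70 (= 112) adjustment below. */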
const unsigned int ux = (unsigned int)x.x;
unsigned int sign = (ux >> 15U) & 1U;
unsigned int exponent = (ux >> 10U) & 0x1fU;
unsigned int mantissa = (ux & 0x3ffU) << 13U;
if (exponent == 0x1fU) { /* NaN or Inf */
/* discard sign of a NaN */
sign = ((mantissa != 0U) ? (sign >> 1U) : sign);
mantissa = ((mantissa != 0U) ? 0x7fffffU : 0U);
exponent = 0xffU;
} else if (exponent == 0U) { /* Denorm or Zero */
if (mantissa != 0U) {
unsigned int msb;
exponent = 0x71U;
do {
msb = (mantissa & 0x400000U);
mantissa <<= 1U; /* normalize */
--exponent;
} while (msb == 0U);
mantissa &= 0x7fffffU; /* 1.mantissa is implicit */
}
} else {
exponent += 0x70U;
}
const unsigned int u = ((sign << 31U) | (exponent << 23U) | mantissa);
#if defined(__CUDACC__) || (!defined __cplusplus)
(void)memcpy(&f, &u, sizeof(u));
#else
(void)std::memcpy(&f, &u, sizeof(u));
#endif
#endif /* (defined __CUDA_ARCH__) && (__CUDA_ARCH__ >= 530) */
return f;
}
__CUDA_HOSTDEVICE_FP8_DECL__ float2
__internal_halfraw2_to_float2(const __half2_raw x) {
__half_raw raw;
float2 res;
raw.x = x.x;
res.x = __internal_halfraw_to_float(raw);
raw.x = x.y;
res.y = __internal_halfraw_to_float(raw);
return res;
}
__CUDA_HOSTDEVICE_FP8_DECL__ __nv_fp8_storage_t
__nv_cvt_halfraw_to_fp8(const __half_raw x, const __nv_saturation_t saturate,
const __nv_fp8_interpretation_t fp8_interpretation) {
__nv_fp8_storage_t res = 0U;
#if (defined __CUDA_ARCH__) && (__CUDA_ARCH__ >= 890)
if (saturate == __NV_SATFINITE) {
unsigned int half2_storage = (unsigned int)(x.x);
__nv_fp8x2_storage_t tmp;
if (fp8_interpretation == __NV_E5M2) {
asm("{cvt.rn.satfinite.e5m2x2.f16x2 %0, %1;}\n"
: "=h"(tmp)
: "r"(half2_storage));
} else {
asm("{cvt.rn.satfinite.e4m3x2.f16x2 %0, %1;}\n"
: "=h"(tmp)
: "r"(half2_storage));
}
res = (__nv_fp8_storage_t)tmp;
} else
#endif
{
float fx = __internal_halfraw_to_float(x);
res = __nv_cvt_float_to_fp8(fx, saturate, fp8_interpretation);
}
return res;
}
__CUDA_HOSTDEVICE_FP8_DECL__ __nv_fp8x2_storage_t __nv_cvt_halfraw2_to_fp8x2(
const __half2_raw x, const __nv_saturation_t saturate,
const __nv_fp8_interpretation_t fp8_interpretation) {
__nv_fp8x2_storage_t tmp;
#if (defined __CUDA_ARCH__) && (__CUDA_ARCH__ >= 890)
if (saturate == __NV_SATFINITE) {
unsigned int half2_storage;
(void)memcpy(&half2_storage, &x, sizeof(x));
if (fp8_interpretation == __NV_E5M2) {
asm("{cvt.rn.satfinite.e5m2x2.f16x2 %0, %1;}\n"
: "=h"(tmp)
: "r"(half2_storage));
} else {
asm("{cvt.rn.satfinite.e4m3x2.f16x2 %0, %1;}\n"
: "=h"(tmp)
: "r"(half2_storage));
}
} else
#endif
{
__half_raw raw;
raw.x = x.x;
__nv_fp8_storage_t lo =
__nv_cvt_halfraw_to_fp8(raw, saturate, fp8_interpretation);
raw.x = x.y;
__nv_fp8_storage_t hi =
__nv_cvt_halfraw_to_fp8(raw, saturate, fp8_interpretation);
tmp = hi;
tmp = (__nv_fp8x2_storage_t)(tmp << 8U);
tmp = (__nv_fp8x2_storage_t)(tmp | lo);
}
return tmp;
}
__CUDA_HOSTDEVICE_FP8_DECL__ float
__internal_bf16raw_to_float(const __nv_bfloat16_raw x) {
const unsigned int ux = ((unsigned int)x.x) << 16U;
float fx;
#if defined(__CUDACC__) || (!defined __cplusplus)
(void)memcpy(&fx, &ux, sizeof(ux));
#else
(void)std::memcpy(&fx, &ux, sizeof(ux));
#endif
return fx;
}
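/* Converts float to bfloat16 by truncation (round toward zero): the upper 16
 * bits of the float bit pattern are kept and the lower 16 bits are dropped. */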
__CUDA_HOSTDEVICE_FP8_DECL__ __nv_bfloat16_raw
__internal_float_to_bf16raw_rz(const float x) {
unsigned int ux;
__nv_bfloat16_raw r;
#if defined(__CUDACC__) || (!defined __cplusplus)
(void)memcpy(&ux, &x, sizeof(x));
#else
(void)std::memcpy(&ux, &x, sizeof(x));
#endif
r.x = (unsigned short int)(ux >> 16U);
return r;
}
__CUDA_HOSTDEVICE_FP8_DECL__ __nv_fp8_storage_t __nv_cvt_bfloat16raw_to_fp8(
const __nv_bfloat16_raw x, const __nv_saturation_t saturate,
const __nv_fp8_interpretation_t fp8_interpretation) {
const float fx = __internal_bf16raw_to_float(x);
const __nv_fp8_storage_t res =
__nv_cvt_float_to_fp8(fx, saturate, fp8_interpretation);
return res;
}
__CUDA_HOSTDEVICE_FP8_DECL__ __nv_fp8x2_storage_t
__nv_cvt_bfloat16raw2_to_fp8x2(
const __nv_bfloat162_raw x, const __nv_saturation_t saturate,
const __nv_fp8_interpretation_t fp8_interpretation) {
__nv_bfloat16_raw raw;
raw.x = x.y;
__nv_fp8x2_storage_t storage =
(__nv_fp8x2_storage_t)__nv_cvt_bfloat16raw_to_fp8(raw, saturate,
fp8_interpretation);
storage = (__nv_fp8x2_storage_t)(storage << 8U);
raw.x = x.x;
storage = (__nv_fp8x2_storage_t)(storage |
__nv_cvt_bfloat16raw_to_fp8(
raw, saturate, fp8_interpretation));
return storage;
}
__CUDA_HOSTDEVICE_FP8_DECL__ __half2_raw
__nv_cvt_fp8x2_to_halfraw2(const __nv_fp8x2_storage_t x,
const __nv_fp8_interpretation_t fp8_interpretation);
__CUDA_HOSTDEVICE_FP8_DECL__ __half_raw
__nv_cvt_fp8_to_halfraw(const __nv_fp8_storage_t x,
const __nv_fp8_interpretation_t fp8_interpretation) {
__half_raw res;
res.x = 0U;
#if (defined __CUDA_ARCH__) && (__CUDA_ARCH__ >= 890)
res.x =
__nv_cvt_fp8x2_to_halfraw2((__nv_fp8x2_storage_t)x, fp8_interpretation)
.x;
#else
unsigned short int ur = (unsigned short int)x;
ur = (unsigned short int)(ur << 8U);
if (fp8_interpretation == __NV_E5M2) {
if ((ur & 0x7FFFU) > 0x7C00U) {
/* If NaN, return canonical NaN */
ur = 0x7FFFU;
}
} else { // __NV_E4M3
unsigned short int sign = ur & 0x8000U;
unsigned short int exponent =
(unsigned short int)(((ur & 0x7800U) >> 1U) + 0x2000U);
unsigned short int mantissa = (ur & 0x0700U) >> 1U;
unsigned char absx = 0x7FU & (unsigned char)x;
if (absx == 0x7FU) // NaN
{
ur = 0x7FFFU; // fp16 canonical NaN, discard sign
} else if (exponent == 0x2000U) {
// zero or denormal
if (mantissa != 0U) {
// normalize
mantissa = (unsigned short int)(mantissa << 1U);
while ((mantissa & 0x0400U) == 0U) {
mantissa = (unsigned short int)(mantissa << 1U);
exponent = (unsigned short int)(exponent - 0x0400U);
}
// discard implicit leading bit
mantissa &= 0x03FFU;
} else { // Zero
exponent = 0U;
}
ur = (sign | exponent) | mantissa;
} else {
ur = (sign | exponent) | mantissa;
}
}
res.x = ur;
#endif
return res;
}
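/*
 * Illustrative reference values for the conversion above (for orientation
 * only): fp8 value 0x38 interpreted as __NV_E4M3, and 0x3C interpreted as
 * __NV_E5M2, both decode to half-precision 1.0 (raw bits 0x3C00).
 */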
__CUDA_HOSTDEVICE_FP8_DECL__ __half2_raw
__nv_cvt_fp8x2_to_halfraw2(const __nv_fp8x2_storage_t x,
const __nv_fp8_interpretation_t fp8_interpretation) {
__half2_raw res;
#if (defined __CUDA_ARCH__) && (__CUDA_ARCH__ >= 890)
unsigned int half2_storage;
if (fp8_interpretation == __NV_E5M2) {
asm("{cvt.rn.f16x2.e5m2x2 %0, %1;}\n" : "=r"(half2_storage) : "h"(x));
} else {
asm("{cvt.rn.f16x2.e4m3x2 %0, %1;}\n" : "=r"(half2_storage) : "h"(x));
}
(void)memcpy(&res, &half2_storage, sizeof(half2_storage));
#else
res.x =
__nv_cvt_fp8_to_halfraw((__nv_fp8_storage_t)x, fp8_interpretation).x;
res.y = __nv_cvt_fp8_to_halfraw((__nv_fp8_storage_t)(x >> 8U),
fp8_interpretation)
.x;
#endif
return res;
}
/* All other definitions in this file are only visible to C++ compilers */
#if defined(__cplusplus)
/**
* \defgroup CUDA_MATH_FP8_E5M2_STRUCT C++ struct for handling fp8 data type of e5m2 kind.
* \ingroup CUDA_MATH_INTRINSIC_FP8
*/
/**
* \ingroup CUDA_MATH_FP8_E5M2_STRUCT
* \brief __nv_fp8_e5m2 datatype
*
* \details This structure implements the datatype for handling
* \p fp8 floating-point numbers of \p e5m2 kind:
* with 1 sign, 5 exponent, 1 implicit and 2 explicit mantissa bits.
*
* The structure implements converting constructors and operators.
*/
struct __CUDA_ALIGN__(1) __nv_fp8_e5m2 {
public:
/**
* \ingroup CUDA_MATH_FP8_E5M2_STRUCT
* Storage variable contains the \p fp8 floating-point data.
*/
__nv_fp8_storage_t __x;
/**
* \ingroup CUDA_MATH_FP8_E5M2_STRUCT
 * Default constructor.
*/
#if defined(__CPP_VERSION_AT_LEAST_11_FP8)
__nv_fp8_e5m2() = default;
#else
__CUDA_HOSTDEVICE_FP8__ __nv_fp8_e5m2() {}
#endif /* defined(__CPP_VERSION_AT_LEAST_11_FP8) */
#if !defined(__CUDA_NO_FP8_CONVERSIONS__)
/* Construct from wider FP types */
/* Note: we avoid constructor initializer lists because of special host/device
 * compilation rules */
/**
* \ingroup CUDA_MATH_FP8_E5M2_STRUCT
* Constructor from \p __half data type, relies on \p __NV_SATFINITE
* behavior for out-of-range values.
*/
explicit __CUDA_HOSTDEVICE_FP8__ __nv_fp8_e5m2(const __half f) {
__x = __nv_cvt_halfraw_to_fp8(static_cast<__half_raw>(f),
__NV_SATFINITE, __NV_E5M2);
}
/**
* \ingroup CUDA_MATH_FP8_E5M2_STRUCT
* Constructor from \p __nv_bfloat16 data type, relies on \p __NV_SATFINITE
* behavior for out-of-range values.
*/
explicit __CUDA_HOSTDEVICE_FP8__ __nv_fp8_e5m2(const __nv_bfloat16 f) {
__x = __nv_cvt_bfloat16raw_to_fp8(static_cast<__nv_bfloat16_raw>(f),
__NV_SATFINITE, __NV_E5M2);
}
/**
* \ingroup CUDA_MATH_FP8_E5M2_STRUCT
* Constructor from \p float data type, relies on \p __NV_SATFINITE behavior
* for out-of-range values.
*/
explicit __CUDA_HOSTDEVICE_FP8__ __nv_fp8_e5m2(const float f) {
__x = __nv_cvt_float_to_fp8(f, __NV_SATFINITE, __NV_E5M2);
}
/**
* \ingroup CUDA_MATH_FP8_E5M2_STRUCT
* Constructor from \p double data type, relies on \p __NV_SATFINITE
* behavior for out-of-range values.
*/
explicit __CUDA_HOSTDEVICE_FP8__ __nv_fp8_e5m2(const double f) {
__x = __nv_cvt_double_to_fp8(f, __NV_SATFINITE, __NV_E5M2);
}
/* Converts from integral */
/**
* \ingroup CUDA_MATH_FP8_E5M2_STRUCT
* Constructor from \p unsigned \p short \p int data type, relies on \p
* __NV_SATFINITE behavior for out-of-range values.
*/
explicit __CUDA_HOSTDEVICE_FP8__
__nv_fp8_e5m2(const unsigned short int val) {
__x = static_cast<__nv_fp8_e5m2>(static_cast<float>(val)).__x;
}
/**
* \ingroup CUDA_MATH_FP8_E5M2_STRUCT
* Constructor from \p unsigned \p int data type, relies on \p
* __NV_SATFINITE behavior for out-of-range values.
*/
explicit __CUDA_HOSTDEVICE_FP8__ __nv_fp8_e5m2(const unsigned int val) {
__x = static_cast<__nv_fp8_e5m2>(static_cast<float>(val)).__x;
}
/**
* \ingroup CUDA_MATH_FP8_E5M2_STRUCT
* Constructor from \p unsigned \p long \p long \p int data type, relies on
* \p __NV_SATFINITE behavior for out-of-range values.
*/
explicit __CUDA_HOSTDEVICE_FP8__
__nv_fp8_e5m2(const unsigned long long int val) {
__x = static_cast<__nv_fp8_e5m2>(static_cast<float>(val)).__x;
}
/**
* \ingroup CUDA_MATH_FP8_E5M2_STRUCT
* Constructor from \p short \p int data type.
*/
explicit __CUDA_HOSTDEVICE_FP8__ __nv_fp8_e5m2(const short int val) {
__x = static_cast<__nv_fp8_e5m2>(static_cast<float>(val)).__x;
}
/**
* \ingroup CUDA_MATH_FP8_E5M2_STRUCT
* Constructor from \p int data type, relies on \p __NV_SATFINITE behavior
* for out-of-range values.
*/
explicit __CUDA_HOSTDEVICE_FP8__ __nv_fp8_e5m2(const int val) {
__x = static_cast<__nv_fp8_e5m2>(static_cast<float>(val)).__x;
}
/**
* \ingroup CUDA_MATH_FP8_E5M2_STRUCT
* Constructor from \p long \p long \p int data type, relies on \p
* __NV_SATFINITE behavior for out-of-range values.
*/
explicit __CUDA_HOSTDEVICE_FP8__ __nv_fp8_e5m2(const long long int val) {
__x = static_cast<__nv_fp8_e5m2>(static_cast<float>(val)).__x;
}
#if !defined(__CUDA_NO_FP8_CONVERSION_OPERATORS__)
/* Widening FP converts */
/**
* \ingroup CUDA_MATH_FP8_E5M2_STRUCT
* Conversion operator to \p __half data type.
*/
explicit __CUDA_HOSTDEVICE_FP8__ operator __half() const {
return static_cast<__half>(__nv_cvt_fp8_to_halfraw(__x, __NV_E5M2));
}
/**
* \ingroup CUDA_MATH_FP8_E5M2_STRUCT
* Conversion operator to \p float data type.
*/
explicit __CUDA_HOSTDEVICE_FP8__ operator float() const {
return __internal_halfraw_to_float(
__nv_cvt_fp8_to_halfraw(__x, __NV_E5M2));
}
/**
* \ingroup CUDA_MATH_FP8_E5M2_STRUCT
* Conversion operator to \p __nv_bfloat16 data type.
*/
explicit __CUDA_HOSTDEVICE_FP8__ operator __nv_bfloat16() const {
return static_cast<__nv_bfloat16>(
__internal_float_to_bf16raw_rz(float(*this)));
}
/**
* \ingroup CUDA_MATH_FP8_E5M2_STRUCT
* Conversion operator to \p double data type.
*/
explicit __CUDA_HOSTDEVICE_FP8__ operator double() const {
return static_cast<double>(float(*this));
}
/* Convert to integral */
/**
* \ingroup CUDA_MATH_FP8_E5M2_STRUCT
* Conversion operator to \p unsigned \p char data type.
* Clamps negative and too large inputs to the output range.
* \p NaN inputs convert to \p zero.
*/
explicit __CUDA_HOSTDEVICE_FP8__ operator unsigned char() const {
unsigned char i;
const float f = float(*this);
const unsigned char max_val = 0xFFU;
const unsigned char min_val = 0U;
const unsigned char bits = (*this).__x;
// saturation fixup
if ((bits & 0x7FU) > 0x7CU) {
// NaN
i = 0;
} else if (f > static_cast<float>(max_val)) {
// saturate maximum
i = max_val;
} else if (f < static_cast<float>(min_val)) {
// saturate minimum
i = min_val;
} else {
// normal value
i = static_cast<unsigned char>(f);
}
return i;
}
/**
* \ingroup CUDA_MATH_FP8_E5M2_STRUCT
* Conversion operator to \p unsigned \p short \p int data type.
* Clamps negative and too large inputs to the output range.
* \p NaN inputs convert to \p zero.
*/
explicit __CUDA_HOSTDEVICE_FP8__ operator unsigned short int() const {
return __half2ushort_rz(__half(*this));
}
/**
* \ingroup CUDA_MATH_FP8_E5M2_STRUCT
* Conversion operator to \p unsigned \p int data type.
* Clamps negative and too large inputs to the output range.
* \p NaN inputs convert to \p zero.
*/
explicit __CUDA_HOSTDEVICE_FP8__ operator unsigned int() const {
return __half2uint_rz(__half(*this));
}
/**
* \ingroup CUDA_MATH_FP8_E5M2_STRUCT
* Conversion operator to \p unsigned \p long \p long \p int data type.
* Clamps negative and too large inputs to the output range.
* \p NaN inputs convert to \p 0x8000000000000000ULL.
*/
explicit __CUDA_HOSTDEVICE_FP8__ operator unsigned long long int() const {
return __half2ull_rz(__half(*this));
}
/**
* \ingroup CUDA_MATH_FP8_E5M2_STRUCT
* Conversion operator to \p signed \p char data type.
* Clamps too large inputs to the output range.
* \p NaN inputs convert to \p zero.
*/
explicit __CUDA_HOSTDEVICE_FP8__ operator signed char() const {
signed char i;
const float f = float(*this);
const signed char max_val = (signed char)0x7FU;
const signed char min_val = (signed char)0x80U;
const unsigned char bits = (*this).__x;
// saturation fixup
if ((bits & 0x7FU) > 0x7CU) {
// NaN
i = 0;
} else if (f > static_cast<float>(max_val)) {
// saturate maximum
i = max_val;
} else if (f < static_cast<float>(min_val)) {
// saturate minimum
i = min_val;
} else {
// normal value
i = static_cast<signed char>(f);
}
return i;
}
/**
* \ingroup CUDA_MATH_FP8_E5M2_STRUCT
* Conversion operator to \p short \p int data type.
* Clamps too large inputs to the output range.
* \p NaN inputs convert to \p zero.
*/
explicit __CUDA_HOSTDEVICE_FP8__ operator short int() const {
return __half2short_rz(__half(*this));
}
/**
* \ingroup CUDA_MATH_FP8_E5M2_STRUCT
* Conversion operator to \p int data type.
* Clamps too large inputs to the output range.
* \p NaN inputs convert to \p zero.
*/
explicit __CUDA_HOSTDEVICE_FP8__ operator int() const {
return __half2int_rz(__half(*this));
}
/**
* \ingroup CUDA_MATH_FP8_E5M2_STRUCT
* Conversion operator to \p long \p long \p int data type.
* Clamps too large inputs to the output range.
* \p NaN inputs convert to \p 0x8000000000000000LL.
*/
explicit __CUDA_HOSTDEVICE_FP8__ operator long long int() const {
return __half2ll_rz(__half(*this));
}
/**
* \ingroup CUDA_MATH_FP8_E5M2_STRUCT
* Conversion operator to \p bool data type.
* +0 and -0 inputs convert to \p false.
* Non-zero inputs convert to \p true.
*/
explicit __CUDA_HOSTDEVICE_FP8__ operator bool() const {
return (__x & 0x7FU) != 0U;
}
#endif /* !defined(__CUDA_NO_FP8_CONVERSION_OPERATORS__) */
#endif /* !defined(__CUDA_NO_FP8_CONVERSIONS__) */
};
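/*
 * Usage sketch for __nv_fp8_e5m2 (illustrative only; assumes the conversion
 * operators are available, i.e. __CUDA_NO_FP8_CONVERSION_OPERATORS__ is not
 * defined):
 *
 *   __nv_fp8_e5m2 a(0.5f);            // stored bit pattern: 0x38
 *   float f = static_cast<float>(a);  // 0.5f, exact round trip
 *   double d = static_cast<double>(a);
 */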
/**
* \defgroup CUDA_MATH_FP8X2_E5M2_STRUCT C++ struct for handling vector type of two fp8 values of e5m2 kind.
* \ingroup CUDA_MATH_INTRINSIC_FP8
*/
/**
* \ingroup CUDA_MATH_FP8X2_E5M2_STRUCT
* \brief __nv_fp8x2_e5m2 datatype
*
* \details This structure implements the datatype for handling two
* \p fp8 floating-point numbers of \p e5m2 kind each:
* with 1 sign, 5 exponent, 1 implicit and 2 explicit mantissa bits.
*
* The structure implements converting constructors and operators.
*/
struct __CUDA_ALIGN__(2) __nv_fp8x2_e5m2 {
public:
/**
* \ingroup CUDA_MATH_FP8X2_E5M2_STRUCT
* Storage variable contains the vector of two \p fp8 floating-point data
* values.
*/
__nv_fp8x2_storage_t __x;
/**
* \ingroup CUDA_MATH_FP8X2_E5M2_STRUCT
 * Default constructor.
*/
#if defined(__CPP_VERSION_AT_LEAST_11_FP8)
__nv_fp8x2_e5m2() = default;
#else
__CUDA_HOSTDEVICE_FP8__ __nv_fp8x2_e5m2() {}
#endif /* defined(__CPP_VERSION_AT_LEAST_11_FP8) */
#if !defined(__CUDA_NO_FP8_CONVERSIONS__)
/* Construct from wider types */
/**
* \ingroup CUDA_MATH_FP8X2_E5M2_STRUCT
* Constructor from \p __half2 data type, relies on \p __NV_SATFINITE
* behavior for out-of-range values.
*/
explicit __CUDA_HOSTDEVICE_FP8__ __nv_fp8x2_e5m2(const __half2 f) {
__x = __nv_cvt_halfraw2_to_fp8x2(static_cast<__half2_raw>(f),
__NV_SATFINITE, __NV_E5M2);
}
/**
* \ingroup CUDA_MATH_FP8X2_E5M2_STRUCT
* Constructor from \p __nv_bfloat162 data type, relies on \p __NV_SATFINITE
* behavior for out-of-range values.
*/
explicit __CUDA_HOSTDEVICE_FP8__ __nv_fp8x2_e5m2(const __nv_bfloat162 f) {
__x = __nv_cvt_bfloat16raw2_to_fp8x2(static_cast<__nv_bfloat162_raw>(f),
__NV_SATFINITE, __NV_E5M2);
}
/**
* \ingroup CUDA_MATH_FP8X2_E5M2_STRUCT
* Constructor from \p float2 data type, relies on \p __NV_SATFINITE
* behavior for out-of-range values.
*/
explicit __CUDA_HOSTDEVICE_FP8__ __nv_fp8x2_e5m2(const float2 f) {
__x = __nv_cvt_float2_to_fp8x2(f, __NV_SATFINITE, __NV_E5M2);
}
/**
* \ingroup CUDA_MATH_FP8X2_E5M2_STRUCT
* Constructor from \p double2 data type, relies on \p __NV_SATFINITE
* behavior for out-of-range values.
*/
explicit __CUDA_HOSTDEVICE_FP8__ __nv_fp8x2_e5m2(const double2 f) {
__x = __nv_cvt_double2_to_fp8x2(f, __NV_SATFINITE, __NV_E5M2);
}
#if !defined(__CUDA_NO_FP8_CONVERSION_OPERATORS__)
/* Widening converts */
/**
* \ingroup CUDA_MATH_FP8X2_E5M2_STRUCT
* Conversion operator to \p __half2 data type.
*/
explicit __CUDA_HOSTDEVICE_FP8__ operator __half2() const {
return static_cast<__half2>(__nv_cvt_fp8x2_to_halfraw2(__x, __NV_E5M2));
}
/**
* \ingroup CUDA_MATH_FP8X2_E5M2_STRUCT
* Conversion operator to \p float2 data type.
*/
explicit __CUDA_HOSTDEVICE_FP8__ operator float2() const {
return __internal_halfraw2_to_float2(
__nv_cvt_fp8x2_to_halfraw2(__x, __NV_E5M2));
}
#endif /* !defined(__CUDA_NO_FP8_CONVERSION_OPERATORS__) */
#endif /* !defined(__CUDA_NO_FP8_CONVERSIONS__) */
};
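/* Packs two 16-bit values into one 32-bit word: src_lo occupies bits 0..15 and
 * src_hi occupies bits 16..31. Used below to assemble fp8x4 storage from two
 * fp8x2 halves. */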
__CUDA_HOSTDEVICE_FP8_DECL__ unsigned int
__internal_pack_u16x2_to_u32(const unsigned short int src_lo,
const unsigned short int src_hi) {
unsigned int dst;
#if (defined __CUDACC__) && (defined __CUDA_ARCH__)
asm("{ mov.b32 %0, {%1,%2};}\n" : "=r"(dst) : "h"(src_lo), "h"(src_hi));
#else
dst = (static_cast<unsigned int>(src_hi) << 16U) |
static_cast<unsigned int>(src_lo);
#endif
return dst;
}
/**
* \defgroup CUDA_MATH_FP8X4_E5M2_STRUCT C++ struct for handling vector type of four fp8 values of e5m2 kind.
* \ingroup CUDA_MATH_INTRINSIC_FP8
*/
/**
* \ingroup CUDA_MATH_FP8X4_E5M2_STRUCT
* \brief __nv_fp8x4_e5m2 datatype
*
* \details This structure implements the datatype for handling four
* \p fp8 floating-point numbers of \p e5m2 kind each:
* with 1 sign, 5 exponent, 1 implicit and 2 explicit mantissa bits.
*
* The structure implements converting constructors and operators.
*/
struct __CUDA_ALIGN__(4) __nv_fp8x4_e5m2 {
public:
/**
* \ingroup CUDA_MATH_FP8X4_E5M2_STRUCT
* Storage variable contains the vector of four \p fp8 floating-point data
* values.
*/
__nv_fp8x4_storage_t __x;
/**
* \ingroup CUDA_MATH_FP8X4_E5M2_STRUCT
 * Default constructor.
*/
#if defined(__CPP_VERSION_AT_LEAST_11_FP8)
__nv_fp8x4_e5m2() = default;
#else
__CUDA_HOSTDEVICE_FP8__ __nv_fp8x4_e5m2() {}
#endif /* defined(__CPP_VERSION_AT_LEAST_11_FP8) */
#if !defined(__CUDA_NO_FP8_CONVERSIONS__)
/* Construct from wider types */
/**
* \ingroup CUDA_MATH_FP8X4_E5M2_STRUCT
* Constructor from a pair of \p __half2 data type values,
* relies on \p __NV_SATFINITE behavior for out-of-range values.
*/
explicit __CUDA_HOSTDEVICE_FP8__ __nv_fp8x4_e5m2(const __half2 flo,
const __half2 fhi) {
const __nv_fp8x2_storage_t rlo = __nv_cvt_halfraw2_to_fp8x2(
static_cast<__half2_raw>(flo), __NV_SATFINITE, __NV_E5M2);
const __nv_fp8x2_storage_t rhi = __nv_cvt_halfraw2_to_fp8x2(
static_cast<__half2_raw>(fhi), __NV_SATFINITE, __NV_E5M2);
__x = __internal_pack_u16x2_to_u32(rlo, rhi);
}
/**
* \ingroup CUDA_MATH_FP8X4_E5M2_STRUCT
* Constructor from a pair of \p __nv_bfloat162 data type values,
* relies on \p __NV_SATFINITE behavior for out-of-range values.
*/
explicit __CUDA_HOSTDEVICE_FP8__ __nv_fp8x4_e5m2(const __nv_bfloat162 flo,
const __nv_bfloat162 fhi) {
const __nv_fp8x2_storage_t rlo = __nv_cvt_bfloat16raw2_to_fp8x2(
static_cast<__nv_bfloat162_raw>(flo), __NV_SATFINITE, __NV_E5M2);
const __nv_fp8x2_storage_t rhi = __nv_cvt_bfloat16raw2_to_fp8x2(
static_cast<__nv_bfloat162_raw>(fhi), __NV_SATFINITE, __NV_E5M2);
__x = __internal_pack_u16x2_to_u32(rlo, rhi);
}
/**
* \ingroup CUDA_MATH_FP8X4_E5M2_STRUCT
* Constructor from \p float4 vector data type,
* relies on \p __NV_SATFINITE behavior for out-of-range values.
*/
explicit __CUDA_HOSTDEVICE_FP8__ __nv_fp8x4_e5m2(const float4 f) {
const float2 flo = {f.x, f.y};
const float2 fhi = {f.z, f.w};
const __nv_fp8x2_storage_t rlo =
__nv_cvt_float2_to_fp8x2(flo, __NV_SATFINITE, __NV_E5M2);
const __nv_fp8x2_storage_t rhi =
__nv_cvt_float2_to_fp8x2(fhi, __NV_SATFINITE, __NV_E5M2);
__x = __internal_pack_u16x2_to_u32(rlo, rhi);
}
/**
* \ingroup CUDA_MATH_FP8X4_E5M2_STRUCT
* Constructor from \p double4 vector data type,
* relies on \p __NV_SATFINITE behavior for out-of-range values.
*/
explicit __CUDA_HOSTDEVICE_FP8__ __nv_fp8x4_e5m2(const double4 f) {
const double2 flo = {f.x, f.y};
const double2 fhi = {f.z, f.w};
const __nv_fp8x2_storage_t rlo =
__nv_cvt_double2_to_fp8x2(flo, __NV_SATFINITE, __NV_E5M2);
const __nv_fp8x2_storage_t rhi =
__nv_cvt_double2_to_fp8x2(fhi, __NV_SATFINITE, __NV_E5M2);
__x = __internal_pack_u16x2_to_u32(rlo, rhi);
}
#if !defined(__CUDA_NO_FP8_CONVERSION_OPERATORS__)
/* Widening converts */
/**
* \ingroup CUDA_MATH_FP8X4_E5M2_STRUCT
* Conversion operator to \p float4 vector data type.
*/
explicit __CUDA_HOSTDEVICE_FP8__ operator float4() const {
const __nv_fp8x2_storage_t slo = static_cast<__nv_fp8x2_storage_t>(__x);
const __nv_fp8x2_storage_t shi =
static_cast<__nv_fp8x2_storage_t>(__x >> 16U);
float2 rlo = __internal_halfraw2_to_float2(
__nv_cvt_fp8x2_to_halfraw2(slo, __NV_E5M2));
float2 rhi = __internal_halfraw2_to_float2(
__nv_cvt_fp8x2_to_halfraw2(shi, __NV_E5M2));
float4 res = {rlo.x, rlo.y, rhi.x, rhi.y};
return res;
}
#endif /* !defined(__CUDA_NO_FP8_CONVERSION_OPERATORS__) */
#endif /* !defined(__CUDA_NO_FP8_CONVERSIONS__) */
};
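/*
 * Usage sketch for __nv_fp8x4_e5m2 (illustrative only; assumes the conversion
 * operators are available):
 *
 *   float4 v = {0.25f, 0.5f, 1.0f, 2.0f};
 *   __nv_fp8x4_e5m2 p(v);                  // four e5m2 values in 32 bits
 *   float4 back = static_cast<float4>(p);  // exact round trip for these values
 */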
/**
* \defgroup CUDA_MATH_FP8_E4M3_STRUCT C++ struct for handling fp8 data type of e4m3 kind.
* \ingroup CUDA_MATH_INTRINSIC_FP8
*/
/**
* \ingroup CUDA_MATH_FP8_E4M3_STRUCT
* \brief __nv_fp8_e4m3 datatype
*
* \details This structure implements the datatype for storing
* \p fp8 floating-point numbers of \p e4m3 kind:
* with 1 sign, 4 exponent, 1 implicit and 3 explicit mantissa bits.
* The encoding doesn't support Infinity.
* NaNs are limited to 0x7F and 0xFF values.
*
* The structure implements converting constructors and operators.
*/
struct __CUDA_ALIGN__(1) __nv_fp8_e4m3 {
public:
/**
* \ingroup CUDA_MATH_FP8_E4M3_STRUCT
* Storage variable contains the \p fp8 floating-point data.
*/
__nv_fp8_storage_t __x;
/**
* \ingroup CUDA_MATH_FP8_E4M3_STRUCT
 * Default constructor.
*/
#if defined(__CPP_VERSION_AT_LEAST_11_FP8)
__nv_fp8_e4m3() = default;
#else
__CUDA_HOSTDEVICE_FP8__ __nv_fp8_e4m3() {}
#endif /* defined(__CPP_VERSION_AT_LEAST_11_FP8) */
#if !defined(__CUDA_NO_FP8_CONVERSIONS__)
/* Construct from wider FP types */
/* Note: we avoid constructor initializer lists because of special host/device
 * compilation rules */
/**
* \ingroup CUDA_MATH_FP8_E4M3_STRUCT
* Constructor from \p __half data type, relies on \p __NV_SATFINITE
* behavior for out-of-range values.
*/
explicit __CUDA_HOSTDEVICE_FP8__ __nv_fp8_e4m3(const __half f) {
__x = __nv_cvt_halfraw_to_fp8(static_cast<__half_raw>(f),
__NV_SATFINITE, __NV_E4M3);
}
/**
* \ingroup CUDA_MATH_FP8_E4M3_STRUCT
* Constructor from \p __nv_bfloat16 data type, relies on \p __NV_SATFINITE
* behavior for out-of-range values.
*/
explicit __CUDA_HOSTDEVICE_FP8__ __nv_fp8_e4m3(const __nv_bfloat16 f) {
__x = __nv_cvt_bfloat16raw_to_fp8(static_cast<__nv_bfloat16_raw>(f),
__NV_SATFINITE, __NV_E4M3);
}
/**
* \ingroup CUDA_MATH_FP8_E4M3_STRUCT
* Constructor from \p float data type, relies on \p __NV_SATFINITE behavior
* for out-of-range values.
*/
explicit __CUDA_HOSTDEVICE_FP8__ __nv_fp8_e4m3(const float f) {
__x = __nv_cvt_float_to_fp8(f, __NV_SATFINITE, __NV_E4M3);
}
/**
* \ingroup CUDA_MATH_FP8_E4M3_STRUCT
* Constructor from \p double data type, relies on \p __NV_SATFINITE
* behavior for out-of-range values.
*/
explicit __CUDA_HOSTDEVICE_FP8__ __nv_fp8_e4m3(const double f) {
__x = __nv_cvt_double_to_fp8(f, __NV_SATFINITE, __NV_E4M3);
}
/* Converts from integral */
/**
* \ingroup CUDA_MATH_FP8_E4M3_STRUCT
* Constructor from \p unsigned \p short \p int data type, relies on \p
* __NV_SATFINITE behavior for out-of-range values.
*/
explicit __CUDA_HOSTDEVICE_FP8__
__nv_fp8_e4m3(const unsigned short int val) {
__x = static_cast<__nv_fp8_e4m3>(static_cast<float>(val)).__x;
}
/**
* \ingroup CUDA_MATH_FP8_E4M3_STRUCT
* Constructor from \p unsigned \p int data type, relies on \p
* __NV_SATFINITE behavior for out-of-range values.
*/
explicit __CUDA_HOSTDEVICE_FP8__ __nv_fp8_e4m3(const unsigned int val) {
__x = static_cast<__nv_fp8_e4m3>(static_cast<float>(val)).__x;
}
/**
* \ingroup CUDA_MATH_FP8_E4M3_STRUCT
* Constructor from \p unsigned \p long \p long \p int data type, relies on
* \p __NV_SATFINITE behavior for out-of-range values.
*/
explicit __CUDA_HOSTDEVICE_FP8__
__nv_fp8_e4m3(const unsigned long long int val) {
__x = static_cast<__nv_fp8_e4m3>(static_cast<float>(val)).__x;
}
/**
* \ingroup CUDA_MATH_FP8_E4M3_STRUCT
* Constructor from \p short \p int data type, relies on \p
* __NV_SATFINITE behavior for out-of-range values.
*/
explicit __CUDA_HOSTDEVICE_FP8__ __nv_fp8_e4m3(const short int val) {
__x = static_cast<__nv_fp8_e4m3>(static_cast<float>(val)).__x;
}
/**
* \ingroup CUDA_MATH_FP8_E4M3_STRUCT
* Constructor from \p int data type, relies on \p __NV_SATFINITE behavior
* for out-of-range values.
*/
explicit __CUDA_HOSTDEVICE_FP8__ __nv_fp8_e4m3(const int val) {
__x = static_cast<__nv_fp8_e4m3>(static_cast<float>(val)).__x;
}
/**
* \ingroup CUDA_MATH_FP8_E4M3_STRUCT
* Constructor from \p long \p long \p int data type, relies on \p
* __NV_SATFINITE behavior for out-of-range values.
*/
explicit __CUDA_HOSTDEVICE_FP8__ __nv_fp8_e4m3(const long long int val) {
__x = static_cast<__nv_fp8_e4m3>(static_cast<float>(val)).__x;
}
#if !defined(__CUDA_NO_FP8_CONVERSION_OPERATORS__)
/* Widening FP converts */
/**
* \ingroup CUDA_MATH_FP8_E4M3_STRUCT
* Conversion operator to \p __half data type.
*/
explicit __CUDA_HOSTDEVICE_FP8__ operator __half() const {
return static_cast<__half>(__nv_cvt_fp8_to_halfraw(__x, __NV_E4M3));
}
/**
* \ingroup CUDA_MATH_FP8_E4M3_STRUCT
* Conversion operator to \p float data type.
*/
explicit __CUDA_HOSTDEVICE_FP8__ operator float() const {
return __internal_halfraw_to_float(
__nv_cvt_fp8_to_halfraw(__x, __NV_E4M3));
}
/**
* \ingroup CUDA_MATH_FP8_E4M3_STRUCT
* Conversion operator to \p __nv_bfloat16 data type.
*/
explicit __CUDA_HOSTDEVICE_FP8__ operator __nv_bfloat16() const {
return static_cast<__nv_bfloat16>(
__internal_float_to_bf16raw_rz(float(*this)));
}
/**
* \ingroup CUDA_MATH_FP8_E4M3_STRUCT
* Conversion operator to \p double data type.
*/
explicit __CUDA_HOSTDEVICE_FP8__ operator double() const {
return static_cast<double>(float(*this));
}
/* Convert to integral */
/**
* \ingroup CUDA_MATH_FP8_E4M3_STRUCT
* Conversion operator to \p unsigned \p char data type.
* Clamps negative and too large inputs to the output range.
* \p NaN inputs convert to \p zero.
*/
explicit __CUDA_HOSTDEVICE_FP8__ operator unsigned char() const {
unsigned char i;
const float f = float(*this);
const unsigned char max_val = 0xFFU;
const unsigned char min_val = 0U;
const unsigned char bits = (*this).__x;
// saturation fixup
if ((bits & 0x7FU) == 0x7FU) {
// NaN
i = 0;
} else if (f > static_cast<float>(max_val)) {
// saturate maximum
i = max_val;
} else if (f < static_cast<float>(min_val)) {
// saturate minimum
i = min_val;
} else {
// normal value
i = static_cast<unsigned char>(f);
}
return i;
}
/**
* \ingroup CUDA_MATH_FP8_E4M3_STRUCT
* Conversion operator to \p unsigned \p short \p int data type.
* Clamps negative inputs to zero.
* \p NaN inputs convert to \p zero.
*/
explicit __CUDA_HOSTDEVICE_FP8__ operator unsigned short int() const {
return __half2ushort_rz(__half(*this));
}
/**
* \ingroup CUDA_MATH_FP8_E4M3_STRUCT
* Conversion operator to \p unsigned \p int data type.
* Clamps negative inputs to zero.
* \p NaN inputs convert to \p zero.
*/
explicit __CUDA_HOSTDEVICE_FP8__ operator unsigned int() const {
return __half2uint_rz(__half(*this));
}
/**
* \ingroup CUDA_MATH_FP8_E4M3_STRUCT
* Conversion operator to \p unsigned \p long \p long \p int data type.
* Clamps negative inputs to zero.
* \p NaN inputs convert to \p 0x8000000000000000ULL.
*/
explicit __CUDA_HOSTDEVICE_FP8__ operator unsigned long long int() const {
return __half2ull_rz(__half(*this));
}
/**
* \ingroup CUDA_MATH_FP8_E4M3_STRUCT
* Conversion operator to \p signed \p char data type.
* Clamps too large inputs to the output range.
* \p NaN inputs convert to \p zero.
*/
explicit __CUDA_HOSTDEVICE_FP8__ operator signed char() const {
signed char i;
const float f = float(*this);
const signed char max_val = (signed char)0x7FU;
const signed char min_val = (signed char)0x80U;
const unsigned char bits = (*this).__x;
// saturation fixup
if ((bits & 0x7FU) == 0x7FU) {
// NaN
i = 0;
} else if (f > static_cast<float>(max_val)) {
// saturate maximum
i = max_val;
} else if (f < static_cast<float>(min_val)) {
// saturate minimum
i = min_val;
} else {
// normal value
i = static_cast<signed char>(f);
}
return i;
}
/**
* \ingroup CUDA_MATH_FP8_E4M3_STRUCT
* Conversion operator to \p short \p int data type.
* \p NaN inputs convert to \p zero.
*/
explicit __CUDA_HOSTDEVICE_FP8__ operator short int() const {
return __half2short_rz(__half(*this));
}
/**
* \ingroup CUDA_MATH_FP8_E4M3_STRUCT
* Conversion operator to \p int data type.
* \p NaN inputs convert to \p zero.
*/
explicit __CUDA_HOSTDEVICE_FP8__ operator int() const {
return __half2int_rz(__half(*this));
}
/**
* \ingroup CUDA_MATH_FP8_E4M3_STRUCT
* Conversion operator to \p long \p long \p int data type.
* \p NaN inputs convert to \p 0x8000000000000000LL.
*/
explicit __CUDA_HOSTDEVICE_FP8__ operator long long int() const {
return __half2ll_rz(__half(*this));
}
/**
* \ingroup CUDA_MATH_FP8_E4M3_STRUCT
* Conversion operator to \p bool data type.
* +0 and -0 inputs convert to \p false.
* Non-zero inputs convert to \p true.
*/
explicit __CUDA_HOSTDEVICE_FP8__ operator bool() const {
return (__x & 0x7FU) != 0U;
}
#endif /* !defined(__CUDA_NO_FP8_CONVERSION_OPERATORS__) */
#endif /* !defined(__CUDA_NO_FP8_CONVERSIONS__) */
};
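/*
 * Usage sketch for __nv_fp8_e4m3 (illustrative only; assumes the conversion
 * operators are available):
 *
 *   __nv_fp8_e4m3 x(3.14f);           // nearest e4m3 value is 3.25 (0x45)
 *   float f = static_cast<float>(x);  // 3.25f
 *   __nv_fp8_e4m3 y(1000.0f);         // out of range: saturates to 448.0 (0x7E)
 */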
/**
* \defgroup CUDA_MATH_FP8X2_E4M3_STRUCT C++ struct for handling vector type of two fp8 values of e4m3 kind.
* \ingroup CUDA_MATH_INTRINSIC_FP8
*/
/**
* \ingroup CUDA_MATH_FP8X2_E4M3_STRUCT
* \brief __nv_fp8x2_e4m3 datatype
*
* \details This structure implements the datatype for storage
* and operations on the vector of two \p fp8 values of \p e4m3 kind each:
* with 1 sign, 4 exponent, 1 implicit and 3 explicit mantissa bits.
* The encoding doesn't support Infinity.
* NaNs are limited to 0x7F and 0xFF values.
*/
struct __CUDA_ALIGN__(2) __nv_fp8x2_e4m3 {
public:
/**
* \ingroup CUDA_MATH_FP8X2_E4M3_STRUCT
* Storage variable contains the vector of two \p fp8 floating-point data
* values.
*/
__nv_fp8x2_storage_t __x;
/**
* \ingroup CUDA_MATH_FP8X2_E4M3_STRUCT
 * Default constructor.
*/
#if defined(__CPP_VERSION_AT_LEAST_11_FP8)
__nv_fp8x2_e4m3() = default;
#else
__CUDA_HOSTDEVICE_FP8__ __nv_fp8x2_e4m3() {}
#endif /* defined(__CPP_VERSION_AT_LEAST_11_FP8) */
#if !defined(__CUDA_NO_FP8_CONVERSIONS__)
/* Construct from wider types */
/**
* \ingroup CUDA_MATH_FP8X2_E4M3_STRUCT
* Constructor from \p __half2 data type, relies on \p __NV_SATFINITE
* behavior for out-of-range values.
*/
explicit __CUDA_HOSTDEVICE_FP8__ __nv_fp8x2_e4m3(const __half2 f) {
__x = __nv_cvt_halfraw2_to_fp8x2(static_cast<__half2_raw>(f),
__NV_SATFINITE, __NV_E4M3);
}
/**
* \ingroup CUDA_MATH_FP8X2_E4M3_STRUCT
* Constructor from \p __nv_bfloat162 data type, relies on \p __NV_SATFINITE
* behavior for out-of-range values.
*/
explicit __CUDA_HOSTDEVICE_FP8__ __nv_fp8x2_e4m3(const __nv_bfloat162 f) {
__x = __nv_cvt_bfloat16raw2_to_fp8x2(static_cast<__nv_bfloat162_raw>(f),
__NV_SATFINITE, __NV_E4M3);
}
/**
* \ingroup CUDA_MATH_FP8X2_E4M3_STRUCT
* Constructor from \p float2 data type, relies on \p __NV_SATFINITE
* behavior for out-of-range values.
*/
explicit __CUDA_HOSTDEVICE_FP8__ __nv_fp8x2_e4m3(const float2 f) {
__x = __nv_cvt_float2_to_fp8x2(f, __NV_SATFINITE, __NV_E4M3);
}
/**
* \ingroup CUDA_MATH_FP8X2_E4M3_STRUCT
* Constructor from \p double2 data type, relies on \p __NV_SATFINITE
* behavior for out-of-range values.
*/
explicit __CUDA_HOSTDEVICE_FP8__ __nv_fp8x2_e4m3(const double2 f) {
__x = __nv_cvt_double2_to_fp8x2(f, __NV_SATFINITE, __NV_E4M3);
}
#if !defined(__CUDA_NO_FP8_CONVERSION_OPERATORS__)
/* Widening converts */
/**
* \ingroup CUDA_MATH_FP8X2_E4M3_STRUCT
* Conversion operator to \p __half2 data type.
*/
explicit __CUDA_HOSTDEVICE_FP8__ operator __half2() const {
return static_cast<__half2>(__nv_cvt_fp8x2_to_halfraw2(__x, __NV_E4M3));
}
/**
* \ingroup CUDA_MATH_FP8X2_E4M3_STRUCT
* Conversion operator to \p float2 data type.
*/
explicit __CUDA_HOSTDEVICE_FP8__ operator float2() const {
return __internal_halfraw2_to_float2(
__nv_cvt_fp8x2_to_halfraw2(__x, __NV_E4M3));
}
#endif /* !defined(__CUDA_NO_FP8_CONVERSION_OPERATORS__) */
#endif /* !defined(__CUDA_NO_FP8_CONVERSIONS__) */
};
/**
* \defgroup CUDA_MATH_FP8X4_E4M3_STRUCT C++ struct for handling vector type of four fp8 values of e4m3 kind.
* \ingroup CUDA_MATH_INTRINSIC_FP8
*/
/**
* \ingroup CUDA_MATH_FP8X4_E4M3_STRUCT
* \brief __nv_fp8x4_e4m3 datatype
*
* \details This structure implements the datatype for storage
* and operations on the vector of four \p fp8 values of \p e4m3 kind each:
* with 1 sign, 4 exponent, 1 implicit and 3 explicit mantissa bits.
* The encoding doesn't support Infinity.
* NaNs are limited to 0x7F and 0xFF values.
*/
struct __CUDA_ALIGN__(4) __nv_fp8x4_e4m3 {
public:
/**
* \ingroup CUDA_MATH_FP8X4_E4M3_STRUCT
* Storage variable contains the vector of four \p fp8 floating-point data
* values.
*/
__nv_fp8x4_storage_t __x;
/**
* \ingroup CUDA_MATH_FP8X4_E4M3_STRUCT
 * Default constructor.
*/
#if defined(__CPP_VERSION_AT_LEAST_11_FP8)
__nv_fp8x4_e4m3() = default;
#else
__CUDA_HOSTDEVICE_FP8__ __nv_fp8x4_e4m3() {}
#endif /* defined(__CPP_VERSION_AT_LEAST_11_FP8) */
#if !defined(__CUDA_NO_FP8_CONVERSIONS__)
/* Construct from wider types */
/**
* \ingroup CUDA_MATH_FP8X4_E4M3_STRUCT
* Constructor from a pair of \p __half2 data type values,
* relies on \p __NV_SATFINITE behavior for out-of-range values.
*/
explicit __CUDA_HOSTDEVICE_FP8__ __nv_fp8x4_e4m3(const __half2 flo,
const __half2 fhi) {
const __nv_fp8x2_storage_t rlo = __nv_cvt_halfraw2_to_fp8x2(
static_cast<__half2_raw>(flo), __NV_SATFINITE, __NV_E4M3);
const __nv_fp8x2_storage_t rhi = __nv_cvt_halfraw2_to_fp8x2(
static_cast<__half2_raw>(fhi), __NV_SATFINITE, __NV_E4M3);
__x = __internal_pack_u16x2_to_u32(rlo, rhi);
}
/**
* \ingroup CUDA_MATH_FP8X4_E4M3_STRUCT
* Constructor from a pair of \p __nv_bfloat162 data type values,
* relies on \p __NV_SATFINITE behavior for out-of-range values.
*/
explicit __CUDA_HOSTDEVICE_FP8__ __nv_fp8x4_e4m3(const __nv_bfloat162 flo,
const __nv_bfloat162 fhi) {
const __nv_fp8x2_storage_t rlo = __nv_cvt_bfloat16raw2_to_fp8x2(
static_cast<__nv_bfloat162_raw>(flo), __NV_SATFINITE, __NV_E4M3);
const __nv_fp8x2_storage_t rhi = __nv_cvt_bfloat16raw2_to_fp8x2(
static_cast<__nv_bfloat162_raw>(fhi), __NV_SATFINITE, __NV_E4M3);
__x = __internal_pack_u16x2_to_u32(rlo, rhi);
}
/**
* \ingroup CUDA_MATH_FP8X4_E4M3_STRUCT
* Constructor from \p float4 vector data type,
* relies on \p __NV_SATFINITE behavior for out-of-range values.
*/
explicit __CUDA_HOSTDEVICE_FP8__ __nv_fp8x4_e4m3(const float4 f) {
const float2 flo = {f.x, f.y};
const float2 fhi = {f.z, f.w};
const __nv_fp8x2_storage_t rlo =
__nv_cvt_float2_to_fp8x2(flo, __NV_SATFINITE, __NV_E4M3);
const __nv_fp8x2_storage_t rhi =
__nv_cvt_float2_to_fp8x2(fhi, __NV_SATFINITE, __NV_E4M3);
__x = __internal_pack_u16x2_to_u32(rlo, rhi);
}
/**
* \ingroup CUDA_MATH_FP8X4_E4M3_STRUCT
* Constructor from \p double4 vector data type,
* relies on \p __NV_SATFINITE behavior for out-of-range values.
*/
explicit __CUDA_HOSTDEVICE_FP8__ __nv_fp8x4_e4m3(const double4 f) {
const double2 flo = {f.x, f.y};
const double2 fhi = {f.z, f.w};
const __nv_fp8x2_storage_t rlo =
__nv_cvt_double2_to_fp8x2(flo, __NV_SATFINITE, __NV_E4M3);
const __nv_fp8x2_storage_t rhi =
__nv_cvt_double2_to_fp8x2(fhi, __NV_SATFINITE, __NV_E4M3);
__x = __internal_pack_u16x2_to_u32(rlo, rhi);
}
#if !defined(__CUDA_NO_FP8_CONVERSION_OPERATORS__)
/* Widening converts */
/**
* \ingroup CUDA_MATH_FP8X4_E4M3_STRUCT
* Conversion operator to \p float4 vector data type.
*/
explicit __CUDA_HOSTDEVICE_FP8__ operator float4() const {
const __nv_fp8x2_storage_t slo = static_cast<__nv_fp8x2_storage_t>(__x);
const __nv_fp8x2_storage_t shi =
static_cast<__nv_fp8x2_storage_t>(__x >> 16U);
float2 rlo = __internal_halfraw2_to_float2(
__nv_cvt_fp8x2_to_halfraw2(slo, __NV_E4M3));
float2 rhi = __internal_halfraw2_to_float2(
__nv_cvt_fp8x2_to_halfraw2(shi, __NV_E4M3));
float4 res = {rlo.x, rlo.y, rhi.x, rhi.y};
return res;
}
#endif /* !defined(__CUDA_NO_FP8_CONVERSION_OPERATORS__) */
#endif /* !defined(__CUDA_NO_FP8_CONVERSIONS__) */
};
#endif /* defined(__cplusplus) */
#endif /* end of include guard: __CUDA_FP8_HPP__ */