/*
* Copyright 2014-2023 NVIDIA Corporation. All rights reserved.
*
* NOTICE TO LICENSEE:
*
* This source code and/or documentation ("Licensed Deliverables") are
* subject to NVIDIA intellectual property rights under U.S. and
* international Copyright laws.
*
 * These Licensed Deliverables contained herein are PROPRIETARY and
 * CONFIDENTIAL to NVIDIA and are being provided under the terms and
* conditions of a form of NVIDIA software license agreement by and
* between NVIDIA and Licensee ("License Agreement") or electronically
* accepted by Licensee. Notwithstanding any terms or conditions to
* the contrary in the License Agreement, reproduction or disclosure
* of the Licensed Deliverables to any third party without the express
* written consent of NVIDIA is prohibited.
*
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
* OF THESE LICENSED DELIVERABLES.
*
* U.S. Government End Users. These Licensed Deliverables are a
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
* 1995), consisting of "commercial computer software" and "commercial
* computer software documentation" as such terms are used in 48
 * C.F.R. 12.212 (SEPT 1995) and are provided to the U.S. Government
 * only as a commercial end item. Consistent with 48 C.F.R. 12.212 and
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
* U.S. Government End Users acquire the Licensed Deliverables with
* only those rights set forth herein.
*
* Any use of the Licensed Deliverables in individual and commercial
* software must include, in the user documentation and internal
* comments to the code, the above Disclaimer and U.S. Government End
* Users Notice.
*/
/*
* cudnn_ops : cuDNN's basic definitions and basic operations.
*/
#if !defined(CUDNN_OPS_H_)
#define CUDNN_OPS_H_
#include <stdint.h>
#include "cudnn_version.h"
#include "cudnn_graph.h"
/* These version numbers are autogenerated; do not edit them manually. */
#define CUDNN_OPS_MAJOR 9
#define CUDNN_OPS_MINOR 1
#define CUDNN_OPS_PATCH 0
#if (CUDNN_OPS_MAJOR != CUDNN_MAJOR) || (CUDNN_OPS_MINOR != CUDNN_MINOR) || (CUDNN_OPS_PATCH != CUDNN_PATCHLEVEL)
#error Version mismatch in cuDNN OPS!!!
#endif
#if defined(__cplusplus)
extern "C" {
#endif
/* Data structures to represent Image/Filter and the Neural Network Layer */
typedef struct cudnnTensorStruct *cudnnTensorDescriptor_t;
typedef struct cudnnPoolingStruct *cudnnPoolingDescriptor_t CUDNN_DEPRECATED;
typedef struct cudnnFilterStruct *cudnnFilterDescriptor_t CUDNN_DEPRECATED;
typedef struct cudnnLRNStruct *cudnnLRNDescriptor_t;
typedef struct cudnnActivationStruct *cudnnActivationDescriptor_t CUDNN_DEPRECATED;
typedef struct cudnnSpatialTransformerStruct *cudnnSpatialTransformerDescriptor_t;
typedef struct cudnnOpTensorStruct *cudnnOpTensorDescriptor_t CUDNN_DEPRECATED;
typedef struct cudnnReduceTensorStruct *cudnnReduceTensorDescriptor_t CUDNN_DEPRECATED;
typedef struct cudnnCTCLossStruct *cudnnCTCLossDescriptor_t;
typedef struct cudnnTensorTransformStruct *cudnnTensorTransformDescriptor_t CUDNN_DEPRECATED;
/*
* CUDNN Determinism
*/
typedef enum {
CUDNN_NON_DETERMINISTIC = 0,
CUDNN_DETERMINISTIC = 1,
} cudnnDeterminism_t;
/* Create an instance of a generic Tensor descriptor */
cudnnStatus_t CUDNNWINAPI
cudnnCreateTensorDescriptor(cudnnTensorDescriptor_t *tensorDesc);
cudnnStatus_t CUDNNWINAPI
cudnnSetTensor4dDescriptor(cudnnTensorDescriptor_t tensorDesc,
cudnnTensorFormat_t format,
cudnnDataType_t dataType, /* image data type */
int n, /* number of inputs (batch size) */
int c, /* number of input feature maps */
int h, /* height of input section */
int w); /* width of input section */
cudnnStatus_t CUDNNWINAPI
cudnnSetTensor4dDescriptorEx(cudnnTensorDescriptor_t tensorDesc,
cudnnDataType_t dataType, /* image data type */
int n, /* number of inputs (batch size) */
int c, /* number of input feature maps */
int h, /* height of input section */
int w, /* width of input section */
int nStride,
int cStride,
int hStride,
int wStride);
cudnnStatus_t CUDNNWINAPI
cudnnGetTensor4dDescriptor(const cudnnTensorDescriptor_t tensorDesc,
cudnnDataType_t *dataType, /* image data type */
int *n, /* number of inputs (batch size) */
int *c, /* number of input feature maps */
int *h, /* height of input section */
int *w, /* width of input section */
int *nStride,
int *cStride,
int *hStride,
int *wStride);
cudnnStatus_t CUDNNWINAPI
cudnnSetTensorNdDescriptor(cudnnTensorDescriptor_t tensorDesc,
cudnnDataType_t dataType,
int nbDims,
const int dimA[],
const int strideA[]);
cudnnStatus_t CUDNNWINAPI
cudnnSetTensorNdDescriptorEx(cudnnTensorDescriptor_t tensorDesc,
cudnnTensorFormat_t format,
cudnnDataType_t dataType,
int nbDims,
const int dimA[]);
cudnnStatus_t CUDNNWINAPI
cudnnGetTensorNdDescriptor(const cudnnTensorDescriptor_t tensorDesc,
int nbDimsRequested,
cudnnDataType_t *dataType,
int *nbDims,
int dimA[],
int strideA[]);
cudnnStatus_t CUDNNWINAPI
cudnnGetTensorSizeInBytes(const cudnnTensorDescriptor_t tensorDesc, size_t *size);
/* PixelOffset( n, c, h, w ) = n * input_stride + c * feature_stride + h * h_stride + w * w_stride

   1) Example of all images in row-major order, one batch of features after the other (with optional padding on rows)
      input_stride   : c x h x h_stride
      feature_stride : h x h_stride
      h_stride       : >= w (h_stride = w if no padding)
      w_stride       : 1

   2) Example of all images in row-major order with feature maps interleaved
      input_stride   : c x h x h_stride
      feature_stride : 1
      h_stride       : w x c
      w_stride       : c

   3) Example of all images in column-major order, one batch of features after the other (with optional padding on columns)
      input_stride   : c x w x w_stride
      feature_stride : w x w_stride
      h_stride       : 1
      w_stride       : >= h
*/
/* Destroy an instance of Tensor4d descriptor */
cudnnStatus_t CUDNNWINAPI
cudnnDestroyTensorDescriptor(cudnnTensorDescriptor_t tensorDesc);
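/*
 * Example (illustrative sketch, not part of the API): typical lifecycle of a
 * tensor descriptor for a packed NCHW float tensor. CUDNN_TENSOR_NCHW and
 * CUDNN_DATA_FLOAT come from cudnn_graph.h; no cuDNN handle is needed for
 * descriptor management. Error checking is omitted for brevity.
 *
 *   cudnnTensorDescriptor_t xDesc;
 *   size_t xSizeInBytes;
 *   cudnnCreateTensorDescriptor(&xDesc);
 *   cudnnSetTensor4dDescriptor(xDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT,
 *                              32, 3, 224, 224);   // n, c, h, w
 *   // Equivalent explicit strides for the fully packed case 1) above:
 *   //   wStride = 1, hStride = 224, cStride = 224 * 224, nStride = 3 * 224 * 224
 *   cudnnGetTensorSizeInBytes(xDesc, &xSizeInBytes); // bytes to allocate for the data
 *   // ... use xDesc with the operations declared below ...
 *   cudnnDestroyTensorDescriptor(xDesc);
 */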
/* Fold/unfold transforms */
typedef enum {
CUDNN_TRANSFORM_FOLD = 0U,
CUDNN_TRANSFORM_UNFOLD = 1U,
} cudnnFoldingDirection_t;
/** Create a destination descriptor for cudnnTransformTensor */
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnInitTransformDest(const cudnnTensorTransformDescriptor_t transformDesc,
const cudnnTensorDescriptor_t srcDesc,
cudnnTensorDescriptor_t destDesc,
size_t *destSizeInBytes);
/** Create an empty tensor transform descriptor */
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnCreateTensorTransformDescriptor(cudnnTensorTransformDescriptor_t *transformDesc);
/** Initialize a previously created tensor transform descriptor. */
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnSetTensorTransformDescriptor(cudnnTensorTransformDescriptor_t transformDesc,
const uint32_t nbDims,
const cudnnTensorFormat_t destFormat,
const int32_t padBeforeA[],
const int32_t padAfterA[],
const uint32_t foldA[],
const cudnnFoldingDirection_t direction);
/**
* Retrieves the values stored in a previously initialized tensor transform
* descriptor.
*/
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnGetTensorTransformDescriptor(cudnnTensorTransformDescriptor_t transformDesc,
uint32_t nbDimsRequested,
cudnnTensorFormat_t *destFormat,
int32_t padBeforeA[],
int32_t padAfterA[],
uint32_t foldA[],
cudnnFoldingDirection_t *direction);
/**
* Destroys a previously created tensor transform descriptor.
*/
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnDestroyTensorTransformDescriptor(cudnnTensorTransformDescriptor_t transformDesc);
/* Tensor layout conversion helper (y = alpha * x + beta * y) */
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnTransformTensor(cudnnHandle_t handle,
const void *alpha,
const cudnnTensorDescriptor_t xDesc,
const void *x,
const void *beta,
const cudnnTensorDescriptor_t yDesc,
void *y);
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnTransformTensorEx(cudnnHandle_t handle,
const cudnnTensorTransformDescriptor_t transDesc,
const void *alpha,
const cudnnTensorDescriptor_t srcDesc,
const void *srcData,
const void *beta,
const cudnnTensorDescriptor_t destDesc,
void *destData);
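/*
 * Example (illustrative sketch): converting a packed NCHW tensor to NHWC with
 * the deprecated cudnnTransformTensor. `handle` is assumed to be a valid
 * cudnnHandle_t, and d_src/d_dst are assumed device buffers of sufficient
 * size allocated elsewhere; error checking is omitted.
 *
 *   cudnnTensorDescriptor_t srcDesc, dstDesc;
 *   float one = 1.0f, zero = 0.0f;
 *   cudnnCreateTensorDescriptor(&srcDesc);
 *   cudnnCreateTensorDescriptor(&dstDesc);
 *   cudnnSetTensor4dDescriptor(srcDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 8, 16, 32, 32);
 *   cudnnSetTensor4dDescriptor(dstDesc, CUDNN_TENSOR_NHWC, CUDNN_DATA_FLOAT, 8, 16, 32, 32);
 *   cudnnTransformTensor(handle, &one, srcDesc, d_src, &zero, dstDesc, d_dst);
 *   cudnnDestroyTensorDescriptor(srcDesc);
 *   cudnnDestroyTensorDescriptor(dstDesc);
 */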
/* Tensor Bias addition : C = alpha * A + beta * C */
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnAddTensor(cudnnHandle_t handle,
const void *alpha,
const cudnnTensorDescriptor_t aDesc,
const void *A,
const void *beta,
const cudnnTensorDescriptor_t cDesc,
void *C);
/*
* CUDNN OpTensor op type
*/
typedef enum {
CUDNN_OP_TENSOR_ADD = 0,
CUDNN_OP_TENSOR_MUL = 1,
CUDNN_OP_TENSOR_MIN = 2,
CUDNN_OP_TENSOR_MAX = 3,
CUDNN_OP_TENSOR_SQRT = 4,
CUDNN_OP_TENSOR_NOT = 5,
} cudnnOpTensorOp_t;
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnCreateOpTensorDescriptor(cudnnOpTensorDescriptor_t *opTensorDesc);
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnSetOpTensorDescriptor(cudnnOpTensorDescriptor_t opTensorDesc,
cudnnOpTensorOp_t opTensorOp,
cudnnDataType_t opTensorCompType,
cudnnNanPropagation_t opTensorNanOpt);
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnGetOpTensorDescriptor(const cudnnOpTensorDescriptor_t opTensorDesc,
cudnnOpTensorOp_t *opTensorOp,
cudnnDataType_t *opTensorCompType,
cudnnNanPropagation_t *opTensorNanOpt);
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnDestroyOpTensorDescriptor(cudnnOpTensorDescriptor_t opTensorDesc);
/* Tensor operation : C = op( alpha1 * A, alpha2 * B ) + beta * C */
/* B tensor is ignored for CUDNN_OP_TENSOR_SQRT, CUDNN_OP_TENSOR_NOT. */
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnOpTensor(cudnnHandle_t handle,
const cudnnOpTensorDescriptor_t opTensorDesc,
const void *alpha1,
const cudnnTensorDescriptor_t aDesc,
const void *A,
const void *alpha2,
const cudnnTensorDescriptor_t bDesc,
const void *B,
const void *beta,
const cudnnTensorDescriptor_t cDesc,
void *C);
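/*
 * Example (illustrative sketch): elementwise C = A + B in float using the
 * deprecated OpTensor API. `handle`, the descriptors aDesc/bDesc/cDesc
 * (matching shapes), and the device buffers d_A/d_B/d_C are assumed to exist;
 * error checking is omitted.
 *
 *   cudnnOpTensorDescriptor_t addDesc;
 *   float one = 1.0f, zero = 0.0f;
 *   cudnnCreateOpTensorDescriptor(&addDesc);
 *   cudnnSetOpTensorDescriptor(addDesc, CUDNN_OP_TENSOR_ADD, CUDNN_DATA_FLOAT,
 *                              CUDNN_PROPAGATE_NAN);
 *   cudnnOpTensor(handle, addDesc,
 *                 &one, aDesc, d_A,    // alpha1 * A
 *                 &one, bDesc, d_B,    // alpha2 * B
 *                 &zero, cDesc, d_C);  // + beta * C
 *   cudnnDestroyOpTensorDescriptor(addDesc);
 */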
/*
* CUDNN ReduceTensor indices type
*/
typedef enum {
CUDNN_REDUCE_TENSOR_NO_INDICES = 0,
CUDNN_REDUCE_TENSOR_FLATTENED_INDICES = 1,
} cudnnReduceTensorIndices_t CUDNN_DEPRECATED;
/*
 * CUDNN tensor indices type size (all unsigned).
 * Selecting the index size is currently not supported; the default,
 * 32-bit unsigned, is always used.
 */
typedef enum {
CUDNN_32BIT_INDICES = 0,
CUDNN_64BIT_INDICES = 1,
CUDNN_16BIT_INDICES = 2,
CUDNN_8BIT_INDICES = 3,
} cudnnIndicesType_t CUDNN_DEPRECATED;
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnCreateReduceTensorDescriptor(cudnnReduceTensorDescriptor_t *reduceTensorDesc);
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnSetReduceTensorDescriptor(cudnnReduceTensorDescriptor_t reduceTensorDesc,
cudnnReduceTensorOp_t reduceTensorOp,
cudnnDataType_t reduceTensorCompType,
cudnnNanPropagation_t reduceTensorNanOpt,
cudnnReduceTensorIndices_t reduceTensorIndices,
cudnnIndicesType_t reduceTensorIndicesType);
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnGetReduceTensorDescriptor(const cudnnReduceTensorDescriptor_t reduceTensorDesc,
cudnnReduceTensorOp_t *reduceTensorOp,
cudnnDataType_t *reduceTensorCompType,
cudnnNanPropagation_t *reduceTensorNanOpt,
cudnnReduceTensorIndices_t *reduceTensorIndices,
cudnnIndicesType_t *reduceTensorIndicesType);
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnDestroyReduceTensorDescriptor(cudnnReduceTensorDescriptor_t reduceTensorDesc);
/* Helper function to return the minimum size of the index space to be passed to the reduction given the input and
* output tensors */
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnGetReductionIndicesSize(cudnnHandle_t handle,
const cudnnReduceTensorDescriptor_t reduceTensorDesc,
const cudnnTensorDescriptor_t aDesc,
const cudnnTensorDescriptor_t cDesc,
size_t *sizeInBytes);
/* Helper function to return the minimum size of the workspace to be passed to the reduction given the input and output
* tensors */
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnGetReductionWorkspaceSize(cudnnHandle_t handle,
const cudnnReduceTensorDescriptor_t reduceTensorDesc,
const cudnnTensorDescriptor_t aDesc,
const cudnnTensorDescriptor_t cDesc,
size_t *sizeInBytes);
/* Tensor operation : C = reduce op( alpha * A ) + beta * C */
/* The NaN propagation enum applies only to the min and max reduce ops; the other reduce ops propagate NaN as usual. */
/* The indices space is ignored for reduce ops other than min or max. */
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnReduceTensor(cudnnHandle_t handle,
const cudnnReduceTensorDescriptor_t reduceTensorDesc,
void *indices,
size_t indicesSizeInBytes,
void *workspace,
size_t workspaceSizeInBytes,
const void *alpha,
const cudnnTensorDescriptor_t aDesc,
const void *A,
const void *beta,
const cudnnTensorDescriptor_t cDesc,
void *C);
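/*
 * Example (illustrative sketch): sum-reduce an NxCxHxW tensor A into a
 * 1xCx1x1 tensor C (per-channel sums). `handle`, aDesc, cDesc, d_A, and d_C
 * are assumed to exist, and d_workspace is assumed to be a device buffer of
 * at least wsSize bytes (e.g., allocated with cudaMalloc); error checking is
 * omitted.
 *
 *   cudnnReduceTensorDescriptor_t sumDesc;
 *   size_t wsSize;
 *   float one = 1.0f, zero = 0.0f;
 *   cudnnCreateReduceTensorDescriptor(&sumDesc);
 *   cudnnSetReduceTensorDescriptor(sumDesc, CUDNN_REDUCE_TENSOR_ADD, CUDNN_DATA_FLOAT,
 *                                  CUDNN_PROPAGATE_NAN, CUDNN_REDUCE_TENSOR_NO_INDICES,
 *                                  CUDNN_32BIT_INDICES);
 *   cudnnGetReductionWorkspaceSize(handle, sumDesc, aDesc, cDesc, &wsSize);
 *   cudnnReduceTensor(handle, sumDesc, NULL, 0, d_workspace, wsSize,
 *                     &one, aDesc, d_A, &zero, cDesc, d_C);
 *   cudnnDestroyReduceTensorDescriptor(sumDesc);
 */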
/* Set all values of a tensor to a given value : y[i] = value[0] */
cudnnStatus_t CUDNNWINAPI
cudnnSetTensor(cudnnHandle_t handle, const cudnnTensorDescriptor_t yDesc, void *y, const void *valuePtr);
/* Scale all values of a tensor by a given factor : y[i] = alpha * y[i] */
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnScaleTensor(cudnnHandle_t handle, const cudnnTensorDescriptor_t yDesc, void *y, const void *alpha);
/* Create an instance of FilterStruct */
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnCreateFilterDescriptor(cudnnFilterDescriptor_t *filterDesc);
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnSetFilter4dDescriptor(cudnnFilterDescriptor_t filterDesc,
cudnnDataType_t dataType, /* image data type */
cudnnTensorFormat_t format,
int k, /* number of output feature maps */
int c, /* number of input feature maps */
int h, /* height of each input filter */
int w); /* width of each input filter */
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnGetFilter4dDescriptor(const cudnnFilterDescriptor_t filterDesc,
cudnnDataType_t *dataType, /* image data type */
cudnnTensorFormat_t *format,
int *k, /* number of output feature maps */
int *c, /* number of input feature maps */
int *h, /* height of each input filter */
int *w); /* width of each input filter */
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnSetFilterNdDescriptor(cudnnFilterDescriptor_t filterDesc,
cudnnDataType_t dataType, /* image data type */
cudnnTensorFormat_t format,
int nbDims,
const int filterDimA[]);
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnGetFilterNdDescriptor(const cudnnFilterDescriptor_t filterDesc,
int nbDimsRequested,
cudnnDataType_t *dataType, /* image data type */
cudnnTensorFormat_t *format,
int *nbDims,
int filterDimA[]);
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnGetFilterSizeInBytes(const cudnnFilterDescriptor_t filterDesc, size_t *size);
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnTransformFilter(cudnnHandle_t handle,
const cudnnTensorTransformDescriptor_t transDesc,
const void *alpha,
const cudnnFilterDescriptor_t srcDesc,
const void *srcData,
const void *beta,
const cudnnFilterDescriptor_t destDesc,
void *destData);
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnDestroyFilterDescriptor(cudnnFilterDescriptor_t filterDesc);
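/*
 * Example (illustrative sketch): describing a set of 64 3x3 filters over 16
 * input channels in NCHW layout with the deprecated filter descriptor API;
 * error checking is omitted.
 *
 *   cudnnFilterDescriptor_t wDesc;
 *   size_t wSizeInBytes;
 *   cudnnCreateFilterDescriptor(&wDesc);
 *   cudnnSetFilter4dDescriptor(wDesc, CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW,
 *                              64, 16, 3, 3);            // k, c, h, w
 *   cudnnGetFilterSizeInBytes(wDesc, &wSizeInBytes);      // bytes to allocate for the filter
 *   cudnnDestroyFilterDescriptor(wDesc);
 */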
/*
* softmax algorithm
*/
typedef enum {
CUDNN_SOFTMAX_FAST = 0, /* straightforward implementation */
CUDNN_SOFTMAX_ACCURATE = 1, /* subtract max from every point to avoid overflow */
CUDNN_SOFTMAX_LOG = 2
} cudnnSoftmaxAlgorithm_t;
typedef enum {
CUDNN_SOFTMAX_MODE_INSTANCE = 0, /* compute the softmax over all C, H, W for each N */
CUDNN_SOFTMAX_MODE_CHANNEL = 1 /* compute the softmax over all C for each H, W, N */
} cudnnSoftmaxMode_t;
/* Softmax functions: All of the form "output = alpha * Op(inputs) + beta * output" */
/* Function to perform forward softmax */
cudnnStatus_t CUDNNWINAPI
cudnnSoftmaxForward(cudnnHandle_t handle,
cudnnSoftmaxAlgorithm_t algo,
cudnnSoftmaxMode_t mode,
const void *alpha,
const cudnnTensorDescriptor_t xDesc,
const void *x,
const void *beta,
const cudnnTensorDescriptor_t yDesc,
void *y);
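/*
 * Example (illustrative sketch): numerically stable softmax over the channel
 * dimension of an NxCx1x1 tensor of class scores. `handle`, xDesc/yDesc (both
 * NxCx1x1), d_x, and d_y are assumed to exist; error checking is omitted.
 *
 *   float one = 1.0f, zero = 0.0f;
 *   cudnnSoftmaxForward(handle, CUDNN_SOFTMAX_ACCURATE, CUDNN_SOFTMAX_MODE_CHANNEL,
 *                       &one, xDesc, d_x, &zero, yDesc, d_y);
 */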
/*
* pooling mode
*/
typedef enum {
CUDNN_POOLING_MAX = 0,
CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING = 1, /* count for average includes padded values */
CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING = 2, /* count for average does not include padded values */
CUDNN_POOLING_MAX_DETERMINISTIC = 3
} cudnnPoolingMode_t CUDNN_DEPRECATED;
/* Create an instance of pooling descriptor */
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnCreatePoolingDescriptor(cudnnPoolingDescriptor_t *poolingDesc);
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnSetPooling2dDescriptor(cudnnPoolingDescriptor_t poolingDesc,
cudnnPoolingMode_t mode,
cudnnNanPropagation_t maxpoolingNanOpt,
int windowHeight,
int windowWidth,
int verticalPadding,
int horizontalPadding,
int verticalStride,
int horizontalStride);
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnGetPooling2dDescriptor(const cudnnPoolingDescriptor_t poolingDesc,
cudnnPoolingMode_t *mode,
cudnnNanPropagation_t *maxpoolingNanOpt,
int *windowHeight,
int *windowWidth,
int *verticalPadding,
int *horizontalPadding,
int *verticalStride,
int *horizontalStride);
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnSetPoolingNdDescriptor(cudnnPoolingDescriptor_t poolingDesc,
const cudnnPoolingMode_t mode,
const cudnnNanPropagation_t maxpoolingNanOpt,
int nbDims,
const int windowDimA[],
const int paddingA[],
const int strideA[]);
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnGetPoolingNdDescriptor(const cudnnPoolingDescriptor_t poolingDesc,
int nbDimsRequested,
cudnnPoolingMode_t *mode,
cudnnNanPropagation_t *maxpoolingNanOpt,
int *nbDims,
int windowDimA[],
int paddingA[],
int strideA[]);
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnGetPoolingNdForwardOutputDim(const cudnnPoolingDescriptor_t poolingDesc,
const cudnnTensorDescriptor_t inputTensorDesc,
int nbDims,
int outputTensorDimA[]);
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnGetPooling2dForwardOutputDim(const cudnnPoolingDescriptor_t poolingDesc,
const cudnnTensorDescriptor_t inputTensorDesc,
int *n,
int *c,
int *h,
int *w);
/* Destroy an instance of pooling descriptor */
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnDestroyPoolingDescriptor(cudnnPoolingDescriptor_t poolingDesc);
/* Pooling functions: All of the form "output = alpha * Op(inputs) + beta * output" */
/* Function to perform forward pooling */
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnPoolingForward(cudnnHandle_t handle,
const cudnnPoolingDescriptor_t poolingDesc,
const void *alpha,
const cudnnTensorDescriptor_t xDesc,
const void *x,
const void *beta,
const cudnnTensorDescriptor_t yDesc,
void *y);
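/*
 * Example (illustrative sketch): 2x2 max pooling with stride 2 and no padding
 * using the deprecated pooling API. `handle`, xDesc, d_x, and d_y are assumed
 * to exist, and yDesc is assumed to be set from the queried output dims;
 * error checking is omitted.
 *
 *   cudnnPoolingDescriptor_t poolDesc;
 *   int on, oc, oh, ow;
 *   float one = 1.0f, zero = 0.0f;
 *   cudnnCreatePoolingDescriptor(&poolDesc);
 *   cudnnSetPooling2dDescriptor(poolDesc, CUDNN_POOLING_MAX, CUDNN_NOT_PROPAGATE_NAN,
 *                               2, 2,   // window h, w
 *                               0, 0,   // padding h, w
 *                               2, 2);  // stride h, w
 *   cudnnGetPooling2dForwardOutputDim(poolDesc, xDesc, &on, &oc, &oh, &ow);
 *   // ... set yDesc to (on, oc, oh, ow) with cudnnSetTensor4dDescriptor ...
 *   cudnnPoolingForward(handle, poolDesc, &one, xDesc, d_x, &zero, yDesc, d_y);
 *   cudnnDestroyPoolingDescriptor(poolDesc);
 */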
/* Activation functions: All of the form "output = alpha * Op(inputs) + beta * output" */
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnCreateActivationDescriptor(cudnnActivationDescriptor_t *activationDesc);
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnSetActivationDescriptor(cudnnActivationDescriptor_t activationDesc,
cudnnActivationMode_t mode,
cudnnNanPropagation_t reluNanOpt,
double coef); /* ceiling for clipped RELU, alpha for ELU */
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnGetActivationDescriptor(const cudnnActivationDescriptor_t activationDesc,
cudnnActivationMode_t *mode,
cudnnNanPropagation_t *reluNanOpt,
double *coef); /* ceiling for clipped RELU, alpha for ELU */
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnSetActivationDescriptorSwishBeta(cudnnActivationDescriptor_t activationDesc, double swish_beta);
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnGetActivationDescriptorSwishBeta(cudnnActivationDescriptor_t activationDesc, double *swish_beta);
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnDestroyActivationDescriptor(cudnnActivationDescriptor_t activationDesc);
/* Function to perform forward activation */
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnActivationForward(cudnnHandle_t handle,
cudnnActivationDescriptor_t activationDesc,
const void *alpha,
const cudnnTensorDescriptor_t xDesc,
const void *x,
const void *beta,
const cudnnTensorDescriptor_t yDesc,
void *y);
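/*
 * Example (illustrative sketch): ReLU forward using the deprecated activation
 * API. `handle`, xDesc/yDesc, d_x, and d_y are assumed to exist; error
 * checking is omitted.
 *
 *   cudnnActivationDescriptor_t actDesc;
 *   float one = 1.0f, zero = 0.0f;
 *   cudnnCreateActivationDescriptor(&actDesc);
 *   cudnnSetActivationDescriptor(actDesc, CUDNN_ACTIVATION_RELU,
 *                                CUDNN_NOT_PROPAGATE_NAN, 0.0); // coef unused for plain ReLU
 *   cudnnActivationForward(handle, actDesc, &one, xDesc, d_x, &zero, yDesc, d_y);
 *   cudnnDestroyActivationDescriptor(actDesc);
 */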
/*
* Create an instance of LRN (Local Response Normalization) descriptor
* Uses lrnN=5, lrnAlpha=1e-4, lrnBeta=0.75, lrnK=2.0 as defaults from Krizhevsky'12 ImageNet paper
*/
cudnnStatus_t CUDNNWINAPI
cudnnCreateLRNDescriptor(cudnnLRNDescriptor_t *normDesc);
#define CUDNN_LRN_MIN_N 1 /* minimum allowed lrnN */
#define CUDNN_LRN_MAX_N 16 /* maximum allowed lrnN */
#define CUDNN_LRN_MIN_K 1e-5 /* minimum allowed lrnK */
#define CUDNN_LRN_MIN_BETA 0.01 /* minimum allowed lrnBeta */
/* LRN layer mode */
typedef enum {
CUDNN_LRN_CROSS_CHANNEL_DIM1 = 0, /* Normalize across tensor's dimA[1] dimension */
} cudnnLRNMode_t;
/*
* Uses a window [center-lookBehind, center+lookAhead], where
* lookBehind = floor( (lrnN-1)/2 ), lookAhead = lrnN-lookBehind-1.
 * Values of double parameters are cast to the tensor data type.
*/
cudnnStatus_t CUDNNWINAPI
cudnnSetLRNDescriptor(cudnnLRNDescriptor_t normDesc, unsigned lrnN, double lrnAlpha, double lrnBeta, double lrnK);
/*
* Retrieve the settings currently stored in an LRN layer descriptor
* Any of the provided pointers can be NULL (no corresponding value will be returned)
*/
cudnnStatus_t CUDNNWINAPI
cudnnGetLRNDescriptor(cudnnLRNDescriptor_t normDesc, unsigned *lrnN, double *lrnAlpha, double *lrnBeta, double *lrnK);
/* Destroy an instance of LRN descriptor */
cudnnStatus_t CUDNNWINAPI
cudnnDestroyLRNDescriptor(cudnnLRNDescriptor_t lrnDesc);
/* LRN functions: output = alpha * normalize(x) + beta * old_y */
/* LRN cross-channel forward computation. Double parameters cast to tensor data type */
cudnnStatus_t CUDNNWINAPI
cudnnLRNCrossChannelForward(cudnnHandle_t handle,
cudnnLRNDescriptor_t normDesc,
cudnnLRNMode_t lrnMode,
const void *alpha,
const cudnnTensorDescriptor_t xDesc,
const void *x,
const void *beta,
const cudnnTensorDescriptor_t yDesc,
void *y);
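/*
 * Example (illustrative sketch): cross-channel LRN with the Krizhevsky'12
 * defaults noted above. `handle`, xDesc/yDesc, d_x, and d_y are assumed to
 * exist; error checking is omitted.
 *
 *   cudnnLRNDescriptor_t lrnDesc;
 *   float one = 1.0f, zero = 0.0f;
 *   cudnnCreateLRNDescriptor(&lrnDesc);
 *   cudnnSetLRNDescriptor(lrnDesc, 5, 1e-4, 0.75, 2.0);  // lrnN, alpha, beta, k
 *   cudnnLRNCrossChannelForward(handle, lrnDesc, CUDNN_LRN_CROSS_CHANNEL_DIM1,
 *                               &one, xDesc, d_x, &zero, yDesc, d_y);
 *   cudnnDestroyLRNDescriptor(lrnDesc);
 */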
typedef enum {
CUDNN_DIVNORM_PRECOMPUTED_MEANS = 0,
} cudnnDivNormMode_t;
/* LCN/divisive normalization functions: y = alpha * normalize(x) + beta * y */
cudnnStatus_t CUDNNWINAPI
cudnnDivisiveNormalizationForward(cudnnHandle_t handle,
cudnnLRNDescriptor_t normDesc,
cudnnDivNormMode_t mode,
const void *alpha,
const cudnnTensorDescriptor_t xDesc, /* same desc for means, temp, temp2 */
const void *x,
const void *means, /* if NULL, means are assumed to be zero */
void *temp,
void *temp2,
const void *beta,
const cudnnTensorDescriptor_t yDesc,
void *y);
typedef enum {
/* bnScale, bnBias tensor dims are 1xCxHxWx.. (one value per CHW...-slice, normalized over N slice) */
CUDNN_BATCHNORM_PER_ACTIVATION = 0,
/* bnScale, bnBias tensor dims are 1xCx1x1 (one value per C-dim normalized over Nx1xHxW subtensors) */
CUDNN_BATCHNORM_SPATIAL = 1,
/*
* bnScale, bnBias tensor dims are 1xCx1x1 (one value per C-dim normalized over Nx1xHxW subtensors).
* May be faster than CUDNN_BATCHNORM_SPATIAL but imposes some limits on the range of values
*/
CUDNN_BATCHNORM_SPATIAL_PERSISTENT = 2,
} cudnnBatchNormMode_t CUDNN_DEPRECATED;
#define CUDNN_BN_MIN_EPSILON 0.0 /* Minimum epsilon allowed to be used in the Batch Normalization formula */
/*
 * Derives a tensor descriptor from a layer data descriptor for the BatchNormalization
* scale, invVariance, bnBias, bnScale tensors. Use this tensor desc for
* bnScaleBiasMeanVarDesc and bnScaleBiasDiffDesc in Batch Normalization forward and backward functions.
*/
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnDeriveBNTensorDescriptor(cudnnTensorDescriptor_t derivedBnDesc,
const cudnnTensorDescriptor_t xDesc,
cudnnBatchNormMode_t mode);
typedef enum {
CUDNN_BATCHNORM_OPS_BN = 0, /* do batch normalization only */
CUDNN_BATCHNORM_OPS_BN_ACTIVATION = 1, /* do batchNorm, then activation */
CUDNN_BATCHNORM_OPS_BN_ADD_ACTIVATION = 2, /* do batchNorm, then elemWiseAdd, then activation */
} cudnnBatchNormOps_t CUDNN_DEPRECATED;
/*
* Performs Batch Normalization during Inference:
* y[i] = bnScale[k]*(x[i]-estimatedMean[k])/sqrt(epsilon+estimatedVariance[k]) + bnBias[k]
 * with bnScale, bnBias, estimatedMean, estimatedVariance tensors indexed
 * according to spatial or per-activation mode. Refer to cudnnBatchNormalizationForwardTraining
 * below for notes on function arguments.
*/
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnBatchNormalizationForwardInference(cudnnHandle_t handle,
cudnnBatchNormMode_t mode,
const void *alpha, /* alpha[0] = result blend factor */
const void *beta, /* beta[0] = dest layer blend factor */
const cudnnTensorDescriptor_t xDesc,
const void *x, /* NxCxHxW */
const cudnnTensorDescriptor_t yDesc,
void *y, /* NxCxHxW */
const cudnnTensorDescriptor_t bnScaleBiasMeanVarDesc,
const void *bnScale,
const void *bnBias,
const void *estimatedMean,
const void *estimatedVariance,
double epsilon);
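/*
 * Example (illustrative sketch): spatial batch normalization at inference
 * time using statistics learned during training. `handle`, xDesc/yDesc, d_x,
 * d_y, and the 1xCx1x1 device buffers d_scale, d_bias, d_mean, d_var are
 * assumed to exist; error checking is omitted.
 *
 *   cudnnTensorDescriptor_t bnDesc;
 *   float one = 1.0f, zero = 0.0f;
 *   cudnnCreateTensorDescriptor(&bnDesc);
 *   cudnnDeriveBNTensorDescriptor(bnDesc, xDesc, CUDNN_BATCHNORM_SPATIAL);
 *   cudnnBatchNormalizationForwardInference(handle, CUDNN_BATCHNORM_SPATIAL,
 *                                           &one, &zero,
 *                                           xDesc, d_x, yDesc, d_y,
 *                                           bnDesc, d_scale, d_bias,
 *                                           d_mean, d_var, 1e-5);
 *   cudnnDestroyTensorDescriptor(bnDesc);
 */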
typedef enum {
/* bnScale, bnBias tensor dims are 1xCxHxWx.. (one value per CHW...-slice, normalized over N slice) */
CUDNN_NORM_PER_ACTIVATION = 0,
/* bnScale, bnBias tensor dims are 1xCx1x1 (one value per C-dim normalized over Nx1xHxW subtensors) */
CUDNN_NORM_PER_CHANNEL = 1,
} cudnnNormMode_t CUDNN_DEPRECATED;
typedef enum { CUDNN_NORM_ALGO_STANDARD = 0, CUDNN_NORM_ALGO_PERSIST = 1 } cudnnNormAlgo_t CUDNN_DEPRECATED;
/*
 * Derives a tensor descriptor from a layer data descriptor for the Normalization
* scale, invVariance, bnBias, bnScale tensors. Use this tensor desc for
* normScaleBiasMeanVarDesc and normScaleBiasDiffDesc in Normalization forward and backward functions.
*/
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnDeriveNormTensorDescriptor(cudnnTensorDescriptor_t derivedNormScaleBiasDesc,
cudnnTensorDescriptor_t derivedNormMeanVarDesc,
const cudnnTensorDescriptor_t xDesc,
cudnnNormMode_t mode,
                                int groupCnt); /* Placeholder for future work; must be set to 1 for now */
typedef enum {
CUDNN_NORM_OPS_NORM = 0, /* do normalization only */
CUDNN_NORM_OPS_NORM_ACTIVATION = 1, /* do Norm, then activation */
CUDNN_NORM_OPS_NORM_ADD_ACTIVATION = 2, /* do Norm, then elemWiseAdd, then activation */
} cudnnNormOps_t CUDNN_DEPRECATED;
/*
* Performs Normalization during Inference:
* y[i] = normScale[k]*(x[i]-estimatedMean[k])/sqrt(epsilon+estimatedVariance[k]) + normBias[k]
 * with normScale, normBias, estimatedMean, estimatedVariance tensors indexed
 * according to per-channel or per-activation mode. Refer to cudnnNormalizationForwardTraining
 * below for notes on function arguments.
*/
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnNormalizationForwardInference(cudnnHandle_t handle,
cudnnNormMode_t mode,
cudnnNormOps_t normOps,
cudnnNormAlgo_t algo,
const void *alpha, /* alpha[0] = result blend factor */
const void *beta, /* beta[0] = dest layer blend factor */
const cudnnTensorDescriptor_t xDesc,
const void *x, /* NxCxHxW */
const cudnnTensorDescriptor_t normScaleBiasDesc,
const void *normScale,
const void *normBias,
const cudnnTensorDescriptor_t normMeanVarDesc,
const void *estimatedMean,
const void *estimatedVariance,
const cudnnTensorDescriptor_t zDesc,
const void *z,
cudnnActivationDescriptor_t activationDesc,
const cudnnTensorDescriptor_t yDesc,
void *y, /* NxCxHxW */
double epsilon,
                                   int groupCnt); /* Placeholder for future work; set to 1 for now */
/* APIs for spatial transformer networks */
typedef enum {
CUDNN_SAMPLER_BILINEAR = 0,
} cudnnSamplerType_t;
cudnnStatus_t CUDNNWINAPI
cudnnCreateSpatialTransformerDescriptor(cudnnSpatialTransformerDescriptor_t *stDesc);
cudnnStatus_t CUDNNWINAPI
cudnnSetSpatialTransformerNdDescriptor(cudnnSpatialTransformerDescriptor_t stDesc,
cudnnSamplerType_t samplerType,
cudnnDataType_t dataType,
const int nbDims,
const int dimA[]);
cudnnStatus_t CUDNNWINAPI
cudnnDestroySpatialTransformerDescriptor(cudnnSpatialTransformerDescriptor_t stDesc);
cudnnStatus_t CUDNNWINAPI
cudnnSpatialTfGridGeneratorForward(cudnnHandle_t handle,
const cudnnSpatialTransformerDescriptor_t stDesc,
const void *theta,
void *grid);
cudnnStatus_t CUDNNWINAPI
cudnnSpatialTfSamplerForward(cudnnHandle_t handle,
cudnnSpatialTransformerDescriptor_t stDesc,
const void *alpha,
const cudnnTensorDescriptor_t xDesc,
const void *x,
const void *grid,
const void *beta,
cudnnTensorDescriptor_t yDesc,
void *y);
typedef struct cudnnDropoutStruct *cudnnDropoutDescriptor_t;
cudnnStatus_t CUDNNWINAPI
cudnnCreateDropoutDescriptor(cudnnDropoutDescriptor_t *dropoutDesc);
cudnnStatus_t CUDNNWINAPI
cudnnDestroyDropoutDescriptor(cudnnDropoutDescriptor_t dropoutDesc);
/* Helper function to determine size of the states to be passed to cudnnSetDropoutDescriptor */
cudnnStatus_t CUDNNWINAPI
cudnnDropoutGetStatesSize(cudnnHandle_t handle, size_t *sizeInBytes);
/* Helper function to determine size of the reserve space to be passed to dropout forward/backward calls */
cudnnStatus_t CUDNNWINAPI
cudnnDropoutGetReserveSpaceSize(cudnnTensorDescriptor_t xdesc, size_t *sizeInBytes);
cudnnStatus_t CUDNNWINAPI
cudnnSetDropoutDescriptor(cudnnDropoutDescriptor_t dropoutDesc,
cudnnHandle_t handle,
float dropout,
void *states,
size_t stateSizeInBytes,
unsigned long long seed);
/* Restores the dropout descriptor to a previously saved-off state */
cudnnStatus_t CUDNNWINAPI
cudnnRestoreDropoutDescriptor(cudnnDropoutDescriptor_t dropoutDesc,
cudnnHandle_t handle,
float dropout,
void *states,
size_t stateSizeInBytes,
unsigned long long seed);
cudnnStatus_t CUDNNWINAPI
cudnnGetDropoutDescriptor(cudnnDropoutDescriptor_t dropoutDesc,
cudnnHandle_t handle,
float *dropout,
void **states,
unsigned long long *seed);
cudnnStatus_t CUDNNWINAPI
cudnnDropoutForward(cudnnHandle_t handle,
const cudnnDropoutDescriptor_t dropoutDesc,
const cudnnTensorDescriptor_t xdesc,
const void *x,
const cudnnTensorDescriptor_t ydesc,
void *y,
void *reserveSpace,
size_t reserveSpaceSizeInBytes);
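/*
 * Example (illustrative sketch): dropout forward with a dropout probability
 * of 0.5. `handle`, xDesc/yDesc, d_x, and d_y are assumed to exist, and
 * d_states / d_reserve are assumed to be device buffers of at least the
 * queried sizes (e.g., allocated with cudaMalloc); error checking is omitted.
 *
 *   cudnnDropoutDescriptor_t dropDesc;
 *   size_t stateSize, reserveSize;
 *   cudnnDropoutGetStatesSize(handle, &stateSize);
 *   cudnnDropoutGetReserveSpaceSize(xDesc, &reserveSize);
 *   // ... allocate d_states (stateSize bytes) and d_reserve (reserveSize bytes) ...
 *   cudnnCreateDropoutDescriptor(&dropDesc);
 *   cudnnSetDropoutDescriptor(dropDesc, handle, 0.5f, d_states, stateSize, 1234ULL);
 *   cudnnDropoutForward(handle, dropDesc, xDesc, d_x, yDesc, d_y,
 *                       d_reserve, reserveSize);
 *   cudnnDestroyDropoutDescriptor(dropDesc);
 */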
/* TODO: move these enums out to the appropriate submodule */
typedef enum {
CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM = 0,
CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM = 1,
CUDNN_CONVOLUTION_FWD_ALGO_GEMM = 2,
CUDNN_CONVOLUTION_FWD_ALGO_DIRECT = 3,
CUDNN_CONVOLUTION_FWD_ALGO_FFT = 4,
CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING = 5,
CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD = 6,
CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED = 7,
CUDNN_CONVOLUTION_FWD_ALGO_COUNT = 8
} cudnnConvolutionFwdAlgo_t;
typedef enum {
CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0 = 0, /* non-deterministic */
CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1 = 1,
CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT = 2,
CUDNN_CONVOLUTION_BWD_FILTER_ALGO_3 = 3, /* non-deterministic */
CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD = 4, /* not implemented */
CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD_NONFUSED = 5,
CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT_TILING = 6,
CUDNN_CONVOLUTION_BWD_FILTER_ALGO_COUNT = 7
} cudnnConvolutionBwdFilterAlgo_t;
typedef enum {
CUDNN_CONVOLUTION_BWD_DATA_ALGO_0 = 0, /* non-deterministic */
CUDNN_CONVOLUTION_BWD_DATA_ALGO_1 = 1,
CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT = 2,
CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING = 3,
CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD = 4,
CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD_NONFUSED = 5,
CUDNN_CONVOLUTION_BWD_DATA_ALGO_COUNT = 6
} cudnnConvolutionBwdDataAlgo_t;
typedef enum { CUDNN_CTC_LOSS_ALGO_DETERMINISTIC = 0, CUDNN_CTC_LOSS_ALGO_NON_DETERMINISTIC = 1 } cudnnCTCLossAlgo_t;
/*
* \brief Cross-library version checker.
* This function is implemented differently in each sub-library. Each sublib
* checks whether its own version matches that of its dependencies.
* \returns CUDNN_STATUS_SUCCESS if the version check passes,
* CUDNN_STATUS_SUBLIBRARY_VERSION_MISMATCH if the versions are inconsistent.
*/
cudnnStatus_t CUDNNWINAPI
cudnnOpsVersionCheck(void);
/* Function to perform backward softmax */
cudnnStatus_t CUDNNWINAPI
cudnnSoftmaxBackward(cudnnHandle_t handle,
cudnnSoftmaxAlgorithm_t algo,
cudnnSoftmaxMode_t mode,
const void *alpha,
const cudnnTensorDescriptor_t yDesc,
const void *y,
const cudnnTensorDescriptor_t dyDesc,
const void *dy,
const void *beta,
const cudnnTensorDescriptor_t dxDesc,
void *dx);
/* Function to perform backward pooling */
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnPoolingBackward(cudnnHandle_t handle,
const cudnnPoolingDescriptor_t poolingDesc,
const void *alpha,
const cudnnTensorDescriptor_t yDesc,
const void *y,
const cudnnTensorDescriptor_t dyDesc,
const void *dy,
const cudnnTensorDescriptor_t xDesc,
const void *x,
const void *beta,
const cudnnTensorDescriptor_t dxDesc,
void *dx);
/* Function to perform backward activation */
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnActivationBackward(cudnnHandle_t handle,
cudnnActivationDescriptor_t activationDesc,
const void *alpha,
const cudnnTensorDescriptor_t yDesc,
const void *y,
const cudnnTensorDescriptor_t dyDesc,
const void *dy,
const cudnnTensorDescriptor_t xDesc,
const void *x,
const void *beta,
const cudnnTensorDescriptor_t dxDesc,
void *dx);
/* LRN cross-channel backward computation. Double parameters cast to tensor data type */
cudnnStatus_t CUDNNWINAPI
cudnnLRNCrossChannelBackward(cudnnHandle_t handle,
cudnnLRNDescriptor_t normDesc,
cudnnLRNMode_t lrnMode,
const void *alpha,
const cudnnTensorDescriptor_t yDesc,
const void *y,
const cudnnTensorDescriptor_t dyDesc,
const void *dy,
const cudnnTensorDescriptor_t xDesc,
const void *x,
const void *beta,
const cudnnTensorDescriptor_t dxDesc,
void *dx);
cudnnStatus_t CUDNNWINAPI
cudnnDivisiveNormalizationBackward(cudnnHandle_t handle,
cudnnLRNDescriptor_t normDesc,
cudnnDivNormMode_t mode,
const void *alpha,
const cudnnTensorDescriptor_t xDesc, /* same desc for x, means, dy, temp, temp2 */
const void *x,
const void *means, /* if NULL, means are assumed to be zero */
const void *dy,
void *temp,
void *temp2,
const void *beta,
const cudnnTensorDescriptor_t dXdMeansDesc, /* same desc for dx, dMeans */
void *dx, /* output x differential */
void *dMeans); /* output means differential, can be NULL */
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnGetBatchNormalizationForwardTrainingExWorkspaceSize(cudnnHandle_t handle,
cudnnBatchNormMode_t mode,
cudnnBatchNormOps_t bnOps,
const cudnnTensorDescriptor_t xDesc,
const cudnnTensorDescriptor_t zDesc,
const cudnnTensorDescriptor_t yDesc,
const cudnnTensorDescriptor_t bnScaleBiasMeanVarDesc,
const cudnnActivationDescriptor_t activationDesc,
size_t *sizeInBytes);
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnGetBatchNormalizationBackwardExWorkspaceSize(cudnnHandle_t handle,
cudnnBatchNormMode_t mode,
cudnnBatchNormOps_t bnOps,
const cudnnTensorDescriptor_t xDesc,
const cudnnTensorDescriptor_t yDesc,
const cudnnTensorDescriptor_t dyDesc,
const cudnnTensorDescriptor_t dzDesc,
const cudnnTensorDescriptor_t dxDesc,
const cudnnTensorDescriptor_t dBnScaleBiasDesc,
const cudnnActivationDescriptor_t activationDesc,
size_t *sizeInBytes);
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnGetBatchNormalizationTrainingExReserveSpaceSize(cudnnHandle_t handle,
cudnnBatchNormMode_t mode,
cudnnBatchNormOps_t bnOps,
const cudnnActivationDescriptor_t activationDesc,
const cudnnTensorDescriptor_t xDesc,
size_t *sizeInBytes);
/* Computes y = BN(x). Also accumulates moving averages of the mean and variance */
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnBatchNormalizationForwardTraining(
cudnnHandle_t handle,
cudnnBatchNormMode_t mode,
const void *alpha, /* alpha[0] = result blend factor */
const void *beta, /* beta[0] = dest layer blend factor */
const cudnnTensorDescriptor_t xDesc,
const void *x, /* NxCxHxW */
const cudnnTensorDescriptor_t yDesc,
void *y, /* NxCxHxW */
/* Shared desc for the next 6 tensors in the argument list.
Data type to be set as follows:
type = (typeOf(x) == double) ? double : float
Dimensions for this descriptor depend on normalization mode
- Spatial Normalization : tensors are expected to have dims 1xCx1x1
(normalization is performed across NxHxW)
- Per-Activation Normalization : tensors are expected to have dims of 1xCxHxW
(normalization is performed across N) */
const cudnnTensorDescriptor_t bnScaleBiasMeanVarDesc,
/* 'Gamma' and 'Beta' respectively in Ioffe and Szegedy's paper's notation */
const void *bnScale,
const void *bnBias,
    /* MUST use factor=1 in the very first call of a complete training cycle.
       Use factor=1/(1+n) at the n-th call to the function to get
       Cumulative Moving Average (CMA) behavior:
       CMA[n] = (x[1]+...+x[n])/n.
       This follows since CMA[n+1] = (n*CMA[n]+x[n+1])/(n+1) =
       ((n+1)*CMA[n]-CMA[n])/(n+1) + x[n+1]/(n+1) =
       CMA[n]*(1-1/(n+1)) + x[n+1]*1/(n+1) */
double exponentialAverageFactor,
/* Used in Training phase only.
runningMean = newMean*factor + runningMean*(1-factor) */
void *resultRunningMean,
/* Output in training mode, input in inference. Is the moving average
of variance[x] (factor is applied in the same way as for runningMean) */
void *resultRunningVariance,
/* Has to be >= CUDNN_BN_MIN_EPSILON. Should be the same in forward and backward functions. */
double epsilon,
/* Optionally save intermediate results from the forward pass here
- can be reused to speed up backward pass. NULL if unused */
void *resultSaveMean,
void *resultSaveInvVariance);
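/*
 * Example (illustrative sketch): choosing exponentialAverageFactor per the CMA
 * comment above, assuming `iter` counts completed calls in the current
 * training cycle (0 on the very first call). The remaining arguments (handle,
 * descriptors, device buffers, alpha/beta) are assumed to be set up as
 * documented for cudnnBatchNormalizationForwardTraining.
 *
 *   double factor = 1.0 / (1.0 + (double)iter);   // factor == 1 on the first call
 *   cudnnBatchNormalizationForwardTraining(handle, CUDNN_BATCHNORM_SPATIAL,
 *                                          &one, &zero, xDesc, d_x, yDesc, d_y,
 *                                          bnDesc, d_scale, d_bias,
 *                                          factor, d_runningMean, d_runningVar,
 *                                          1e-5,         // epsilon >= CUDNN_BN_MIN_EPSILON
 *                                          NULL, NULL);  // saved mean/invVariance unused
 *   ++iter;
 */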
/* Computes y = relu(BN(x) + z). Also accumulates moving averages of the mean and variance */
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnBatchNormalizationForwardTrainingEx(
cudnnHandle_t handle,
cudnnBatchNormMode_t mode,
cudnnBatchNormOps_t bnOps,
const void *alpha, /* alpha[0] = result blend factor */
const void *beta, /* beta[0] = dest layer blend factor */
const cudnnTensorDescriptor_t xDesc,
const void *xData,
const cudnnTensorDescriptor_t zDesc,
const void *zData,
const cudnnTensorDescriptor_t yDesc,
void *yData,
const cudnnTensorDescriptor_t bnScaleBiasMeanVarDesc,
const void *bnScale,
const void *bnBias,
double exponentialAverageFactor,
void *resultRunningMean,
void *resultRunningVariance,
/* Has to be >= CUDNN_BN_MIN_EPSILON. Should be the same in forward and backward functions. */
double epsilon,
/* Optionally save intermediate results from the forward pass here
- can be reused to speed up backward pass. NULL if unused */
void *resultSaveMean,
void *resultSaveInvVariance,
cudnnActivationDescriptor_t activationDesc,
void *workspace,
size_t workSpaceSizeInBytes,
void *reserveSpace,
size_t reserveSpaceSizeInBytes);
/* Performs backward pass of Batch Normalization layer. Returns x gradient,
* bnScale gradient and bnBias gradient */
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnBatchNormalizationBackward(cudnnHandle_t handle,
cudnnBatchNormMode_t mode,
const void *alphaDataDiff,
const void *betaDataDiff,
const void *alphaParamDiff,
const void *betaParamDiff,
const cudnnTensorDescriptor_t xDesc, /* same desc for x, dx, dy */
const void *x,
const cudnnTensorDescriptor_t dyDesc,
const void *dy,
const cudnnTensorDescriptor_t dxDesc,
void *dx,
/* Shared tensor desc for the 4 tensors below */
const cudnnTensorDescriptor_t dBnScaleBiasDesc,
const void *bnScale, /* bnBias doesn't affect backpropagation */
/* scale and bias diff are not backpropagated below this layer */
void *dBnScaleResult,
void *dBnBiasResult,
/* Same epsilon as forward pass */
double epsilon,
/* Optionally cached intermediate results from
forward pass */
const void *savedMean,
const void *savedInvVariance);
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnBatchNormalizationBackwardEx(cudnnHandle_t handle,
cudnnBatchNormMode_t mode,
cudnnBatchNormOps_t bnOps,
const void *alphaDataDiff,
const void *betaDataDiff,
const void *alphaParamDiff,
const void *betaParamDiff,
const cudnnTensorDescriptor_t xDesc,
const void *xData,
const cudnnTensorDescriptor_t yDesc,
const void *yData,
const cudnnTensorDescriptor_t dyDesc,
const void *dyData,
const cudnnTensorDescriptor_t dzDesc,
void *dzData,
const cudnnTensorDescriptor_t dxDesc,
void *dxData,
/* Shared tensor desc for the 4 tensors below */
const cudnnTensorDescriptor_t dBnScaleBiasDesc,
const void *bnScaleData,
const void *bnBiasData, /* needed if there is activation */
void *dBnScaleData,
void *dBnBiasData,
double epsilon, /* Same epsilon as forward pass */
/* Optionally cached intermediate results from
forward pass */
const void *savedMean,
const void *savedInvVariance,
cudnnActivationDescriptor_t activationDesc,
void *workSpace,
size_t workSpaceSizeInBytes,
void *reserveSpace,
size_t reserveSpaceSizeInBytes);
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnGetNormalizationForwardTrainingWorkspaceSize(cudnnHandle_t handle,
cudnnNormMode_t mode,
cudnnNormOps_t normOps,
cudnnNormAlgo_t algo,
const cudnnTensorDescriptor_t xDesc,
const cudnnTensorDescriptor_t zDesc,
const cudnnTensorDescriptor_t yDesc,
const cudnnTensorDescriptor_t normScaleBiasDesc,
const cudnnActivationDescriptor_t activationDesc,
const cudnnTensorDescriptor_t normMeanVarDesc,
size_t *sizeInBytes,
                                                  int groupCnt); /* Placeholder for future work; must be set to 1 for now */
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnGetNormalizationBackwardWorkspaceSize(cudnnHandle_t handle,
cudnnNormMode_t mode,
cudnnNormOps_t normOps,
cudnnNormAlgo_t algo,
const cudnnTensorDescriptor_t xDesc,
const cudnnTensorDescriptor_t yDesc,
const cudnnTensorDescriptor_t dyDesc,
const cudnnTensorDescriptor_t dzDesc,
const cudnnTensorDescriptor_t dxDesc,
const cudnnTensorDescriptor_t dNormScaleBiasDesc,
const cudnnActivationDescriptor_t activationDesc,
const cudnnTensorDescriptor_t normMeanVarDesc,
size_t *sizeInBytes,
                                           int groupCnt); /* Placeholder for future work; must be set to 1 for now */
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnGetNormalizationTrainingReserveSpaceSize(cudnnHandle_t handle,
cudnnNormMode_t mode,
cudnnNormOps_t normOps,
cudnnNormAlgo_t algo,
const cudnnActivationDescriptor_t activationDesc,
const cudnnTensorDescriptor_t xDesc,
size_t *sizeInBytes,
                                              int groupCnt); /* Placeholder for future work; must be set to 1 for now */
/* Computes y = relu(Norm(x) + z). Also accumulates moving averages of the mean and variance */
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnNormalizationForwardTraining(cudnnHandle_t handle,
cudnnNormMode_t mode,
cudnnNormOps_t normOps,
cudnnNormAlgo_t algo,
const void *alpha, /* alpha[0] = result blend factor */
const void *beta, /* beta[0] = dest layer blend factor */
const cudnnTensorDescriptor_t xDesc,
const void *xData,
const cudnnTensorDescriptor_t normScaleBiasDesc,
const void *normScale,
const void *normBias,
double exponentialAverageFactor,
const cudnnTensorDescriptor_t normMeanVarDesc,
void *resultRunningMean,
void *resultRunningVariance,
/* Has to be >= 0. Should be the same in forward and backward functions. */
double epsilon,
/* Optionally save intermediate results from the forward pass here
- can be reused to speed up backward pass. NULL if unused */
void *resultSaveMean,
void *resultSaveInvVariance,
cudnnActivationDescriptor_t activationDesc,
const cudnnTensorDescriptor_t zDesc,
const void *zData,
const cudnnTensorDescriptor_t yDesc,
void *yData,
void *workspace,
size_t workSpaceSizeInBytes,
void *reserveSpace,
size_t reserveSpaceSizeInBytes,
                                  int groupCnt); /* Placeholder for future work; must be set to 1 for now */
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnNormalizationBackward(cudnnHandle_t handle,
cudnnNormMode_t mode,
cudnnNormOps_t normOps,
cudnnNormAlgo_t algo,
const void *alphaDataDiff,
const void *betaDataDiff,
const void *alphaParamDiff,
const void *betaParamDiff,
const cudnnTensorDescriptor_t xDesc,
const void *xData,
const cudnnTensorDescriptor_t yDesc,
const void *yData,
const cudnnTensorDescriptor_t dyDesc,
const void *dyData,
const cudnnTensorDescriptor_t dzDesc,
void *dzData,
const cudnnTensorDescriptor_t dxDesc,
void *dxData,
/* Shared tensor desc for the 4 tensors below */
const cudnnTensorDescriptor_t dNormScaleBiasDesc,
const void *normScaleData,
const void *normBiasData, /* needed if there is activation */
void *dNormScaleData,
void *dNormBiasData,
double epsilon, /* Same epsilon as forward pass */
const cudnnTensorDescriptor_t normMeanVarDesc,
/* Optionally cached intermediate results from
forward pass */
const void *savedMean,
const void *savedInvVariance,
cudnnActivationDescriptor_t activationDesc,
void *workSpace,
size_t workSpaceSizeInBytes,
void *reserveSpace,
size_t reserveSpaceSizeInBytes,
                           int groupCnt); /* Placeholder for future work; must be set to 1 for now */
cudnnStatus_t CUDNNWINAPI
cudnnSpatialTfGridGeneratorBackward(cudnnHandle_t handle,
const cudnnSpatialTransformerDescriptor_t stDesc,
const void *dgrid,
void *dtheta);
cudnnStatus_t CUDNNWINAPI
cudnnSpatialTfSamplerBackward(cudnnHandle_t handle,
cudnnSpatialTransformerDescriptor_t stDesc,
const void *alpha,
const cudnnTensorDescriptor_t xDesc,
const void *x,
const void *beta,
const cudnnTensorDescriptor_t dxDesc,
void *dx,
const void *alphaDgrid,
const cudnnTensorDescriptor_t dyDesc,
const void *dy,
const void *grid,
const void *betaDgrid,
void *dgrid);
cudnnStatus_t CUDNNWINAPI
cudnnDropoutBackward(cudnnHandle_t handle,
const cudnnDropoutDescriptor_t dropoutDesc,
const cudnnTensorDescriptor_t dydesc,
const void *dy,
const cudnnTensorDescriptor_t dxdesc,
void *dx,
void *reserveSpace,
size_t reserveSpaceSizeInBytes);
#if defined(__cplusplus)
}
#endif
#endif /* CUDNN_OPS_H_ */