/*
 * Copyright 2020-2022 NVIDIA Corporation.  All rights reserved.
 *
 * NOTICE TO LICENSEE:
 *
 * This source code and/or documentation ("Licensed Deliverables") are
 * subject to NVIDIA intellectual property rights under U.S. and
 * international Copyright laws.
 *
 * These Licensed Deliverables contained herein is PROPRIETARY and
 * CONFIDENTIAL to NVIDIA and is being provided under the terms and
 * conditions of a form of NVIDIA software license agreement by and
 * between NVIDIA and Licensee ("License Agreement") or electronically
 * accepted by Licensee.  Notwithstanding any terms or conditions to
 * the contrary in the License Agreement, reproduction or disclosure
 * of the Licensed Deliverables to any third party without the express
 * written consent of NVIDIA is prohibited.
 *
 * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
 * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
 * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE.  IT IS
 * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
 * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
 * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
 * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
 * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
 * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
 * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
 * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
 * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
 * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
 * OF THESE LICENSED DELIVERABLES.
 *
 * U.S. Government End Users.  These Licensed Deliverables are a
 * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
 * 1995), consisting of "commercial computer software" and "commercial
 * computer software documentation" as such terms are used in 48
 * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
 * only as a commercial end item.  Consistent with 48 C.F.R.12.212 and
 * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
 * U.S. Government End Users acquire the Licensed Deliverables with
 * only those rights set forth herein.
 *
 * Any use of the Licensed Deliverables in individual and commercial
 * software must include, in the user documentation and internal
 * comments to the code, the above Disclaimer and U.S. Government End
 * Users Notice.
 */

#if !defined(_CUPTI_PCSAMPLING_H_)
#define _CUPTI_PCSAMPLING_H_

#include <cuda.h>
#include <stdint.h>
#include <stddef.h>
#include "cupti_result.h"
#include "cupti_common.h"


#if defined(__cplusplus)
extern "C" {
#endif

#if defined(__GNUC__) && defined(CUPTI_LIB)
    #pragma GCC visibility push(default)
#endif

/**
 * \defgroup CUPTI_PCSAMPLING_API CUPTI PC Sampling API
 * Functions, types, and enums that implement the CUPTI PC Sampling API.
 * @{
 */

#ifndef CUPTI_PCSAMPLING_STRUCT_SIZE
#define CUPTI_PCSAMPLING_STRUCT_SIZE(type_, lastfield_)                     (offsetof(type_, lastfield_) + sizeof(((type_*)0)->lastfield_))
#endif

#ifndef CUPTI_STALL_REASON_STRING_SIZE
#define CUPTI_STALL_REASON_STRING_SIZE                                            128
#endif

/**
 * \brief PC Sampling collection mode
 */
typedef enum
{
  /**
   * INVALID Value
   */
  CUPTI_PC_SAMPLING_COLLECTION_MODE_INVALID                   = 0,
  /**
   * Continuous mode. Kernels are not serialized in this mode.
   */
  CUPTI_PC_SAMPLING_COLLECTION_MODE_CONTINUOUS                = 1,
  /**
   * Serialized mode. Kernels are serialized in this mode.
   */
  CUPTI_PC_SAMPLING_COLLECTION_MODE_KERNEL_SERIALIZED         = 2,
} CUpti_PCSamplingCollectionMode;

/**
 * \brief PC Sampling stall reasons
 */
typedef struct PACKED_ALIGNMENT
{
  /**
   * [r] Collected stall reason index
   */
  uint32_t pcSamplingStallReasonIndex;
  /**
   * [r] Number of times the PC was sampled with the stallReason.
   */
  uint32_t samples;
} CUpti_PCSamplingStallReason;

/**
 * \brief PC Sampling data
 */
typedef struct PACKED_ALIGNMENT
{
  /**
   * [w] Size of the data structure.
   * CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are
   * available in the structure. Used to preserve backward compatibility.
   */
  size_t size;
  /**
   * [r] Unique cubin id
   */
  uint64_t cubinCrc;
  /**
   * [r] PC offset
   */
  uint64_t pcOffset;
  /**
   * The function's unique symbol index in the module.
   */
  uint32_t functionIndex;
  /**
   * Padding
   */
  uint32_t pad;
  /**
   * [r] The function name. This name string might be shared across all the records
   * including records from activity APIs representing the same function, and so it should not be
   * modified or freed until post processing of all the records is done. Once done, it is user’s responsibility to
   * free the memory using free() function.
   */
  char* functionName;
  /**
   * [r] Collected stall reason count
   */
  size_t stallReasonCount;
  /**
   * [r] Stall reason id
   * Total samples
   */
  CUpti_PCSamplingStallReason *stallReason;
  /**
   * The correlation ID of the kernel to which this result is associated. Only valid for serialized mode of pc sampling collection.
   * For continous mode of collection the correlationId will be set to 0.
   */
  uint32_t correlationId;
} CUpti_PCSamplingPCData;

/**
 * \brief PC Sampling output data format
 */
typedef enum
{
    CUPTI_PC_SAMPLING_OUTPUT_DATA_FORMAT_INVALID          = 0,
  /**
   * HW buffer data will be parsed during collection of data
   */
    CUPTI_PC_SAMPLING_OUTPUT_DATA_FORMAT_PARSED           = 1,
} CUpti_PCSamplingOutputDataFormat;

/**
 * \brief Collected PC Sampling data
 *
 */
typedef struct PACKED_ALIGNMENT
{
  /**
   * [w] Size of the data structure.
   * CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are
   * available in the structure. Used to preserve backward compatibility.
   */
  size_t size;
  /**
   * [w] Number of PCs to be collected
   */
  size_t collectNumPcs;
  /**
   * [r] Number of samples collected across all PCs.
   * It includes samples for user modules, samples for non-user kernels and dropped samples.
   * It includes counts for all non selected stall reasons.
   * CUPTI does not provide PC records for non-user kernels.
   * CUPTI does not provide PC records for instructions for which all selected stall reason metrics counts are zero.
   */
  uint64_t totalSamples;
  /**
   * [r] Number of samples that were dropped by hardware due to backpressure/overflow.
   */
  uint64_t droppedSamples;
  /**
   * [r] Number of PCs collected
   */
  size_t totalNumPcs;
  /**
   * [r] Number of PCs available for collection
   */
  size_t remainingNumPcs;
  /**
   * [r] Unique identifier for each range.
   * Data collected across multiple ranges in multiple buffers can be identified using range id.
   */
  uint64_t rangeId;
  /**
   * [r] Profiled PC data
   * This data struct should have enough memory to collect number of PCs mentioned in \brief collectNumPcs
   */
  CUpti_PCSamplingPCData *pPcData;
  /**
   * [r] Number of samples collected across all non user kernels PCs.
   * It includes samples for non-user kernels.
   * It includes counts for all non selected stall reasons as well.
   * CUPTI does not provide PC records for non-user kernels.
   */
  uint64_t nonUsrKernelsTotalSamples;

  /**
   * [r] Status of the hardware buffer.
   * CUPTI returns the error code CUPTI_ERROR_OUT_OF_MEMORY when hardware buffer is full.
   * When hardware buffer is full, user will get pc data as 0. To mitigate this issue, one or more of the below options can be tried:
   * 1. Increase the hardware buffer size using the attribute CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_HARDWARE_BUFFER_SIZE
   * 2. Decrease the thread sleep span using the attribute CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_WORKER_THREAD_PERIODIC_SLEEP_SPAN
   * 3. Decrease the sampling frequency using the attribute CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_SAMPLING_PERIOD
   */
  uint8_t hardwareBufferFull;
} CUpti_PCSamplingData;

/**
 * \brief PC Sampling configuration attributes
 *
 * PC Sampling configuration attribute types. These attributes can be read
 * using \ref cuptiPCSamplingGetConfigurationAttribute and can be written
 * using \ref cuptiPCSamplingSetConfigurationAttribute. Attributes marked
 * [r] can only be read using \ref cuptiPCSamplingGetConfigurationAttribute
 * [w] can only be written using \ref cuptiPCSamplingSetConfigurationAttribute
 * [rw] can be read using \ref cuptiPCSamplingGetConfigurationAttribute and
 * written using \ref cuptiPCSamplingSetConfigurationAttribute
 */
typedef enum
{
  CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_INVALID                            = 0,
  /**
   * [rw] Sampling period for PC Sampling.
   * DEFAULT - CUPTI defined value based on number of SMs
   * Valid values for the sampling
   * periods are between 5 to 31 both inclusive. This will set the
   * sampling period to (2^samplingPeriod) cycles.
   * For e.g. for sampling period = 5 to 31, cycles = 32, 64, 128,..., 2^31
   * Value is a uint32_t
   */
  CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_SAMPLING_PERIOD                    = 1,
  /**
   * [w] Number of stall reasons to collect.
   * DEFAULT - All stall reasons will be collected
   * Value is a size_t
   * [w] Stall reasons to collect
   * DEFAULT - All stall reasons will be collected
   * Input value should be a pointer pointing to array of stall reason indexes
   * containing all the stall reason indexes to collect.
   */
  CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_STALL_REASON                       = 2,
  /**
   * [rw] Size of SW buffer for raw PC counter data downloaded from HW buffer
   * DEFAULT - 1 MB, which can accommodate approximately 5500 PCs
   * with all stall reasons
   * Approximately it takes 16 Bytes (and some fixed size memory)
   * to accommodate one PC with one stall reason
   * For e.g. 1 PC with 1 stall reason = 32 Bytes
   *          1 PC with 2 stall reason = 48 Bytes
   *          1 PC with 4 stall reason = 96 Bytes
   * Value is a size_t
   */
  CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_SCRATCH_BUFFER_SIZE                = 3,
  /**
   * [rw] Size of HW buffer in bytes
   * DEFAULT - 512 MB
   * If sampling period is too less, HW buffer can overflow
   * and drop PC data
   * Value is a size_t
   */
  CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_HARDWARE_BUFFER_SIZE               = 4,
  /**
   * [rw] PC Sampling collection mode
   * DEFAULT - CUPTI_PC_SAMPLING_COLLECTION_MODE_CONTINUOUS
   * Input value should be of type \ref CUpti_PCSamplingCollectionMode.
   */
  CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_COLLECTION_MODE                    = 5,
  /**
   * [rw] Control over PC Sampling data collection range
   * Default - 0
   * 1 - Allows user to start and stop PC Sampling using APIs -
   * \ref cuptiPCSamplingStart() - Start PC Sampling
   * \ref cuptiPCSamplingStop() - Stop PC Sampling
   * Value is a uint32_t
   */
  CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_ENABLE_START_STOP_CONTROL          = 6,
  /**
   * [w] Value for output data format
   * Default - CUPTI_PC_SAMPLING_OUTPUT_DATA_FORMAT_PARSED
   * Input value should be of type \ref CUpti_PCSamplingOutputDataFormat.
   */
  CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_OUTPUT_DATA_FORMAT                 = 7,
  /**
   * [w] Data buffer to hold collected PC Sampling data PARSED_DATA
   * Default - none.
   * Buffer type is void * which can point to PARSED_DATA
   * Refer \ref CUpti_PCSamplingData for buffer format for PARSED_DATA
   */
  CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_SAMPLING_DATA_BUFFER               = 8,
  /**
   * [rw] Control sleep time of the worker threads created by CUPTI for various PC sampling operations.
   * CUPTI creates multiple worker threads to offload certain operations to these threads. This includes decoding of HW data to
   * the CUPTI PC sampling data and correlating PC data to SASS instructions. CUPTI wakes up these threads periodically.
   * Default - 100 milliseconds.
   * Value is a uint32_t
   */
  CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_WORKER_THREAD_PERIODIC_SLEEP_SPAN  = 9,
  CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_FORCE_INT                          = 0x7fffffff,
} CUpti_PCSamplingConfigurationAttributeType;

/**
 * \brief PC sampling configuration information structure
 *
 * This structure provides \ref CUpti_PCSamplingConfigurationAttributeType which can be configured
 * or queried for PC sampling configuration
 */
typedef struct
{
  /**
   * Refer \ref CUpti_PCSamplingConfigurationAttributeType for all supported attribute types
   */
  CUpti_PCSamplingConfigurationAttributeType attributeType;
  /*
   * Configure or query status for \p attributeType
   * CUPTI_SUCCESS for valid \p attributeType and \p attributeData
   * CUPTI_ERROR_INVALID_OPERATION if \p attributeData is not valid
   * CUPTI_ERROR_INVALID_PARAMETER if \p attributeType is not valid
   */
  CUptiResult attributeStatus;
  union
  {
    /**
     * Invalid Value
     */
    struct
    {
      uint64_t data[3];
    } invalidData;
    /**
     * Refer \ref CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_SAMPLING_PERIOD
     */
    struct
    {
      uint32_t samplingPeriod;
    } samplingPeriodData;
    /**
     * Refer \ref CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_STALL_REASON
     */
    struct
    {
      size_t stallReasonCount;
      uint32_t *pStallReasonIndex;
    } stallReasonData;
    /**
     * Refer \ref CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_SCRATCH_BUFFER_SIZE
     */
    struct
    {
      size_t scratchBufferSize;
    } scratchBufferSizeData;
    /**
     * Refer \ref CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_HARDWARE_BUFFER_SIZE
     */
    struct
    {
      size_t hardwareBufferSize;
    } hardwareBufferSizeData;
    /**
     * Refer \ref CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_COLLECTION_MODE
     */
    struct
    {
      CUpti_PCSamplingCollectionMode collectionMode;
    } collectionModeData;
    /**
     * Refer \ref CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_ENABLE_START_STOP_CONTROL
     */
    struct
    {
      uint32_t enableStartStopControl;
    } enableStartStopControlData;
    /**
     * Refer \ref CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_OUTPUT_DATA_FORMAT
     */
    struct
    {
      CUpti_PCSamplingOutputDataFormat outputDataFormat;
    } outputDataFormatData;
    /**
     * Refer \ref CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_SAMPLING_DATA_BUFFER
     */
    struct
    {
      void *samplingDataBuffer;
    } samplingDataBufferData;
    /**
     * Refer \ref CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_WORKER_THREAD_PERIODIC_SLEEP_SPAN
     */
    struct
    {
      uint32_t workerThreadPeriodicSleepSpan;
    } workerThreadPeriodicSleepSpanData;
    
  } attributeData;
} CUpti_PCSamplingConfigurationInfo;

/**
 * \brief PC sampling configuration structure
 *
 * This structure configures PC sampling using \ref cuptiPCSamplingSetConfigurationAttribute
 * and queries PC sampling default configuration using \ref cuptiPCSamplingGetConfigurationAttribute
 */
typedef struct
{
  /**
   * [w] Size of the data structure i.e. CUpti_PCSamplingConfigurationInfoParamsSize
   * CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are
   * available in the structure. Used to preserve backward compatibility.
   */
  size_t size;
  /**
   * [w] Assign to NULL
   */
  void* pPriv;
  /**
   * [w] CUcontext
   */
  CUcontext ctx;
  /**
   * [w] Number of attributes to configure using \ref cuptiPCSamplingSetConfigurationAttribute or query
   * using \ref cuptiPCSamplingGetConfigurationAttribute
   */
  size_t numAttributes;
  /**
   * Refer \ref CUpti_PCSamplingConfigurationInfo
   */
  CUpti_PCSamplingConfigurationInfo *pPCSamplingConfigurationInfo;
} CUpti_PCSamplingConfigurationInfoParams;
#define CUpti_PCSamplingConfigurationInfoParamsSize                 CUPTI_PCSAMPLING_STRUCT_SIZE(CUpti_PCSamplingConfigurationInfoParams,pPCSamplingConfigurationInfo)

/**
 * \brief Write PC Sampling configuration attribute.
 *
 * \param pParams A pointer to \ref CUpti_PCSamplingConfigurationInfoParams
 * containing PC sampling configuration.
 *
 * \retval CUPTI_SUCCESS
 * \retval CUPTI_ERROR_INVALID_OPERATION if this API is called with
 * some invalid \p attrib.
 * \retval CUPTI_ERROR_INVALID_PARAMETER if attribute \p value is not valid
 * or any \p pParams is not valid
 * \retval CUPTI_ERROR_NOT_SUPPORTED indicates that the system/device
 * does not support the API
 */
CUptiResult CUPTIAPI cuptiPCSamplingSetConfigurationAttribute(CUpti_PCSamplingConfigurationInfoParams *pParams);

/**
 * \brief Read PC Sampling configuration attribute.
 *
 * \param pParams A pointer to \ref CUpti_PCSamplingConfigurationInfoParams
 * containing PC sampling configuration.
 *
 * \retval CUPTI_SUCCESS
 * \retval CUPTI_ERROR_INVALID_OPERATION if this API is called with
 * some invalid attribute.
 * \retval CUPTI_ERROR_INVALID_PARAMETER if \p attrib is not valid
 * or any \p pParams is not valid
 * \retval CUPTI_ERROR_PARAMETER_SIZE_NOT_SUFFICIENT indicates that
 * the \p value buffer is too small to hold the attribute value
 * \retval CUPTI_ERROR_NOT_SUPPORTED indicates that the system/device
 * does not support the API
 */
CUptiResult CUPTIAPI cuptiPCSamplingGetConfigurationAttribute(CUpti_PCSamplingConfigurationInfoParams *pParams);

/**
 * \brief Params for cuptiPCSamplingEnable
 */
typedef struct
{
  /**
   * [w] Size of the data structure i.e. CUpti_PCSamplingGetDataParamsSize
   * CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are
   * available in the structure. Used to preserve backward compatibility.
   */
  size_t size;
  /**
   * [w] Assign to NULL
   */
  void* pPriv;
  /**
   * [w] CUcontext
   */
  CUcontext ctx;
  /**
   * \param pcSamplingData Data buffer to hold collected PC Sampling data PARSED_DATA
   * Buffer type is void * which can point to PARSED_DATA
   * Refer \ref CUpti_PCSamplingData for buffer format for PARSED_DATA
   */
  void *pcSamplingData;
} CUpti_PCSamplingGetDataParams;
#define CUpti_PCSamplingGetDataParamsSize                           CUPTI_PCSAMPLING_STRUCT_SIZE(CUpti_PCSamplingGetDataParams, pcSamplingData)
/**
 * \brief Flush GPU PC sampling data periodically.
 *
 * Flushing of GPU PC Sampling data is required at following point to maintain uniqueness of PCs:
 * For \brief CUPTI_PC_SAMPLING_COLLECTION_MODE_CONTINUOUS, after every module load-unload-load
 * For \brief CUPTI_PC_SAMPLING_COLLECTION_MODE_KERNEL_SERIALIZED, after every kernel ends
 * If configuration option \brief CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_ENABLE_START_STOP_CONTROL
 * is enabled, then after every range end i.e. \brief cuptiPCSamplingStop()
 *
 * If application is profiled in \brief CUPTI_PC_SAMPLING_COLLECTION_MODE_CONTINUOUS, with disabled
 * \brief CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_ENABLE_START_STOP_CONTROL, and there is no module unload,
 * user can collect data in two ways:
 * Use \brief cuptiPCSamplingGetData() API periodically
 * Use \brief cuptiPCSamplingDisable() on application exit and read GPU PC sampling data from sampling
 * data buffer passed during configuration.
 * Note: In case, \brief cuptiPCSamplingGetData() API is not called periodically, then sampling data buffer
 * passed during configuration should be large enough to hold all PCs data.
 *       \brief cuptiPCSamplingGetData() API never does device synchronization.
 *       It is possible that when the API is called there is some unconsumed data from the HW buffer. In this case
 * CUPTI provides only the data available with it at that moment.
 *
 * \param pParams A pointer to \ref CUpti_PCSamplingGetDataParams
 *
 * \retval CUPTI_SUCCESS
 * \retval CUPTI_ERROR_INVALID_OPERATION if this API is called without
 * enabling PC sampling.
 * \retval CUPTI_ERROR_INVALID_PARAMETER if any \p pParams is not valid
 * \retval CUPTI_ERROR_NOT_SUPPORTED indicates that the system/device
 * \retval CUPTI_ERROR_OUT_OF_MEMORY indicates that the HW buffer is full
 * does not support the API
 */
CUptiResult CUPTIAPI cuptiPCSamplingGetData(CUpti_PCSamplingGetDataParams *pParams);

/**
 * \brief Params for cuptiPCSamplingEnable
 */
typedef struct
{
  /**
   * [w] Size of the data structure i.e. CUpti_PCSamplingEnableParamsSize
   * CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are
   * available in the structure. Used to preserve backward compatibility.
   */
  size_t size;
  /**
   * [w] Assign to NULL
   */
  void* pPriv;
  /**
   * [w] CUcontext
   */
  CUcontext ctx;
} CUpti_PCSamplingEnableParams;
#define CUpti_PCSamplingEnableParamsSize                           CUPTI_PCSAMPLING_STRUCT_SIZE(CUpti_PCSamplingEnableParams, ctx)

/**
 * \brief Enable PC sampling.
 *
 * \param pParams A pointer to \ref CUpti_PCSamplingEnableParams
 *
 * \retval CUPTI_SUCCESS
 * \retval CUPTI_ERROR_INVALID_PARAMETER if any \p pParams is not valid
 * \retval CUPTI_ERROR_NOT_SUPPORTED indicates that the system/device
 * does not support the API
 */
CUptiResult CUPTIAPI cuptiPCSamplingEnable(CUpti_PCSamplingEnableParams *pParams);

/**
 * \brief Params for cuptiPCSamplingDisable
 */
typedef struct
{
  /**
   * [w] Size of the data structure i.e. CUpti_PCSamplingDisableParamsSize
   * CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are
   * available in the structure. Used to preserve backward compatibility.
   */
  size_t size;
  /**
   * [w] Assign to NULL
   */
  void* pPriv;
  /**
   * [w] CUcontext
   */
  CUcontext ctx;
} CUpti_PCSamplingDisableParams;
#define CUpti_PCSamplingDisableParamsSize                           CUPTI_PCSAMPLING_STRUCT_SIZE(CUpti_PCSamplingDisableParams, ctx)

/**
 * \brief Disable PC sampling.
 *
 * For application which doesn't destroy the CUDA context explicitly,
 * this API does the PC Sampling tear-down, joins threads and copies PC records in the buffer provided
 * during the PC sampling configuration. PC records which can't be accommodated in the buffer are discarded.
 *
 * \param pParams A pointer to \ref CUpti_PCSamplingDisableParams
 *
 * \retval CUPTI_SUCCESS
 * \retval CUPTI_ERROR_INVALID_PARAMETER if any \p pParams is not valid
 * \retval CUPTI_ERROR_NOT_SUPPORTED indicates that the system/device
 * does not support the API
 */
CUptiResult CUPTIAPI cuptiPCSamplingDisable(CUpti_PCSamplingDisableParams *pParams);

/**
 * \brief Params for cuptiPCSamplingStart
 */
typedef struct
{
  /**
   * [w] Size of the data structure i.e. CUpti_PCSamplingStartParamsSize
   * CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are
   * available in the structure. Used to preserve backward compatibility.
   */
  size_t size;
  /**
   * [w] Assign to NULL
   */
  void* pPriv;
  /**
   * [w] CUcontext
   */
  CUcontext ctx;
} CUpti_PCSamplingStartParams;
#define CUpti_PCSamplingStartParamsSize                             CUPTI_PCSAMPLING_STRUCT_SIZE(CUpti_PCSamplingStartParams, ctx)

/**
 * \brief Start PC sampling.
 *
 * User can collect PC Sampling data for user-defined range specified by Start/Stop APIs.
 * This API can be used to mark starting of range. Set configuration option
 * \brief CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_ENABLE_START_STOP_CONTROL to use this API.
 *
 * \param pParams A pointer to \ref CUpti_PCSamplingStartParams
 *
 * \retval CUPTI_SUCCESS
 * \retval CUPTI_ERROR_INVALID_OPERATION if this API is called with
 * incorrect PC Sampling configuration.
 * \retval CUPTI_ERROR_INVALID_PARAMETER if any \p pParams is not valid
 * \retval CUPTI_ERROR_NOT_SUPPORTED indicates that the system/device
 * does not support the API
 */
CUptiResult CUPTIAPI cuptiPCSamplingStart(CUpti_PCSamplingStartParams *pParams);

/**
 * \brief Params for cuptiPCSamplingStop
 */
typedef struct
{
  /**
   * [w] Size of the data structure i.e. CUpti_PCSamplingStopParamsSize
   * CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are
   * available in the structure. Used to preserve backward compatibility.
   */
  size_t size;
  /**
   * [w] Assign to NULL
   */
  void* pPriv;
  /**
   * [w] CUcontext
   */
  CUcontext ctx;
} CUpti_PCSamplingStopParams;
#define CUpti_PCSamplingStopParamsSize                              CUPTI_PCSAMPLING_STRUCT_SIZE(CUpti_PCSamplingStopParams, ctx)

/**
 * \brief Stop PC sampling.
 *
 * User can collect PC Sampling data for user-defined range specified by Start/Stop APIs.
 * This API can be used to mark end of range. Set configuration option
 * \brief CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_ENABLE_START_STOP_CONTROL to use this API.
 *
 * \param pParams A pointer to \ref CUpti_PCSamplingStopParams
 *
 * \retval CUPTI_SUCCESS
 * \retval CUPTI_ERROR_INVALID_OPERATION if this API is called with
 * incorrect PC Sampling configuration.
 * \retval CUPTI_ERROR_INVALID_PARAMETER if any \p pParams is not valid
 * \retval CUPTI_ERROR_NOT_SUPPORTED indicates that the system/device
 * does not support the API
 */
CUptiResult CUPTIAPI cuptiPCSamplingStop(CUpti_PCSamplingStopParams *pParams);

/**
 * \brief Params for cuptiPCSamplingGetNumStallReasons
 */
typedef struct
{
  /**
   * [w] Size of the data structure i.e. CUpti_PCSamplingGetNumStallReasonsParamsSize
   * CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are
   * available in the structure. Used to preserve backward compatibility.
   */
  size_t size;
  /**
   * [w] Assign to NULL
   */
  void* pPriv;
  /**
   * [w] CUcontext
   */
  CUcontext ctx;
  /**
   * [r] Number of stall reasons
   */
  size_t *numStallReasons;
} CUpti_PCSamplingGetNumStallReasonsParams;
#define CUpti_PCSamplingGetNumStallReasonsParamsSize                CUPTI_PCSAMPLING_STRUCT_SIZE(CUpti_PCSamplingGetNumStallReasonsParams, numStallReasons)

/**
 * \brief Get PC sampling stall reason count.
 *
 * \param pParams A pointer to \ref CUpti_PCSamplingGetNumStallReasonsParams
 *
 * \retval CUPTI_SUCCESS
 * \retval CUPTI_ERROR_INVALID_PARAMETER if any \p pParams is not valid
 * \retval CUPTI_ERROR_NOT_SUPPORTED indicates that the system/device
 * does not support the API
 */
CUptiResult CUPTIAPI cuptiPCSamplingGetNumStallReasons(CUpti_PCSamplingGetNumStallReasonsParams *pParams);

/**
 * \brief Params for cuptiPCSamplingGetStallReasons
 */
typedef struct
{
  /**
   * [w] Size of the data structure i.e. CUpti_PCSamplingGetStallReasonsParamsSize
   * CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are
   * available in the structure. Used to preserve backward compatibility.
   */
  size_t size;
  /**
   * [w] Assign to NULL
   */
  void* pPriv;
  /**
   * [w] CUcontext
   */
  CUcontext ctx;
  /**
   * [w] Number of stall reasons
   */
  size_t numStallReasons;
  /**
   * [r] Stall reason index
   */
  uint32_t *stallReasonIndex;
  /**
   * [r] Stall reasons name
   */
  char **stallReasons;
} CUpti_PCSamplingGetStallReasonsParams;
#define CUpti_PCSamplingGetStallReasonsParamsSize                   CUPTI_PCSAMPLING_STRUCT_SIZE(CUpti_PCSamplingGetStallReasonsParams, stallReasons)

/**
 * \brief Get PC sampling stall reasons.
 *
 * \param pParams A pointer to \ref CUpti_PCSamplingGetStallReasonsParams
 *
 * \retval CUPTI_SUCCESS
 * \retval CUPTI_ERROR_INVALID_PARAMETER if any \p pParams is not valid
 * \retval CUPTI_ERROR_NOT_SUPPORTED indicates that the system/device
 * does not support the API
 */
CUptiResult CUPTIAPI cuptiPCSamplingGetStallReasons(CUpti_PCSamplingGetStallReasonsParams *pParams);


/**
 * \brief Params for cuptiGetSassToSourceCorrelation
 */
typedef struct CUpti_GetSassToSourceCorrelationParams {
  /**
   * [w] Size of the data structure i.e. CUpti_GetSassToSourceCorrelationParamsSize
   * CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are
   * available in the structure. Used to preserve backward compatibility.
   */
  size_t size;
  /**
   * [w] Pointer to cubin binary where function belongs.
   */
  const void* cubin;
  /**
   * [w] Function name to which PC belongs.
   */
  const char *functionName;
  /**
   * [w] Size of cubin binary.
   */
  size_t cubinSize;
  /**
   * [r] Line number in the source code.
   */
  uint32_t lineNumber;
  /**
   * [w] PC offset
   */
  uint64_t pcOffset;
  /**
   * [r] Path for the source file.
   */
  char *fileName;
  /**
   * [r] Path for the directory of source file.
   */
  char *dirName;
} CUpti_GetSassToSourceCorrelationParams;

#define CUpti_GetSassToSourceCorrelationParamsSize     CUPTI_PCSAMPLING_STRUCT_SIZE(CUpti_GetSassToSourceCorrelationParams, dirName)

/**
 * \brief SASS to Source correlation.
 *
 * \param pParams A pointer to \ref CUpti_GetSassToSourceCorrelationParams
 *
 * It is expected from user to free allocated memory for fileName and dirName after use.
 *
 * \retval CUPTI_SUCCESS
 * \retval CUPTI_ERROR_INVALID_PARAMETER if either of the parameters cubin or functionName
 * is NULL or cubinSize is zero or size field is not set correctly.
 * \retval CUPTI_ERROR_INVALID_MODULE provided cubin is invalid.
 * \retval CUPTI_ERROR_UNKNOWN an internal error occurred.
 * This error code is also used for cases when the function is not present in the module.
 * A better error code will be returned in the future release.
 */
CUptiResult CUPTIAPI cuptiGetSassToSourceCorrelation(CUpti_GetSassToSourceCorrelationParams *pParams);

/**
 * \brief Params for cuptiGetCubinCrc
 */
typedef struct {
  /**
   * [w] Size of configuration structure.
   * CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are
   * available in the structure. Used to preserve backward compatibility.
   */
  size_t size;
  /**
   * [w] Size of cubin binary.
   */
  size_t cubinSize;
  /**
   * [w] Pointer to cubin binary
   */
  const void* cubin;
  /**
   * [r] Computed CRC will be stored in it.
   */
  uint64_t cubinCrc;
} CUpti_GetCubinCrcParams;
#define CUpti_GetCubinCrcParamsSize     CUPTI_PCSAMPLING_STRUCT_SIZE(CUpti_GetCubinCrcParams, cubinCrc)

/**
 * \brief Get the CRC of cubin.
 *
 * This function returns the CRC of provided cubin binary.
 *
 * \param pParams A pointer to \ref CUpti_GetCubinCrcParams
 *
 * \retval CUPTI_SUCCESS
 * \retval CUPTI_ERROR_INVALID_PARAMETER if parameter cubin is NULL or
 * provided cubinSize is zero or size field is not set.
 */
CUptiResult CUPTIAPI cuptiGetCubinCrc(CUpti_GetCubinCrcParams *pParams);

/**
 * \brief Function type for callback used by CUPTI to request crc of
 * loaded module.
 *
 * This callback function ask for crc of provided module in function.
 * The provided crc will be stored in PC sampling records i.e. in the field 'cubinCrc' of the PC sampling
 * struct CUpti_PCSamplingPCData. The CRC is uses during the offline source correlation to uniquely identify the module.
 *
 * \param cubin The pointer to cubin binary
 * \param cubinSize The size of cubin binary.
 * \param cubinCrc Returns the computed crc of cubin.
 */
typedef void (CUPTIAPI *CUpti_ComputeCrcCallbackFunc)(
    const void* cubin,
    size_t cubinSize,
    uint64_t *cubinCrc);

/**
 * \brief Register callback function with CUPTI to use
 * your own algorithm to compute cubin crc.
 *
 * This function registers a callback function and it gets called
 * from CUPTI when a CUDA module is loaded.
 *
 * \param funcComputeCubinCrc callback is invoked when a CUDA module
 * is loaded.
 *
 * \retval CUPTI_SUCCESS
 * \retval CUPTI_ERROR_INVALID_PARAMETER if \p funcComputeCubinCrc is NULL.
 */
CUptiResult CUPTIAPI cuptiRegisterComputeCrcCallback(CUpti_ComputeCrcCallbackFunc funcComputeCubinCrc);

/** @} */ /* END CUPTI_PCSAMPLING_API */

#if defined(__GNUC__) && defined(CUPTI_LIB)
    #pragma GCC visibility pop
#endif

#if defined(__cplusplus)
}
#endif

#endif /*_CUPTI_PCSAMPLING_H_*/
