#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION)
/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */

#pragma once

#include <cstdint>
#include <vector>
#include "./ConvUtils.h" // @manual
#include "./FbgemmBuild.h" // @manual
#include "./Utils.h" // @manual

// #define FBGEMM_MEASURE_TIME_BREAKDOWN

#ifdef FBGEMM_MEASURE_TIME_BREAKDOWN
#include <chrono>
#include <iostream>
extern double spmdm_initial_time;
extern double spmdm_transpose_uint8_time;
extern double spmdm_transpose_32xN_time;
extern double spmdm_compute_time;
extern double spmdm_transpose_Nx32_time;
extern double spmdm_run_time;
extern double sconv_run_time;
#endif

namespace fbgemm {

/**
 * @brief A class to represent a matrix in Compressed Sparse Column (CSC)
 * format.
 *
 * The second input matrix of matrix multiplication is usually weight and can
 * be sparse, and it's usually more efficient to use CSC format to represent
 * the second input matrix.
 */
class FBGEMM_API CompressedSparseColumn {
 public:
  CompressedSparseColumn(int num_of_rows, int num_of_cols);

  std::vector<std::int32_t>& ColPtr() {
    return colptr_;
  }
  std::vector<std::int16_t>& RowIdx() {
    return rowidx_;
  }
  std::vector<std::int8_t>& Values() {
    return values_;
  }
  std::vector<std::int16_t>& KHs() {
    return kh_;
  }
  std::vector<std::int16_t>& KWs() {
    return kw_;
  }
  /**
   * ICs include group: i.e. for ith input channels withint group g, ICs contain
   * g*(groups_per_input_channels) + i
   */
  std::vector<std::int16_t>& ICs() {
    return ic_;
  }

  std::size_t NumOfRows() const {
    return num_rows_;
  }
  std::size_t NumOfCols() const {
    return colptr_.size() - 1;
  }
  std::int32_t NumOfNonZeros() const {
    return colptr_.back();
  }

  /**
   * @return Total number of non-zero elements as a fraction of total
   * elements.
   */
  double Density() const;

  /**
   * @return True if the number of non-zeros per row is smaller than a small
   * threshold.
   */
  bool IsHyperSparse() const;

  /**
   * @brief Perform dense-matrix * sparse matrix.
   *
   * C += A (dense matrix) * B (this CSC matrix) if accumulation = true \n
   * C  = A (dense matrix) * B (this CSC matrix) if accumulation = false
   */
  void SpMDM(
      const block_type_t& block,
      const std::uint8_t* A,
      int lda,
      bool accumulation,
      std::int32_t* C,
      int ldc) const;

  void SparseConv(
      const conv_param_t<>& conv_p,
      const block_type_t& block,
      const std::uint8_t* A,
      std::int32_t A_zero_point,
      bool accumulation,
      std::int32_t* C,
      int ldc) const;

 private:
  const std::size_t num_rows_;
  std::vector<std::int32_t> colptr_; // corresponds to out channels
  std::vector<std::int8_t> values_;

  // For SpMDM
  std::vector<std::int16_t> rowidx_; // kh kw ic are flattened with im2col

  // For direct sparse convolution
  std::vector<std::int16_t> kh_;
  std::vector<std::int16_t> kw_;
  std::vector<std::int16_t> ic_; // in channels

  // Cache IsHyperSparse to minimize its overhead.
  mutable bool hyper_sparse_{false};

  // Whether we can reuse the cached hyper_sparse_ is determined by checking
  // if NumOfNonZeros() is same as old_nnz_ saved in previous invocation of
  // IsHyperSparse call.
  mutable std::int32_t old_nnz_{-1};
};

} // namespace fbgemm

#else
#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined."
#endif  // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION)
