//===--- NVVMIntrinsicUtils.h -----------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file contains the definitions of the enumerations and flags
/// associated with NVVM Intrinsics, along with some helper functions.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_IR_NVVMINTRINSICUTILS_H
#define LLVM_IR_NVVMINTRINSICUTILS_H
#include <stdint.h>
#include "llvm/ADT/APFloat.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
namespace llvm {
namespace nvvm {
/// Reduction operations supported by the TMA copy from shared to global
/// memory, i.e. by the "cp.reduce.async.bulk.tensor.*" family of PTX
/// instructions.
///
/// The underlying type is uint8_t and every enumerator has an explicitly
/// assigned value (0 through 7), so inserting or reordering entries cannot
/// silently renumber the others.
enum class TMAReductionOp : uint8_t {
  // Arithmetic reductions.
  ADD = 0,
  MIN = 1,
  MAX = 2,
  INC = 3,
  DEC = 4,
  // Bitwise reductions.
  AND = 5,
  OR = 6,
  XOR = 7,
};
/// Reports whether an NVVM floating-point-to-integer conversion intrinsic is
/// one of the "_ftz" variants.
///
/// \param IntrinsicID One of the nvvm_{f,d}2{i,ui,ll,ull}_{rm,rn,rp,rz}
///        intrinsics, or one of the nvvm_f2*_ftz intrinsics, listed below.
/// \returns true for the "_ftz" intrinsics and false for the plain ones.
///
/// Passing any ID that is not listed in the switch reaches llvm_unreachable.
inline bool FPToIntegerIntrinsicShouldFTZ(Intrinsic::ID IntrinsicID) {
  switch (IntrinsicID) {
  // Plain (non-FTZ) conversions, grouped by rounding suffix.
  // _rm
  case Intrinsic::nvvm_f2i_rm:
  case Intrinsic::nvvm_f2ui_rm:
  case Intrinsic::nvvm_f2ll_rm:
  case Intrinsic::nvvm_f2ull_rm:
  case Intrinsic::nvvm_d2i_rm:
  case Intrinsic::nvvm_d2ui_rm:
  case Intrinsic::nvvm_d2ll_rm:
  case Intrinsic::nvvm_d2ull_rm:
  // _rn
  case Intrinsic::nvvm_f2i_rn:
  case Intrinsic::nvvm_f2ui_rn:
  case Intrinsic::nvvm_f2ll_rn:
  case Intrinsic::nvvm_f2ull_rn:
  case Intrinsic::nvvm_d2i_rn:
  case Intrinsic::nvvm_d2ui_rn:
  case Intrinsic::nvvm_d2ll_rn:
  case Intrinsic::nvvm_d2ull_rn:
  // _rp
  case Intrinsic::nvvm_f2i_rp:
  case Intrinsic::nvvm_f2ui_rp:
  case Intrinsic::nvvm_f2ll_rp:
  case Intrinsic::nvvm_f2ull_rp:
  case Intrinsic::nvvm_d2i_rp:
  case Intrinsic::nvvm_d2ui_rp:
  case Intrinsic::nvvm_d2ll_rp:
  case Intrinsic::nvvm_d2ull_rp:
  // _rz
  case Intrinsic::nvvm_f2i_rz:
  case Intrinsic::nvvm_f2ui_rz:
  case Intrinsic::nvvm_f2ll_rz:
  case Intrinsic::nvvm_f2ull_rz:
  case Intrinsic::nvvm_d2i_rz:
  case Intrinsic::nvvm_d2ui_rz:
  case Intrinsic::nvvm_d2ll_rz:
  case Intrinsic::nvvm_d2ull_rz:
    return false;

  // FTZ conversions, grouped by rounding suffix. Only f2* intrinsics appear
  // here; no d2* "_ftz" intrinsic is handled by this function.
  // _rm_ftz
  case Intrinsic::nvvm_f2i_rm_ftz:
  case Intrinsic::nvvm_f2ui_rm_ftz:
  case Intrinsic::nvvm_f2ll_rm_ftz:
  case Intrinsic::nvvm_f2ull_rm_ftz:
  // _rn_ftz
  case Intrinsic::nvvm_f2i_rn_ftz:
  case Intrinsic::nvvm_f2ui_rn_ftz:
  case Intrinsic::nvvm_f2ll_rn_ftz:
  case Intrinsic::nvvm_f2ull_rn_ftz:
  // _rp_ftz
  case Intrinsic::nvvm_f2i_rp_ftz:
  case Intrinsic::nvvm_f2ui_rp_ftz:
  case Intrinsic::nvvm_f2ll_rp_ftz:
  case Intrinsic::nvvm_f2ull_rp_ftz:
  // _rz_ftz
  case Intrinsic::nvvm_f2i_rz_ftz:
  case Intrinsic::nvvm_f2ui_rz_ftz:
  case Intrinsic::nvvm_f2ll_rz_ftz:
  case Intrinsic::nvvm_f2ull_rz_ftz:
    return true;
  }

  // Only reached when the caller passes an intrinsic outside the sets above.
  llvm_unreachable("Checking FTZ flag for invalid f2i/d2i intrinsic");
  return false;
}
/// Reports whether an NVVM floating-point-to-integer conversion intrinsic
/// produces a signed integer result.
///
/// \param IntrinsicID One of the f2i/f2ui/f2ll/f2ull (optionally "_ftz") or
///        d2i/d2ui/d2ll/d2ull intrinsics listed below.
/// \returns true for the *2i and *2ll intrinsics, false for the *2ui and
///          *2ull intrinsics.
///
/// Passing any ID that is not listed in the switch reaches llvm_unreachable.
inline bool FPToIntegerIntrinsicResultIsSigned(Intrinsic::ID IntrinsicID) {
  switch (IntrinsicID) {
  // Unsigned results: f2ui / f2ull (plain forms).
  case Intrinsic::nvvm_f2ui_rm:
  case Intrinsic::nvvm_f2ui_rn:
  case Intrinsic::nvvm_f2ui_rp:
  case Intrinsic::nvvm_f2ui_rz:
  case Intrinsic::nvvm_f2ull_rm:
  case Intrinsic::nvvm_f2ull_rn:
  case Intrinsic::nvvm_f2ull_rp:
  case Intrinsic::nvvm_f2ull_rz:
  // Unsigned results: f2ui / f2ull ("_ftz" forms).
  case Intrinsic::nvvm_f2ui_rm_ftz:
  case Intrinsic::nvvm_f2ui_rn_ftz:
  case Intrinsic::nvvm_f2ui_rp_ftz:
  case Intrinsic::nvvm_f2ui_rz_ftz:
  case Intrinsic::nvvm_f2ull_rm_ftz:
  case Intrinsic::nvvm_f2ull_rn_ftz:
  case Intrinsic::nvvm_f2ull_rp_ftz:
  case Intrinsic::nvvm_f2ull_rz_ftz:
  // Unsigned results: d2ui / d2ull.
  case Intrinsic::nvvm_d2ui_rm:
  case Intrinsic::nvvm_d2ui_rn:
  case Intrinsic::nvvm_d2ui_rp:
  case Intrinsic::nvvm_d2ui_rz:
  case Intrinsic::nvvm_d2ull_rm:
  case Intrinsic::nvvm_d2ull_rn:
  case Intrinsic::nvvm_d2ull_rp:
  case Intrinsic::nvvm_d2ull_rz:
    return false;

  // Signed results: f2i / f2ll (plain forms).
  case Intrinsic::nvvm_f2i_rm:
  case Intrinsic::nvvm_f2i_rn:
  case Intrinsic::nvvm_f2i_rp:
  case Intrinsic::nvvm_f2i_rz:
  case Intrinsic::nvvm_f2ll_rm:
  case Intrinsic::nvvm_f2ll_rn:
  case Intrinsic::nvvm_f2ll_rp:
  case Intrinsic::nvvm_f2ll_rz:
  // Signed results: f2i / f2ll ("_ftz" forms).
  case Intrinsic::nvvm_f2i_rm_ftz:
  case Intrinsic::nvvm_f2i_rn_ftz:
  case Intrinsic::nvvm_f2i_rp_ftz:
  case Intrinsic::nvvm_f2i_rz_ftz:
  case Intrinsic::nvvm_f2ll_rm_ftz:
  case Intrinsic::nvvm_f2ll_rn_ftz:
  case Intrinsic::nvvm_f2ll_rp_ftz:
  case Intrinsic::nvvm_f2ll_rz_ftz:
  // Signed results: d2i / d2ll.
  case Intrinsic::nvvm_d2i_rm:
  case Intrinsic::nvvm_d2i_rn:
  case Intrinsic::nvvm_d2i_rp:
  case Intrinsic::nvvm_d2i_rz:
  case Intrinsic::nvvm_d2ll_rm:
  case Intrinsic::nvvm_d2ll_rn:
  case Intrinsic::nvvm_d2ll_rp:
  case Intrinsic::nvvm_d2ll_rz:
    return true;
  }

  // Only reached when the caller passes an intrinsic outside the sets above.
  llvm_unreachable("Checking invalid f2i/d2i intrinsic for signed int conversion");
  return false;
}
/// Maps an NVVM floating-point-to-integer conversion intrinsic to the
/// APFloat rounding mode selected by its rounding suffix:
///   - "_rm" -> APFloat::rmTowardNegative
///   - "_rn" -> APFloat::rmNearestTiesToEven
///   - "_rp" -> APFloat::rmTowardPositive
///   - "_rz" -> APFloat::rmTowardZero
///
/// \param IntrinsicID One of the f2i/f2ui/f2ll/f2ull (optionally "_ftz") or
///        d2i/d2ui/d2ll/d2ull intrinsics listed below.
/// \returns The rounding mode for that intrinsic.
///
/// Passing any ID that is not listed in the switch reaches llvm_unreachable.
inline APFloat::roundingMode
GetFPToIntegerRoundingMode(Intrinsic::ID IntrinsicID) {
  switch (IntrinsicID) {
  // "_rn" suffix.
  case Intrinsic::nvvm_f2i_rn:
  case Intrinsic::nvvm_f2i_rn_ftz:
  case Intrinsic::nvvm_f2ui_rn:
  case Intrinsic::nvvm_f2ui_rn_ftz:
  case Intrinsic::nvvm_f2ll_rn:
  case Intrinsic::nvvm_f2ll_rn_ftz:
  case Intrinsic::nvvm_f2ull_rn:
  case Intrinsic::nvvm_f2ull_rn_ftz:
  case Intrinsic::nvvm_d2i_rn:
  case Intrinsic::nvvm_d2ui_rn:
  case Intrinsic::nvvm_d2ll_rn:
  case Intrinsic::nvvm_d2ull_rn:
    return APFloat::rmNearestTiesToEven;

  // "_rz" suffix.
  case Intrinsic::nvvm_f2i_rz:
  case Intrinsic::nvvm_f2i_rz_ftz:
  case Intrinsic::nvvm_f2ui_rz:
  case Intrinsic::nvvm_f2ui_rz_ftz:
  case Intrinsic::nvvm_f2ll_rz:
  case Intrinsic::nvvm_f2ll_rz_ftz:
  case Intrinsic::nvvm_f2ull_rz:
  case Intrinsic::nvvm_f2ull_rz_ftz:
  case Intrinsic::nvvm_d2i_rz:
  case Intrinsic::nvvm_d2ui_rz:
  case Intrinsic::nvvm_d2ll_rz:
  case Intrinsic::nvvm_d2ull_rz:
    return APFloat::rmTowardZero;

  // "_rm" suffix.
  case Intrinsic::nvvm_f2i_rm:
  case Intrinsic::nvvm_f2i_rm_ftz:
  case Intrinsic::nvvm_f2ui_rm:
  case Intrinsic::nvvm_f2ui_rm_ftz:
  case Intrinsic::nvvm_f2ll_rm:
  case Intrinsic::nvvm_f2ll_rm_ftz:
  case Intrinsic::nvvm_f2ull_rm:
  case Intrinsic::nvvm_f2ull_rm_ftz:
  case Intrinsic::nvvm_d2i_rm:
  case Intrinsic::nvvm_d2ui_rm:
  case Intrinsic::nvvm_d2ll_rm:
  case Intrinsic::nvvm_d2ull_rm:
    return APFloat::rmTowardNegative;

  // "_rp" suffix.
  case Intrinsic::nvvm_f2i_rp:
  case Intrinsic::nvvm_f2i_rp_ftz:
  case Intrinsic::nvvm_f2ui_rp:
  case Intrinsic::nvvm_f2ui_rp_ftz:
  case Intrinsic::nvvm_f2ll_rp:
  case Intrinsic::nvvm_f2ll_rp_ftz:
  case Intrinsic::nvvm_f2ull_rp:
  case Intrinsic::nvvm_f2ull_rp_ftz:
  case Intrinsic::nvvm_d2i_rp:
  case Intrinsic::nvvm_d2ui_rp:
  case Intrinsic::nvvm_d2ll_rp:
  case Intrinsic::nvvm_d2ull_rp:
    return APFloat::rmTowardPositive;
  }

  // Only reached when the caller passes an intrinsic outside the sets above.
  llvm_unreachable("Checking rounding mode for invalid f2i/d2i intrinsic");
  return APFloat::roundingMode::Invalid;
}
/// Reports whether an NVVM fmin/fmax intrinsic is one of the "_ftz"
/// variants.
///
/// \param IntrinsicID One of the nvvm_fmin_* / nvvm_fmax_* intrinsics listed
///        below.
/// \returns true when the intrinsic name carries "_ftz", false otherwise.
///
/// Passing any ID that is not listed in the switch reaches llvm_unreachable.
inline bool FMinFMaxShouldFTZ(Intrinsic::ID IntrinsicID) {
  switch (IntrinsicID) {
  // Variants without "_ftz"; each fmax entry is followed by its fmin twin.
  case Intrinsic::nvvm_fmax_d:
  case Intrinsic::nvvm_fmin_d:
  case Intrinsic::nvvm_fmax_f:
  case Intrinsic::nvvm_fmin_f:
  case Intrinsic::nvvm_fmax_nan_f:
  case Intrinsic::nvvm_fmin_nan_f:
  case Intrinsic::nvvm_fmax_xorsign_abs_f:
  case Intrinsic::nvvm_fmin_xorsign_abs_f:
  case Intrinsic::nvvm_fmax_nan_xorsign_abs_f:
  case Intrinsic::nvvm_fmin_nan_xorsign_abs_f:
    return false;

  // "_ftz" variants.
  case Intrinsic::nvvm_fmax_ftz_f:
  case Intrinsic::nvvm_fmin_ftz_f:
  case Intrinsic::nvvm_fmax_ftz_nan_f:
  case Intrinsic::nvvm_fmin_ftz_nan_f:
  case Intrinsic::nvvm_fmax_ftz_xorsign_abs_f:
  case Intrinsic::nvvm_fmin_ftz_xorsign_abs_f:
  case Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f:
  case Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f:
    return true;
  }

  // Only reached when the caller passes an intrinsic outside the sets above.
  llvm_unreachable("Checking FTZ flag for invalid fmin/fmax intrinsic");
  return false;
}
/// Reports whether an NVVM fmin/fmax intrinsic is one of the "_nan"
/// variants.
///
/// \param IntrinsicID One of the nvvm_fmin_* / nvvm_fmax_* intrinsics listed
///        below.
/// \returns true when the intrinsic name carries "_nan", false otherwise.
///
/// Passing any ID that is not listed in the switch reaches llvm_unreachable.
inline bool FMinFMaxPropagatesNaNs(Intrinsic::ID IntrinsicID) {
  switch (IntrinsicID) {
  // Variants without "_nan"; each fmax entry is followed by its fmin twin.
  case Intrinsic::nvvm_fmax_d:
  case Intrinsic::nvvm_fmin_d:
  case Intrinsic::nvvm_fmax_f:
  case Intrinsic::nvvm_fmin_f:
  case Intrinsic::nvvm_fmax_ftz_f:
  case Intrinsic::nvvm_fmin_ftz_f:
  case Intrinsic::nvvm_fmax_xorsign_abs_f:
  case Intrinsic::nvvm_fmin_xorsign_abs_f:
  case Intrinsic::nvvm_fmax_ftz_xorsign_abs_f:
  case Intrinsic::nvvm_fmin_ftz_xorsign_abs_f:
    return false;

  // "_nan" variants.
  case Intrinsic::nvvm_fmax_nan_f:
  case Intrinsic::nvvm_fmin_nan_f:
  case Intrinsic::nvvm_fmax_ftz_nan_f:
  case Intrinsic::nvvm_fmin_ftz_nan_f:
  case Intrinsic::nvvm_fmax_nan_xorsign_abs_f:
  case Intrinsic::nvvm_fmin_nan_xorsign_abs_f:
  case Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f:
  case Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f:
    return true;
  }

  // Only reached when the caller passes an intrinsic outside the sets above.
  llvm_unreachable("Checking NaN flag for invalid fmin/fmax intrinsic");
  return false;
}
/// Reports whether an NVVM fmin/fmax intrinsic is one of the "_xorsign_abs"
/// variants.
///
/// \param IntrinsicID One of the nvvm_fmin_* / nvvm_fmax_* intrinsics listed
///        below.
/// \returns true when the intrinsic name carries "_xorsign_abs", false
///          otherwise.
///
/// Passing any ID that is not listed in the switch reaches llvm_unreachable.
inline bool FMinFMaxIsXorSignAbs(Intrinsic::ID IntrinsicID) {
  switch (IntrinsicID) {
  // Variants without "_xorsign_abs"; each fmax entry is followed by its fmin
  // twin.
  case Intrinsic::nvvm_fmax_d:
  case Intrinsic::nvvm_fmin_d:
  case Intrinsic::nvvm_fmax_f:
  case Intrinsic::nvvm_fmin_f:
  case Intrinsic::nvvm_fmax_ftz_f:
  case Intrinsic::nvvm_fmin_ftz_f:
  case Intrinsic::nvvm_fmax_nan_f:
  case Intrinsic::nvvm_fmin_nan_f:
  case Intrinsic::nvvm_fmax_ftz_nan_f:
  case Intrinsic::nvvm_fmin_ftz_nan_f:
    return false;

  // "_xorsign_abs" variants.
  case Intrinsic::nvvm_fmax_xorsign_abs_f:
  case Intrinsic::nvvm_fmin_xorsign_abs_f:
  case Intrinsic::nvvm_fmax_ftz_xorsign_abs_f:
  case Intrinsic::nvvm_fmin_ftz_xorsign_abs_f:
  case Intrinsic::nvvm_fmax_nan_xorsign_abs_f:
  case Intrinsic::nvvm_fmin_nan_xorsign_abs_f:
  case Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f:
  case Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f:
    return true;
  }

  // Only reached when the caller passes an intrinsic outside the sets above.
  llvm_unreachable("Checking XorSignAbs flag for invalid fmin/fmax intrinsic");
  return false;
}
} // namespace nvvm
} // namespace llvm
#endif // LLVM_IR_NVVMINTRINSICUTILS_H