10 #include "TargetInfo.h"
11 #include "llvm/IR/IntrinsicsNVPTX.h"
13 using namespace clang;
22 class NVPTXTargetCodeGenInfo;
24 class NVPTXABIInfo :
public ABIInfo {
25 NVPTXTargetCodeGenInfo &CGInfo;
28 NVPTXABIInfo(
CodeGenTypes &CGT, NVPTXTargetCodeGenInfo &Info)
46 void setTargetAttributes(
const Decl *D, llvm::GlobalValue *GV,
48 bool shouldEmitStaticExternCAliases()
const override;
54 llvm::Type *getCUDADeviceBuiltinSurfaceDeviceType()
const override {
57 return llvm::Type::getInt64Ty(getABIInfo().getVMContext());
60 llvm::Type *getCUDADeviceBuiltinTextureDeviceType()
const override {
63 return llvm::Type::getInt64Ty(getABIInfo().getVMContext());
67 LValue Src)
const override {
68 emitBuiltinSurfTexDeviceCopy(CGF, Dst, Src);
73 LValue Src)
const override {
74 emitBuiltinSurfTexDeviceCopy(CGF, Dst, Src);
80 static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name,
86 llvm::Value *Handle =
nullptr;
90 if (
auto *ASC = llvm::dyn_cast_or_null<llvm::AddrSpaceCastOperator>(C))
91 C = llvm::cast<llvm::Constant>(ASC->getPointerOperand());
92 if (
auto *GV = llvm::dyn_cast_or_null<llvm::GlobalVariable>(C)) {
98 {GV},
"texsurf_handle");
106 bool NVPTXABIInfo::isUnsupportedType(
QualType T)
const {
115 return EIT->getNumBits() >
121 return isUnsupportedType(AT->getElementType());
128 if (
const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD))
130 if (isUnsupportedType(I.getType()))
134 if (isUnsupportedType(I->getType()))
141 unsigned MaxSize)
const {
144 const uint64_t Alignment = getContext().getTypeAlign(Ty);
145 const unsigned Div = std::min<unsigned>(MaxSize, Alignment);
146 llvm::Type *IntType = llvm::Type::getIntNTy(getVMContext(),
Div);
155 if (getContext().getLangOpts().OpenMP &&
156 getContext().getLangOpts().OpenMPIsTargetDevice &&
157 isUnsupportedType(RetTy))
158 return coerceToIntArrayWithLimit(RetTy, 64);
166 RetTy = EnumTy->getDecl()->getIntegerType();
175 Ty = EnumTy->getDecl()->getIntegerType();
181 if (getContext().getLangOpts().CUDAIsDevice) {
184 CGInfo.getCUDADeviceBuiltinSurfaceDeviceType());
187 CGInfo.getCUDADeviceBuiltinTextureDeviceType());
189 return getNaturalAlignIndirect(Ty,
true);
193 if ((EIT->getNumBits() > 128) ||
194 (!getContext().getTargetInfo().hasInt128Type() &&
195 EIT->getNumBits() > 64))
196 return getNaturalAlignIndirect(Ty,
true);
218 llvm_unreachable(
"NVPTX does not support varargs");
221 void NVPTXTargetCodeGenInfo::setTargetAttributes(
223 if (GV->isDeclaration())
225 const VarDecl *VD = dyn_cast_or_null<VarDecl>(D);
229 addNVVMMetadata(GV,
"surface", 1);
231 addNVVMMetadata(GV,
"texture", 1);
236 const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
239 llvm::Function *F = cast<llvm::Function>(GV);
245 if (FD->
hasAttr<OpenCLKernelAttr>()) {
248 addNVVMMetadata(F,
"kernel", 1);
250 F->addFnAttr(llvm::Attribute::NoInline);
252 bool HasMaxWorkGroupSize =
false;
253 bool HasMinWorkGroupPerCU =
false;
254 if (
const auto *MWGS = FD->
getAttr<SYCLIntelMaxWorkGroupSizeAttr>()) {
255 auto MaxThreads = (*MWGS->getZDimVal()).getExtValue() *
256 (*MWGS->getYDimVal()).getExtValue() *
257 (*MWGS->getXDimVal()).getExtValue();
258 if (MaxThreads > 0) {
259 addNVVMMetadata(F,
"maxntidx", MaxThreads);
260 HasMaxWorkGroupSize =
true;
264 auto attrValue = [&](
Expr *E) {
265 const auto *CE = cast<ConstantExpr>(E);
266 std::optional<llvm::APInt> Val = CE->getResultAsAPSInt();
267 return Val->getZExtValue();
270 if (
const auto *MWGPCU =
271 FD->
getAttr<SYCLIntelMinWorkGroupsPerComputeUnitAttr>()) {
272 if (!HasMaxWorkGroupSize && FD->
hasAttr<OpenCLKernelAttr>()) {
274 diag::warn_launch_bounds_missing_attr)
278 addNVVMMetadata(F,
"minnctapersm", attrValue(MWGPCU->getValue()));
279 HasMinWorkGroupPerCU =
true;
283 if (
const auto *MWGPMP =
284 FD->
getAttr<SYCLIntelMaxWorkGroupsPerMultiprocessorAttr>()) {
285 if ((!HasMaxWorkGroupSize || !HasMinWorkGroupPerCU) &&
286 FD->
hasAttr<OpenCLKernelAttr>()) {
288 diag::warn_launch_bounds_missing_attr)
292 addNVVMMetadata(F,
"maxclusterrank", attrValue(MWGPMP->getValue()));
302 if (FD->
hasAttr<CUDAGlobalAttr>()) {
304 addNVVMMetadata(F,
"kernel", 1);
306 if (CUDALaunchBoundsAttr *
Attr = FD->
getAttr<CUDALaunchBoundsAttr>())
311 if (FD->
hasAttr<NVPTXKernelAttr>()) {
312 addNVVMMetadata(F,
"kernel", 1);
316 void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::GlobalValue *GV,
317 StringRef Name,
int Operand) {
318 llvm::Module *M = GV->getParent();
322 llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata(
"nvvm.annotations");
324 llvm::Metadata *MDVals[] = {
325 llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, Name),
326 llvm::ConstantAsMetadata::get(
327 llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), Operand))};
329 MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
332 bool NVPTXTargetCodeGenInfo::shouldEmitStaticExternCAliases()
const {
338 llvm::PointerType *PT,
342 return llvm::ConstantPointerNull::get(PT);
344 auto NPT = llvm::PointerType::get(
346 return llvm::ConstantExpr::getAddrSpaceCast(
347 llvm::ConstantPointerNull::get(NPT), PT);
352 const CUDALaunchBoundsAttr *
Attr,
353 int32_t *MaxThreadsVal,
354 int32_t *MinBlocksVal,
355 int32_t *MaxClusterRankVal) {
358 MaxThreads =
Attr->getMaxThreads()->EvaluateKnownConstInt(
getContext());
359 if (MaxThreads > 0) {
361 *MaxThreadsVal = MaxThreads.getExtValue();
364 NVPTXTargetCodeGenInfo::addNVVMMetadata(F,
"maxntidx",
365 MaxThreads.getExtValue());
372 if (
Attr->getMinBlocks()) {
374 MinBlocks =
Attr->getMinBlocks()->EvaluateKnownConstInt(
getContext());
377 *MinBlocksVal = MinBlocks.getExtValue();
380 NVPTXTargetCodeGenInfo::addNVVMMetadata(F,
"minctasm",
381 MinBlocks.getExtValue());
385 if (
Attr->getMaxBlocks()) {
387 MaxBlocks =
Attr->getMaxBlocks()->EvaluateKnownConstInt(
getContext());
389 if (MaxClusterRankVal)
390 *MaxClusterRankVal = MaxBlocks.getExtValue();
393 NVPTXTargetCodeGenInfo::addNVVMMetadata(F,
"maxclusterrank",
394 MaxBlocks.getExtValue());
400 std::unique_ptr<TargetCodeGenInfo>
402 return std::make_unique<NVPTXTargetCodeGenInfo>(CGM.
getTypes());
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic analysis of a program.
uint64_t getTypeSize(QualType T) const
Return the size of the specified (complete) type T, in bits.
const TargetInfo & getTargetInfo() const
Attr - This represents one attribute.
A fixed int type of a specified bitwidth.
Represents a base class of a C++ class.
Represents a C++ struct/union/class.
ABIArgInfo - Helper class to encapsulate information about how a specific C type should be passed to or returned from a function.
static ABIArgInfo getIgnore()
static ABIArgInfo getDirect(llvm::Type *T=nullptr, unsigned Offset=0, llvm::Type *Padding=nullptr, bool CanBeFlattened=true, unsigned Align=0)
static ABIArgInfo getExtend(QualType Ty, llvm::Type *T=nullptr)
ABIInfo - Target specific hooks for defining how a type should be passed or returned from functions.
Like RawAddress, an abstract representation of an aligned address, but the pointer contained in this class is possibly signed.
llvm::Value * emitRawPointer(CodeGenFunction &CGF) const
Return the pointer contained in this class after authenticating it and adding offset to it if necessary.
CGFunctionInfo - Class to encapsulate the information about a function definition.
unsigned getCallingConvention() const
getCallingConvention - Return the user specified calling convention, which has been translated into an LLVM CC.
CanQualType getReturnType() const
MutableArrayRef< ArgInfo > arguments()
ABIArgInfo & getReturnInfo()
void setEffectiveCallingConvention(unsigned Value)
CodeGenFunction - This class organizes the per-function state that is used while generating LLVM code.
llvm::CallInst * EmitRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
llvm::Value * EmitLoadOfScalar(Address Addr, bool Volatile, QualType Ty, SourceLocation Loc, AlignmentSource Source=AlignmentSource::Type, bool isNontemporal=false)
EmitLoadOfScalar - Load a scalar value from an address, taking care to appropriately convert from the memory representation to the LLVM value representation.
void EmitStoreOfScalar(llvm::Value *Value, Address Addr, bool Volatile, QualType Ty, AlignmentSource Source=AlignmentSource::Type, bool isInit=false, bool isNontemporal=false)
EmitStoreOfScalar - Store a scalar value to an address, taking care to appropriately convert from the memory representation to the LLVM value representation.
This class organizes the cross-function state that is used while generating LLVM code.
void handleCUDALaunchBoundsAttr(llvm::Function *F, const CUDALaunchBoundsAttr *A, int32_t *MaxThreadsVal=nullptr, int32_t *MinBlocksVal=nullptr, int32_t *MaxClusterRankVal=nullptr)
Emit the IR encoding to attach the CUDA launch bounds attribute to F.
CodeGenTypes & getTypes()
const LangOptions & getLangOpts() const
DiagnosticsEngine & getDiags() const
ASTContext & getContext() const
llvm::Function * getIntrinsic(unsigned IID, ArrayRef< llvm::Type * > Tys=std::nullopt)
This class organizes the cross-module state that is used while lowering AST types to LLVM types.
LValue - This represents an lvalue references.
Address getAddress() const
TargetCodeGenInfo - This class organizes various target-specific codegeneration issues, like target-specific attributes, builtins and so on.
Decl - This represents one declaration (or definition), e.g.
SourceLocation getLocation() const
DiagnosticBuilder Report(SourceLocation Loc, unsigned DiagID)
Issue the message to the client.
A helper class that allows the use of isa/cast/dyncast to detect TagType objects of enums.
This represents one expression.
Represents a member of a struct/union/class.
Represents a function declaration or definition.
A (possibly-)qualified type.
Represents a struct/union/class.
field_range fields() const
A helper class that allows the use of isa/cast/dyncast to detect TagType objects of structs/unions/classes.
Encodes a location in the source.
virtual bool hasInt128Type() const
Determine whether the __int128 type is supported on this target.
virtual bool hasFloat16Type() const
Determine whether the _Float16 type is supported on this target.
virtual bool hasFloat128Type() const
Determine whether the __float128 type is supported on this target.
bool isFloat16Type() const
bool isIntegerType() const
isIntegerType() does not include complex integers (a GCC extension).
bool isScalarType() const
bool isFloat128Type() const
bool isCUDADeviceBuiltinSurfaceType() const
Check if the type is the CUDA device builtin surface type.
bool isCUDADeviceBuiltinTextureType() const
Check if the type is the CUDA device builtin texture type.
const ArrayType * getAsArrayTypeUnsafe() const
A variant of getAs<> for array types which silently discards qualifiers from the outermost type.
bool isRealFloatingType() const
Floating point categories.
const T * getAs() const
Member-template getAs<specific type>'.
Represents a variable declaration or definition.
ABIArgInfo classifyReturnType(CodeGenModule &CGM, CanQualType type)
Classify the rules for how to return a particular type.
ABIArgInfo classifyArgumentType(CodeGenModule &CGM, CanQualType type)
Classify the rules for how to pass a particular type.
bool classifyReturnType(const CGCXXABI &CXXABI, CGFunctionInfo &FI, const ABIInfo &Info)
std::unique_ptr< TargetCodeGenInfo > createNVPTXTargetCodeGenInfo(CodeGenModule &CGM)
bool isAggregateTypeForABI(QualType T)
bool Div(InterpState &S, CodePtr OpPC)
1) Pops the RHS from the stack.
The JSON file list parser is used to communicate input to InstallAPI.
const FunctionProtoType * T