10 #include "TargetInfo.h"
13 using namespace clang;
24 static const unsigned MaxNumRegsForArgsRet = 16;
26 unsigned numRegsForType(
QualType Ty)
const;
28 bool isHomogeneousAggregateBaseType(
QualType Ty)
const override;
29 bool isHomogeneousAggregateSmallEnough(
const Type *
Base,
33 llvm::Type *coerceKernelArgumentType(llvm::Type *Ty,
unsigned FromAS,
34 unsigned ToAS)
const {
36 auto *PtrTy = llvm::dyn_cast<llvm::PointerType>(Ty);
37 if (PtrTy && PtrTy->getAddressSpace() == FromAS)
38 return llvm::PointerType::get(Ty->getContext(), ToAS);
55 bool AMDGPUABIInfo::isHomogeneousAggregateBaseType(
QualType Ty)
const {
59 bool AMDGPUABIInfo::isHomogeneousAggregateSmallEnough(
61 uint32_t NumRegs = (getContext().getTypeSize(
Base) + 31) / 32;
64 return Members * NumRegs <= MaxNumRegsForArgsRet;
68 unsigned AMDGPUABIInfo::numRegsForType(
QualType Ty)
const {
74 QualType EltTy = VT->getElementType();
75 unsigned EltSize = getContext().getTypeSize(EltTy);
79 return (VT->getNumElements() + 1) / 2;
81 unsigned EltNumRegs = (EltSize + 31) / 32;
82 return EltNumRegs * VT->getNumElements();
91 NumRegs += numRegsForType(FieldTy);
97 return (getContext().getTypeSize(Ty) + 31) / 32;
106 unsigned NumRegsLeft = MaxNumRegsForArgsRet;
108 if (CC == llvm::CallingConv::AMDGPU_KERNEL) {
109 Arg.info = classifyKernelArgumentType(Arg.type);
118 llvm_unreachable(
"AMDGPU does not support varargs");
149 llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext());
153 if (numRegsForType(RetTy) <= MaxNumRegsForArgsRet)
172 llvm::Type *OrigLTy = CGT.ConvertType(Ty);
173 llvm::Type *LTy = OrigLTy;
174 if (getContext().getLangOpts().HIP) {
175 LTy = coerceKernelArgumentType(
186 if (!getContext().getLangOpts().OpenCL && LTy == OrigLTy &&
189 getContext().getTypeAlignInChars(Ty),
201 unsigned &NumRegsLeft)
const {
202 assert(NumRegsLeft <= MaxNumRegsForArgsRet &&
"register estimate underflow");
231 unsigned NumRegs = (
Size + 31) / 32;
232 NumRegsLeft -=
std::min(NumRegsLeft, NumRegs);
241 llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext());
245 if (NumRegsLeft > 0) {
246 unsigned NumRegs = numRegsForType(Ty);
247 if (NumRegsLeft >= NumRegs) {
248 NumRegsLeft -= NumRegs;
256 getContext().getTypeAlignInChars(Ty),
263 unsigned NumRegs = numRegsForType(Ty);
264 NumRegsLeft -=
std::min(NumRegs, NumRegsLeft);
275 void setFunctionDeclAttributes(
const FunctionDecl *FD, llvm::Function *F,
280 void setTargetAttributes(
const Decl *D, llvm::GlobalValue *GV,
282 unsigned getOpenCLKernelCallingConv()
const override;
285 llvm::PointerType *
T,
QualType QT)
const override;
287 LangAS getASTAllocaAddressSpace()
const override {
289 getABIInfo().getDataLayout().getAllocaAddrSpace());
292 const VarDecl *D)
const override;
295 llvm::AtomicOrdering Ordering,
296 llvm::LLVMContext &Ctx)
const override;
298 llvm::Function *BlockInvokeFunc,
299 llvm::Type *BlockTy)
const override;
300 bool shouldEmitStaticExternCAliases()
const override;
301 bool shouldEmitDWARFBitFieldSeparators()
const override;
307 static void addAMDGCNMetadata(llvm::GlobalValue *GV, StringRef Name,
313 llvm::GlobalValue *GV) {
317 return !D->
hasAttr<OMPDeclareTargetDeclAttr>() &&
318 (D->
hasAttr<OpenCLKernelAttr>() ||
319 (isa<FunctionDecl>(D) && D->
hasAttr<CUDAGlobalAttr>()) ||
321 (D->
hasAttr<CUDADeviceAttr>() || D->
hasAttr<CUDAConstantAttr>() ||
322 cast<VarDecl>(D)->getType()->isCUDADeviceBuiltinSurfaceType() ||
323 cast<VarDecl>(D)->getType()->isCUDADeviceBuiltinTextureType())));
326 void AMDGPUTargetCodeGenInfo::setFunctionDeclAttributes(
328 const auto *ReqdWGS =
330 const bool IsOpenCLKernel =
334 const auto *FlatWGS = FD->
getAttr<AMDGPUFlatWorkGroupSizeAttr>();
335 if (ReqdWGS || FlatWGS) {
337 }
else if (IsOpenCLKernel || IsHIPKernel) {
340 const unsigned OpenCLDefaultMaxWorkGroupSize = 256;
341 const unsigned DefaultMaxWorkGroupSize =
342 IsOpenCLKernel ? OpenCLDefaultMaxWorkGroupSize
344 std::string AttrVal =
345 std::string(
"1,") + llvm::utostr(DefaultMaxWorkGroupSize);
346 F->addFnAttr(
"amdgpu-flat-work-group-size", AttrVal);
349 if (
const auto *
Attr = FD->
getAttr<AMDGPUWavesPerEUAttr>())
352 if (
const auto *
Attr = FD->
getAttr<AMDGPUNumSGPRAttr>()) {
353 unsigned NumSGPR =
Attr->getNumSGPR();
356 F->addFnAttr(
"amdgpu-num-sgpr", llvm::utostr(NumSGPR));
359 if (
const auto *
Attr = FD->
getAttr<AMDGPUNumVGPRAttr>()) {
360 uint32_t NumVGPR =
Attr->getNumVGPR();
363 F->addFnAttr(
"amdgpu-num-vgpr", llvm::utostr(NumVGPR));
366 if (
const auto *
Attr = FD->
getAttr<AMDGPUMaxNumWorkGroupsAttr>()) {
367 uint32_t
X =
Attr->getMaxNumWorkGroupsX()
371 uint32_t Y =
Attr->getMaxNumWorkGroupsY()
372 ?
Attr->getMaxNumWorkGroupsY()
376 uint32_t Z =
Attr->getMaxNumWorkGroupsZ()
377 ?
Attr->getMaxNumWorkGroupsZ()
383 llvm::raw_svector_ostream OS(AttrVal);
384 OS <<
X <<
',' << Y <<
',' << Z;
386 F->addFnAttr(
"amdgpu-max-num-workgroups", AttrVal.str());
394 llvm::GlobalValue *GV, StringRef Name,
396 llvm::Module *M = GV->getParent();
400 llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata(AnnotationName);
402 llvm::Metadata *MDVals[] = {
403 llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, Name),
404 llvm::ConstantAsMetadata::get(
405 llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), Operand))};
407 MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
411 void AMDGPUTargetCodeGenInfo::addAMDGCNMetadata(llvm::GlobalValue *GV,
412 StringRef Name,
int Operand) {
419 void AMDGPUTargetCodeGenInfo::emitTargetGlobals(
421 StringRef Name =
"__oclc_ABI_version";
422 llvm::GlobalVariable *OriginalGV = CGM.
getModule().getNamedGlobal(Name);
423 if (OriginalGV && !llvm::GlobalVariable::isExternalLinkage(OriginalGV->getLinkage()))
427 llvm::CodeObjectVersionKind::COV_None)
430 auto *
Type = llvm::IntegerType::getIntNTy(CGM.
getModule().getContext(), 32);
431 llvm::Constant *COV = llvm::ConstantInt::get(
436 auto *GV =
new llvm::GlobalVariable(
437 CGM.
getModule(),
Type,
true, llvm::GlobalValue::WeakODRLinkage, COV, Name,
438 nullptr, llvm::GlobalValue::ThreadLocalMode::NotThreadLocal,
440 GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Local);
445 OriginalGV->replaceAllUsesWith(GV);
446 GV->takeName(OriginalGV);
447 OriginalGV->eraseFromParent();
451 void AMDGPUTargetCodeGenInfo::setTargetAttributes(
455 GV->setDSOLocal(
true);
458 if (GV->isDeclaration())
461 llvm::Function *F = dyn_cast<llvm::Function>(GV);
465 const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
467 setFunctionDeclAttributes(FD, F, M);
470 const bool IsSYCLKernel =
473 addAMDGCNMetadata(F,
"kernel", 1);
476 F->addFnAttr(
"amdgpu-unsafe-fp-atomics",
"true");
478 if (!getABIInfo().getCodeGenOpts().EmitIEEENaNCompliantInsts)
479 F->addFnAttr(
"amdgpu-ieee",
"false");
482 unsigned AMDGPUTargetCodeGenInfo::getOpenCLKernelCallingConv()
const {
483 return llvm::CallingConv::AMDGPU_KERNEL;
491 llvm::Constant *AMDGPUTargetCodeGenInfo::getNullPointer(
495 return llvm::ConstantPointerNull::get(PT);
498 auto NPT = llvm::PointerType::get(
500 return llvm::ConstantExpr::getAddrSpaceCast(
501 llvm::ConstantPointerNull::get(NPT), PT);
505 AMDGPUTargetCodeGenInfo::getGlobalVarAddressSpace(
CodeGenModule &CGM,
509 "Address space agnostic languages only");
513 return DefaultGlobalAS;
525 return DefaultGlobalAS;
529 AMDGPUTargetCodeGenInfo::getLLVMSyncScopeID(
const LangOptions &LangOpts,
531 llvm::AtomicOrdering Ordering,
532 llvm::LLVMContext &Ctx)
const {
537 Name =
"singlethread";
561 if (Ordering != llvm::AtomicOrdering::SequentiallyConsistent) {
563 Name = Twine(Twine(Name) + Twine(
"-")).str();
565 Name = Twine(Twine(Name) + Twine(
"one-as")).str();
568 return Ctx.getOrInsertSyncScopeID(Name);
571 bool AMDGPUTargetCodeGenInfo::shouldEmitStaticExternCAliases()
const {
575 bool AMDGPUTargetCodeGenInfo::shouldEmitDWARFBitFieldSeparators()
const {
581 FT = getABIInfo().getContext().adjustFunctionType(
593 llvm::Value *AMDGPUTargetCodeGenInfo::createEnqueuedBlockKernel(
594 CodeGenFunction &CGF, llvm::Function *Invoke, llvm::Type *BlockTy)
const {
598 auto *InvokeFT = Invoke->getFunctionType();
607 ArgTys.push_back(BlockTy);
608 ArgTypeNames.push_back(llvm::MDString::get(C,
"__block_literal"));
609 AddressQuals.push_back(llvm::ConstantAsMetadata::get(Builder.getInt32(0)));
610 ArgBaseTypeNames.push_back(llvm::MDString::get(C,
"__block_literal"));
611 ArgTypeQuals.push_back(llvm::MDString::get(C,
""));
612 AccessQuals.push_back(llvm::MDString::get(C,
"none"));
613 ArgNames.push_back(llvm::MDString::get(C,
"block_literal"));
614 for (
unsigned I = 1, E = InvokeFT->getNumParams(); I < E; ++I) {
615 ArgTys.push_back(InvokeFT->getParamType(I));
616 ArgTypeNames.push_back(llvm::MDString::get(C,
"void*"));
617 AddressQuals.push_back(llvm::ConstantAsMetadata::get(Builder.getInt32(3)));
618 AccessQuals.push_back(llvm::MDString::get(C,
"none"));
619 ArgBaseTypeNames.push_back(llvm::MDString::get(C,
"void*"));
620 ArgTypeQuals.push_back(llvm::MDString::get(C,
""));
622 llvm::MDString::get(C, (Twine(
"local_arg") + Twine(I)).str()));
624 std::string Name = Invoke->getName().str() +
"_kernel";
625 auto *FT = llvm::FunctionType::get(llvm::Type::getVoidTy(C), ArgTys,
false);
626 auto *F = llvm::Function::Create(FT, llvm::GlobalValue::InternalLinkage, Name,
628 F->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
630 llvm::AttrBuilder KernelAttrs(C);
634 KernelAttrs.addAttribute(
"enqueued-block");
635 F->addFnAttrs(KernelAttrs);
637 auto IP = CGF.
Builder.saveIP();
638 auto *BB = llvm::BasicBlock::Create(C,
"entry", F);
639 Builder.SetInsertPoint(BB);
641 auto *BlockPtr = Builder.CreateAlloca(BlockTy,
nullptr);
642 BlockPtr->setAlignment(BlockAlign);
643 Builder.CreateAlignedStore(F->arg_begin(), BlockPtr, BlockAlign);
644 auto *
Cast = Builder.CreatePointerCast(BlockPtr, InvokeFT->getParamType(0));
646 Args.push_back(
Cast);
647 for (llvm::Argument &A : llvm::drop_begin(F->args()))
649 llvm::CallInst *call = Builder.CreateCall(Invoke, Args);
650 call->setCallingConv(Invoke->getCallingConv());
651 Builder.CreateRetVoid();
652 Builder.restoreIP(IP);
654 F->setMetadata(
"kernel_arg_addr_space", llvm::MDNode::get(C, AddressQuals));
655 F->setMetadata(
"kernel_arg_access_qual", llvm::MDNode::get(C, AccessQuals));
656 F->setMetadata(
"kernel_arg_type", llvm::MDNode::get(C, ArgTypeNames));
657 F->setMetadata(
"kernel_arg_base_type",
658 llvm::MDNode::get(C, ArgBaseTypeNames));
659 F->setMetadata(
"kernel_arg_type_qual", llvm::MDNode::get(C, ArgTypeQuals));
661 F->setMetadata(
"kernel_arg_name", llvm::MDNode::get(C, ArgNames));
667 llvm::Function *F,
const AMDGPUFlatWorkGroupSizeAttr *FlatWGS,
668 const ReqdWorkGroupSizeAttr *ReqdWGS, int32_t *MinThreadsVal,
669 int32_t *MaxThreadsVal) {
673 Min = FlatWGS->getMin()->EvaluateKnownConstInt(
getContext()).getExtValue();
674 Max = FlatWGS->getMax()->EvaluateKnownConstInt(
getContext()).getExtValue();
676 if (ReqdWGS &&
Min == 0 &&
Max == 0)
677 Min =
Max = ReqdWGS->getXDim() * ReqdWGS->getYDim() * ReqdWGS->getZDim();
680 assert(
Min <=
Max &&
"Min must be less than or equal Max");
683 *MinThreadsVal =
Min;
685 *MaxThreadsVal =
Max;
686 std::string AttrVal = llvm::utostr(
Min) +
"," + llvm::utostr(
Max);
688 F->addFnAttr(
"amdgpu-flat-work-group-size", AttrVal);
690 assert(
Max == 0 &&
"Max must be zero");
694 llvm::Function *F,
const AMDGPUWavesPerEUAttr *
Attr) {
696 Attr->getMin()->EvaluateKnownConstInt(
getContext()).getExtValue();
699 ?
Attr->getMax()->EvaluateKnownConstInt(
getContext()).getExtValue()
703 assert((
Max == 0 ||
Min <=
Max) &&
"Min must be less than or equal Max");
705 std::string AttrVal = llvm::utostr(
Min);
707 AttrVal = AttrVal +
"," + llvm::utostr(
Max);
708 F->addFnAttr(
"amdgpu-waves-per-eu", AttrVal);
710 assert(
Max == 0 &&
"Max must be zero");
713 std::unique_ptr<TargetCodeGenInfo>
715 return std::make_unique<AMDGPUTargetCodeGenInfo>(CGM.
getTypes());
static void setCUDAKernelCallingConvention(CanQualType &FTy, CodeGenModule &CGM, const FunctionDecl *FD)
Set calling convention for CUDA/HIP kernel.
static bool requiresAMDGPUProtectedVisibility(const Decl *D, llvm::GlobalValue *GV)
static void addAMDGCOrNVVMMetadata(const char *AnnotationName, llvm::GlobalValue *GV, StringRef Name, int Operand)
Helper function for AMDGCN and NVVM targets, adds a NamedMDNode with GV, Name, and Operand as operands.
Defines the clang::TargetOptions class.
__DEVICE__ int min(int __a, int __b)
uint64_t getTargetNullPointerValue(QualType QT) const
Get target-dependent integer value for null pointer which is used for constant folding.
const TargetInfo & getTargetInfo() const
unsigned getTargetAddressSpace(LangAS AS) const
Attr - This represents one attribute.
ABIArgInfo - Helper class to encapsulate information about how a specific C type should be passed to or returned from a function.
static ABIArgInfo getIgnore()
static ABIArgInfo getDirect(llvm::Type *T=nullptr, unsigned Offset=0, llvm::Type *Padding=nullptr, bool CanBeFlattened=true, unsigned Align=0)
static ABIArgInfo getIndirectAliased(CharUnits Alignment, unsigned AddrSpace, bool Realign=false, llvm::Type *Padding=nullptr)
Pass this in memory using the IR byref attribute.
Like RawAddress, an abstract representation of an aligned address, but the pointer contained in this class is possibly signed.
@ RAA_DirectInMemory
Pass it on the stack using its defined layout.
CGFunctionInfo - Class to encapsulate the information about a function definition.
unsigned getCallingConvention() const
getCallingConvention - Return the user specified calling convention, which has been translated into an LLVM calling convention.
CanQualType getReturnType() const
MutableArrayRef< ArgInfo > arguments()
ABIArgInfo & getReturnInfo()
CodeGenFunction - This class organizes the per-function state that is used while generating LLVM code.
llvm::LLVMContext & getLLVMContext()
This class organizes the cross-function state that is used while generating LLVM code.
const TargetInfo & getTarget() const
CodeGenTypes & getTypes()
const llvm::DataLayout & getDataLayout() const
void handleAMDGPUWavesPerEUAttr(llvm::Function *F, const AMDGPUWavesPerEUAttr *A)
Emit the IR encoding to attach the AMD GPU waves-per-eu attribute to F.
llvm::Module & getModule() const
const LangOptions & getLangOpts() const
void handleAMDGPUFlatWorkGroupSizeAttr(llvm::Function *F, const AMDGPUFlatWorkGroupSizeAttr *A, const ReqdWorkGroupSizeAttr *ReqdWGS=nullptr, int32_t *MinThreadsVal=nullptr, int32_t *MaxThreadsVal=nullptr)
Emit the IR encoding to attach the AMD GPU flat-work-group-size attribute to F.
ASTContext & getContext() const
void addDefaultFunctionDefinitionAttributes(llvm::AttrBuilder &attrs)
Like the overload taking a Function &, but intended specifically for frontends that want to build on Clang's target-configuration logic.
const CodeGenOptions & getCodeGenOpts() const
This class organizes the cross-module state that is used while lowering AST types to LLVM types.
DefaultABIInfo - The default implementation for ABI specific details.
ABIArgInfo classifyArgumentType(QualType RetTy) const
ABIArgInfo classifyReturnType(QualType RetTy) const
TargetCodeGenInfo - This class organizes various target-specific codegeneration issues, like target-specific attributes, builtins and so on.
Decl - This represents one declaration (or definition), e.g.
Represents a member of a struct/union/class.
Represents a function declaration or definition.
ExtInfo withCallingConv(CallingConv cc) const
FunctionType - C99 6.7.5.3 - Function Declarators.
ExtInfo getExtInfo() const
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that is accepted.
A (possibly-)qualified type.
LangAS getAddressSpace() const
Return the address space of this type.
bool isConstantStorage(const ASTContext &Ctx, bool ExcludeCtor, bool ExcludeDtor)
Represents a struct/union/class.
bool hasFlexibleArrayMember() const
field_range fields() const
A helper class that allows the use of isa/cast/dyncast to detect TagType objects of structs/unions/classes.
Scope - A scope is a transient data structure that is used while parsing the program.
bool allowAMDGPUUnsafeFPAtomics() const
Returns whether or not the AMDGPU unsafe floating point atomics are allowed.
virtual std::optional< LangAS > getConstantAddressSpace() const
Return an AST address space which can be used opportunistically for constant global memory.
TargetOptions & getTargetOpts() const
Retrieve the target options.
llvm::CodeObjectVersionKind CodeObjectVersion
Code object version for AMDGPU.
The base class of the type hierarchy.
const T * getAs() const
Member-template getAs&lt;specific type&gt;: look through sugar for an instance of the specified type.
Represents a variable declaration or definition.
bool hasConstantInitialization() const
Determine whether this variable has constant initialization.
Represents a GCC generic vector type.
ABIArgInfo classifyReturnType(CodeGenModule &CGM, CanQualType type)
Classify the rules for how to return a particular type.
ABIArgInfo classifyArgumentType(CodeGenModule &CGM, CanQualType type)
Classify the rules for how to pass a particular type.
CGCXXABI::RecordArgABI getRecordArgABI(const RecordType *RT, CGCXXABI &CXXABI)
bool classifyReturnType(const CGCXXABI &CXXABI, CGFunctionInfo &FI, const ABIInfo &Info)
std::unique_ptr< TargetCodeGenInfo > createAMDGPUTargetCodeGenInfo(CodeGenModule &CGM)
bool isAggregateTypeForABI(QualType T)
const Type * isSingleElementStruct(QualType T, ASTContext &Context)
isSingleElementStruct - Determine if a structure is a "single element struct", i.e. it has exactly one non-empty field or exactly one field which is itself a single element struct.
QualType useFirstFieldIfTransparentUnion(QualType Ty)
Pass transparent unions as if they were the type of the first element.
bool isEmptyRecord(ASTContext &Context, QualType T, bool AllowArrays, bool AsIfNoUniqueAddr=false)
isEmptyRecord - Return true iff a structure contains only empty fields.
bool Cast(InterpState &S, CodePtr OpPC)
The JSON file list parser is used to communicate input to InstallAPI.
LangAS
Defines the address space values used by the address space qualifier of QualType.
const FunctionProtoType * T
SyncScope
Defines synch scope values used internally by clang.
LangAS getLangASFromTargetAS(unsigned TargetAS)
@ HiddenVisibility
Objects with "hidden" visibility are not seen by the dynamic linker.
@ ProtectedVisibility
Objects with "protected" visibility are seen by the dynamic linker but always dynamically resolve to an object within this shared object.