clang 20.0.0git
CGBuiltin.cpp
1//===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This contains code to emit Builtin calls as LLVM code.
10//
11//===----------------------------------------------------------------------===//
12
13#include "ABIInfo.h"
14#include "CGCUDARuntime.h"
15#include "CGCXXABI.h"
16#include "CGHLSLRuntime.h"
17#include "CGObjCRuntime.h"
18#include "CGOpenCLRuntime.h"
19#include "CGRecordLayout.h"
20#include "CGValue.h"
21#include "CodeGenFunction.h"
22#include "CodeGenModule.h"
23#include "ConstantEmitter.h"
24#include "PatternInit.h"
25#include "TargetInfo.h"
27#include "clang/AST/Attr.h"
28#include "clang/AST/Decl.h"
29#include "clang/AST/Expr.h"
30#include "clang/AST/OSLog.h"
32#include "clang/AST/Type.h"
38#include "llvm/ADT/APFloat.h"
39#include "llvm/ADT/APInt.h"
40#include "llvm/ADT/FloatingPointMode.h"
41#include "llvm/ADT/SmallPtrSet.h"
42#include "llvm/ADT/StringExtras.h"
43#include "llvm/Analysis/ValueTracking.h"
44#include "llvm/IR/DataLayout.h"
45#include "llvm/IR/InlineAsm.h"
46#include "llvm/IR/Intrinsics.h"
47#include "llvm/IR/IntrinsicsAArch64.h"
48#include "llvm/IR/IntrinsicsAMDGPU.h"
49#include "llvm/IR/IntrinsicsARM.h"
50#include "llvm/IR/IntrinsicsBPF.h"
51#include "llvm/IR/IntrinsicsDirectX.h"
52#include "llvm/IR/IntrinsicsHexagon.h"
53#include "llvm/IR/IntrinsicsNVPTX.h"
54#include "llvm/IR/IntrinsicsPowerPC.h"
55#include "llvm/IR/IntrinsicsR600.h"
56#include "llvm/IR/IntrinsicsRISCV.h"
57#include "llvm/IR/IntrinsicsS390.h"
58#include "llvm/IR/IntrinsicsWebAssembly.h"
59#include "llvm/IR/IntrinsicsX86.h"
60#include "llvm/IR/MDBuilder.h"
61#include "llvm/IR/MatrixBuilder.h"
62#include "llvm/IR/MemoryModelRelaxationAnnotations.h"
63#include "llvm/Support/AMDGPUAddrSpace.h"
64#include "llvm/Support/ConvertUTF.h"
65#include "llvm/Support/MathExtras.h"
66#include "llvm/Support/ScopedPrinter.h"
67#include "llvm/TargetParser/AArch64TargetParser.h"
68#include "llvm/TargetParser/RISCVISAInfo.h"
69#include "llvm/TargetParser/RISCVTargetParser.h"
70#include "llvm/TargetParser/X86TargetParser.h"
71#include <numeric>
72#include <optional>
73#include <utility>
74
75using namespace clang;
76using namespace CodeGen;
77using namespace llvm;
78
79static void initializeAlloca(CodeGenFunction &CGF, AllocaInst *AI, Value *Size,
80 Align AlignmentInBytes) {
81 ConstantInt *Byte;
82 switch (CGF.getLangOpts().getTrivialAutoVarInit()) {
83 case LangOptions::TrivialAutoVarInitKind::Uninitialized:
84 // Nothing to initialize.
85 return;
86 case LangOptions::TrivialAutoVarInitKind::Zero:
87 Byte = CGF.Builder.getInt8(0x00);
88 break;
89 case LangOptions::TrivialAutoVarInitKind::Pattern: {
90 llvm::Type *Int8 = llvm::IntegerType::getInt8Ty(CGF.CGM.getLLVMContext());
91 Byte = llvm::dyn_cast<llvm::ConstantInt>(
92 initializationPatternFor(CGF.CGM, Int8));
93 break;
94 }
95 }
96 if (CGF.CGM.stopAutoInit())
97 return;
98 auto *I = CGF.Builder.CreateMemSet(AI, Byte, Size, AlignmentInBytes);
99 I->addAnnotationMetadata("auto-init");
100}
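// [Editorial illustration, not part of the original source] A rough sketch of
// what the helper above produces: under -ftrivial-auto-var-init=zero, a call
// such as
//   void *p = __builtin_alloca(n);
// is emitted as an alloca followed by a memset of byte 0x00 over the
// allocation, and that memset is tagged with "auto-init" annotation metadata;
// under =pattern the fill byte comes from initializationPatternFor() instead.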
101
102static Value *handleHlslClip(const CallExpr *E, CodeGenFunction *CGF) {
103 Value *Op0 = CGF->EmitScalarExpr(E->getArg(0));
104
105 Constant *FZeroConst = ConstantFP::getZero(CGF->FloatTy);
106 Value *CMP;
107 Value *LastInstr;
108
109 if (const auto *VecTy = E->getArg(0)->getType()->getAs<clang::VectorType>()) {
110 FZeroConst = ConstantVector::getSplat(
111 ElementCount::getFixed(VecTy->getNumElements()), FZeroConst);
112 auto *FCompInst = CGF->Builder.CreateFCmpOLT(Op0, FZeroConst);
113 CMP = CGF->Builder.CreateIntrinsic(
114 CGF->Builder.getInt1Ty(), CGF->CGM.getHLSLRuntime().getAnyIntrinsic(),
115 {FCompInst}, nullptr);
116 } else
117 CMP = CGF->Builder.CreateFCmpOLT(Op0, FZeroConst);
118
119 if (CGF->CGM.getTarget().getTriple().isDXIL())
120 LastInstr = CGF->Builder.CreateIntrinsic(
121 CGF->VoidTy, llvm::Intrinsic::dx_discard, {CMP}, nullptr);
122 else if (CGF->CGM.getTarget().getTriple().isSPIRV()) {
123 BasicBlock *LT0 = CGF->createBasicBlock("lt0", CGF->CurFn);
124 BasicBlock *End = CGF->createBasicBlock("end", CGF->CurFn);
125
126 CGF->Builder.CreateCondBr(CMP, LT0, End);
127
128 CGF->Builder.SetInsertPoint(LT0);
129
130 CGF->Builder.CreateIntrinsic(CGF->VoidTy, llvm::Intrinsic::spv_discard, {},
131 nullptr);
132
133 LastInstr = CGF->Builder.CreateBr(End);
134
135 CGF->Builder.SetInsertPoint(End);
136 } else {
137 llvm_unreachable("Backend Codegen not supported.");
138 }
139
140 return LastInstr;
141}
142
143static Value *handleHlslSplitdouble(const CallExpr *E, CodeGenFunction *CGF) {
144 Value *Op0 = CGF->EmitScalarExpr(E->getArg(0));
145 const auto *OutArg1 = dyn_cast<HLSLOutArgExpr>(E->getArg(1));
146 const auto *OutArg2 = dyn_cast<HLSLOutArgExpr>(E->getArg(2));
147
148 CallArgList Args;
149 LValue Op1TmpLValue =
150 CGF->EmitHLSLOutArgExpr(OutArg1, Args, OutArg1->getType());
151 LValue Op2TmpLValue =
152 CGF->EmitHLSLOutArgExpr(OutArg2, Args, OutArg2->getType());
153
155 Args.reverseWritebacks();
156
157 Value *LowBits = nullptr;
158 Value *HighBits = nullptr;
159
160 if (CGF->CGM.getTarget().getTriple().isDXIL()) {
161
162 llvm::Type *RetElementTy = CGF->Int32Ty;
163 if (auto *Op0VecTy = E->getArg(0)->getType()->getAs<clang::VectorType>())
164 RetElementTy = llvm::VectorType::get(
165 CGF->Int32Ty, ElementCount::getFixed(Op0VecTy->getNumElements()));
166 auto *RetTy = llvm::StructType::get(RetElementTy, RetElementTy);
167
168 CallInst *CI = CGF->Builder.CreateIntrinsic(
169 RetTy, Intrinsic::dx_splitdouble, {Op0}, nullptr, "hlsl.splitdouble");
170
171 LowBits = CGF->Builder.CreateExtractValue(CI, 0);
172 HighBits = CGF->Builder.CreateExtractValue(CI, 1);
173
174 } else {
175 // For non-DXIL targets we generate the instructions directly.
176
177 if (!Op0->getType()->isVectorTy()) {
178 FixedVectorType *DestTy = FixedVectorType::get(CGF->Int32Ty, 2);
179 Value *Bitcast = CGF->Builder.CreateBitCast(Op0, DestTy);
180
181 LowBits = CGF->Builder.CreateExtractElement(Bitcast, (uint64_t)0);
182 HighBits = CGF->Builder.CreateExtractElement(Bitcast, 1);
183 } else {
184 int NumElements = 1;
185 if (const auto *VecTy =
186 E->getArg(0)->getType()->getAs<clang::VectorType>())
187 NumElements = VecTy->getNumElements();
188
189 FixedVectorType *Uint32VecTy =
190 FixedVectorType::get(CGF->Int32Ty, NumElements * 2);
191 Value *Uint32Vec = CGF->Builder.CreateBitCast(Op0, Uint32VecTy);
192 if (NumElements == 1) {
193 LowBits = CGF->Builder.CreateExtractElement(Uint32Vec, (uint64_t)0);
194 HighBits = CGF->Builder.CreateExtractElement(Uint32Vec, 1);
195 } else {
196 SmallVector<int> EvenMask, OddMask;
197 for (int I = 0, E = NumElements; I != E; ++I) {
198 EvenMask.push_back(I * 2);
199 OddMask.push_back(I * 2 + 1);
200 }
201 LowBits = CGF->Builder.CreateShuffleVector(Uint32Vec, EvenMask);
202 HighBits = CGF->Builder.CreateShuffleVector(Uint32Vec, OddMask);
203 }
204 }
205 }
206 CGF->Builder.CreateStore(LowBits, Op1TmpLValue.getAddress());
207 auto *LastInst =
208 CGF->Builder.CreateStore(HighBits, Op2TmpLValue.getAddress());
209 CGF->EmitWritebacks(Args);
210 return LastInst;
211}
212
213static Value *handleAsDoubleBuiltin(CodeGenFunction &CGF, const CallExpr *E) {
214 assert((E->getArg(0)->getType()->hasUnsignedIntegerRepresentation() &&
215 E->getArg(1)->getType()->hasUnsignedIntegerRepresentation()) &&
216 "asdouble operands types mismatch");
217 Value *OpLowBits = CGF.EmitScalarExpr(E->getArg(0));
218 Value *OpHighBits = CGF.EmitScalarExpr(E->getArg(1));
219
220 llvm::Type *ResultType = CGF.DoubleTy;
221 int N = 1;
222 if (auto *VTy = E->getArg(0)->getType()->getAs<clang::VectorType>()) {
223 N = VTy->getNumElements();
224 ResultType = llvm::FixedVectorType::get(CGF.DoubleTy, N);
225 }
226
227 if (CGF.CGM.getTarget().getTriple().isDXIL())
228 return CGF.Builder.CreateIntrinsic(
229 /*ReturnType=*/ResultType, Intrinsic::dx_asdouble,
230 ArrayRef<Value *>{OpLowBits, OpHighBits}, nullptr, "hlsl.asdouble");
231
232 if (!E->getArg(0)->getType()->isVectorType()) {
233 OpLowBits = CGF.Builder.CreateVectorSplat(1, OpLowBits);
234 OpHighBits = CGF.Builder.CreateVectorSplat(1, OpHighBits);
235 }
236
237 SmallVector<int> Mask;
238 for (int i = 0; i < N; i++) {
239 Mask.push_back(i);
240 Mask.push_back(i + N);
241 }
242
243 Value *BitVec = CGF.Builder.CreateShuffleVector(OpLowBits, OpHighBits, Mask);
244
245 return CGF.Builder.CreateBitCast(BitVec, ResultType);
246}
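// [Editorial illustration, not part of the original source] For the non-DXIL
// path above, a two-element asdouble call conceptually becomes:
//   Mask   = {0, 2, 1, 3}   ; interleave low/high 32-bit words
//   BitVec = shufflevector(<2 x i32> lowbits, <2 x i32> highbits, Mask)
//   result = bitcast <4 x i32> BitVec to <2 x double>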
247
248/// Helper for the read/write/add/inc X18 builtins: read the X18 register and
249/// return it as an i8 pointer.
250static llvm::Value *readX18AsPtr(CodeGenFunction &CGF) {
251 LLVMContext &Context = CGF.CGM.getLLVMContext();
252 llvm::Metadata *Ops[] = {llvm::MDString::get(Context, "x18")};
253 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
254 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
255 llvm::Function *F =
256 CGF.CGM.getIntrinsic(llvm::Intrinsic::read_register, {CGF.Int64Ty});
257 llvm::Value *X18 = CGF.Builder.CreateCall(F, Metadata);
258 return CGF.Builder.CreateIntToPtr(X18, CGF.Int8PtrTy);
259}
260
261/// getBuiltinLibFunction - Given a builtin id for a function like
262/// "__builtin_fabsf", return a Function* for "fabsf".
263llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD,
264 unsigned BuiltinID) {
265 assert(Context.BuiltinInfo.isLibFunction(BuiltinID));
266
267 // Get the name, skip over the __builtin_ prefix (if necessary).
268 StringRef Name;
269 GlobalDecl D(FD);
270
271 // TODO: This list should be expanded or refactored after all GCC-compatible
272 // std libcall builtins are implemented.
273 static SmallDenseMap<unsigned, StringRef, 64> F128Builtins{
274 {Builtin::BI__builtin___fprintf_chk, "__fprintf_chkieee128"},
275 {Builtin::BI__builtin___printf_chk, "__printf_chkieee128"},
276 {Builtin::BI__builtin___snprintf_chk, "__snprintf_chkieee128"},
277 {Builtin::BI__builtin___sprintf_chk, "__sprintf_chkieee128"},
278 {Builtin::BI__builtin___vfprintf_chk, "__vfprintf_chkieee128"},
279 {Builtin::BI__builtin___vprintf_chk, "__vprintf_chkieee128"},
280 {Builtin::BI__builtin___vsnprintf_chk, "__vsnprintf_chkieee128"},
281 {Builtin::BI__builtin___vsprintf_chk, "__vsprintf_chkieee128"},
282 {Builtin::BI__builtin_fprintf, "__fprintfieee128"},
283 {Builtin::BI__builtin_printf, "__printfieee128"},
284 {Builtin::BI__builtin_snprintf, "__snprintfieee128"},
285 {Builtin::BI__builtin_sprintf, "__sprintfieee128"},
286 {Builtin::BI__builtin_vfprintf, "__vfprintfieee128"},
287 {Builtin::BI__builtin_vprintf, "__vprintfieee128"},
288 {Builtin::BI__builtin_vsnprintf, "__vsnprintfieee128"},
289 {Builtin::BI__builtin_vsprintf, "__vsprintfieee128"},
290 {Builtin::BI__builtin_fscanf, "__fscanfieee128"},
291 {Builtin::BI__builtin_scanf, "__scanfieee128"},
292 {Builtin::BI__builtin_sscanf, "__sscanfieee128"},
293 {Builtin::BI__builtin_vfscanf, "__vfscanfieee128"},
294 {Builtin::BI__builtin_vscanf, "__vscanfieee128"},
295 {Builtin::BI__builtin_vsscanf, "__vsscanfieee128"},
296 {Builtin::BI__builtin_nexttowardf128, "__nexttowardieee128"},
297 };
298
299 // The AIX library functions frexpl, ldexpl, and modfl are for 128-bit
300 // IBM 'long double' (i.e. __ibm128). Map to the 'double' versions
301 // if it is 64-bit 'long double' mode.
302 static SmallDenseMap<unsigned, StringRef, 4> AIXLongDouble64Builtins{
303 {Builtin::BI__builtin_frexpl, "frexp"},
304 {Builtin::BI__builtin_ldexpl, "ldexp"},
305 {Builtin::BI__builtin_modfl, "modf"},
306 };
307
308 // If the builtin has been declared explicitly with an assembler label,
309 // use the mangled name. This differs from the plain label on platforms
310 // that prefix labels.
311 if (FD->hasAttr<AsmLabelAttr>())
312 Name = getMangledName(D);
313 else {
314 // TODO: This mutation should also be applied to targets other than PPC,
315 // once their backends support IEEE 128-bit style libcalls.
316 if (getTriple().isPPC64() &&
317 &getTarget().getLongDoubleFormat() == &llvm::APFloat::IEEEquad() &&
318 F128Builtins.contains(BuiltinID))
319 Name = F128Builtins[BuiltinID];
320 else if (getTriple().isOSAIX() &&
321 &getTarget().getLongDoubleFormat() ==
322 &llvm::APFloat::IEEEdouble() &&
323 AIXLongDouble64Builtins.contains(BuiltinID))
324 Name = AIXLongDouble64Builtins[BuiltinID];
325 else
326 Name = Context.BuiltinInfo.getName(BuiltinID).substr(10);
327 }
328
329 llvm::FunctionType *Ty =
330 cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType()));
331
332 return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false);
333}
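// [Editorial illustration, not part of the original source] Examples of the
// mapping performed above: __builtin_fabsf resolves to the library function
// "fabsf" (the 10-character "__builtin_" prefix is stripped), while on a
// PPC64 target whose 'long double' is IEEE quad, __builtin_printf resolves to
// "__printfieee128" via the F128Builtins table.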
334
335/// Emit the conversions required to turn the given value into an
336/// integer of the given size.
337static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V,
338 QualType T, llvm::IntegerType *IntType) {
339 V = CGF.EmitToMemory(V, T);
340
341 if (V->getType()->isPointerTy())
342 return CGF.Builder.CreatePtrToInt(V, IntType);
343
344 assert(V->getType() == IntType);
345 return V;
346}
347
348static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V,
349 QualType T, llvm::Type *ResultType) {
350 V = CGF.EmitFromMemory(V, T);
351
352 if (ResultType->isPointerTy())
353 return CGF.Builder.CreateIntToPtr(V, ResultType);
354
355 assert(V->getType() == ResultType);
356 return V;
357}
358
359static Address CheckAtomicAlignment(CodeGenFunction &CGF, const CallExpr *E) {
360 ASTContext &Ctx = CGF.getContext();
361 Address Ptr = CGF.EmitPointerWithAlignment(E->getArg(0));
362 unsigned Bytes = Ptr.getElementType()->isPointerTy()
363 ? Ctx.getTypeSizeInChars(Ctx.VoidPtrTy).getQuantity()
364 : Ptr.getElementType()->getScalarSizeInBits() / 8;
365 unsigned Align = Ptr.getAlignment().getQuantity();
366 if (Align % Bytes != 0) {
367 DiagnosticsEngine &Diags = CGF.CGM.getDiags();
368 Diags.Report(E->getBeginLoc(), diag::warn_sync_op_misaligned);
369 // Force address to be at least naturally-aligned.
370 return Ptr.withAlignment(CharUnits::fromQuantity(Bytes));
371 }
372 return Ptr;
373}
374
375/// Utility to insert an atomic instruction based on Intrinsic::ID
376/// and the expression node.
377static Value *MakeBinaryAtomicValue(
378 CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E,
379 AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
380
381 QualType T = E->getType();
382 assert(E->getArg(0)->getType()->isPointerType());
383 assert(CGF.getContext().hasSameUnqualifiedType(T,
384 E->getArg(0)->getType()->getPointeeType()));
385 assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
386
387 Address DestAddr = CheckAtomicAlignment(CGF, E);
388
389 llvm::IntegerType *IntType = llvm::IntegerType::get(
390 CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
391
392 llvm::Value *Val = CGF.EmitScalarExpr(E->getArg(1));
393 llvm::Type *ValueType = Val->getType();
394 Val = EmitToInt(CGF, Val, T, IntType);
395
396 llvm::Value *Result =
397 CGF.Builder.CreateAtomicRMW(Kind, DestAddr, Val, Ordering);
398 return EmitFromInt(CGF, Result, T, ValueType);
399}
400
401static Value *EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E) {
402 Value *Val = CGF.EmitScalarExpr(E->getArg(0));
403 Address Addr = CGF.EmitPointerWithAlignment(E->getArg(1));
404
405 Val = CGF.EmitToMemory(Val, E->getArg(0)->getType());
406 LValue LV = CGF.MakeAddrLValue(Addr, E->getArg(0)->getType());
407 LV.setNontemporal(true);
408 CGF.EmitStoreOfScalar(Val, LV, false);
409 return nullptr;
410}
411
412static Value *EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E) {
413 Address Addr = CGF.EmitPointerWithAlignment(E->getArg(0));
414
415 LValue LV = CGF.MakeAddrLValue(Addr, E->getType());
416 LV.setNontemporal(true);
417 return CGF.EmitLoadOfScalar(LV, E->getExprLoc());
418}
419
420static RValue EmitBinaryAtomic(CodeGenFunction &CGF,
421 llvm::AtomicRMWInst::BinOp Kind,
422 const CallExpr *E) {
423 return RValue::get(MakeBinaryAtomicValue(CGF, Kind, E));
424}
425
426/// Utility to insert an atomic instruction based Intrinsic::ID and
427/// the expression node, where the return value is the result of the
428/// operation.
429static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
430 llvm::AtomicRMWInst::BinOp Kind,
431 const CallExpr *E,
432 Instruction::BinaryOps Op,
433 bool Invert = false) {
434 QualType T = E->getType();
435 assert(E->getArg(0)->getType()->isPointerType());
436 assert(CGF.getContext().hasSameUnqualifiedType(T,
437 E->getArg(0)->getType()->getPointeeType()));
438 assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
439
440 Address DestAddr = CheckAtomicAlignment(CGF, E);
441
442 llvm::IntegerType *IntType = llvm::IntegerType::get(
443 CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
444
445 llvm::Value *Val = CGF.EmitScalarExpr(E->getArg(1));
446 llvm::Type *ValueType = Val->getType();
447 Val = EmitToInt(CGF, Val, T, IntType);
448
449 llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
450 Kind, DestAddr, Val, llvm::AtomicOrdering::SequentiallyConsistent);
451 Result = CGF.Builder.CreateBinOp(Op, Result, Val);
452 if (Invert)
453 Result =
454 CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result,
455 llvm::ConstantInt::getAllOnesValue(IntType));
456 Result = EmitFromInt(CGF, Result, T, ValueType);
457 return RValue::get(Result);
458}
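// [Editorial illustration, not part of the original source] The two helpers
// above presumably back the __sync_* builtins: __sync_fetch_and_add(p, v)
// returns what the atomicrmw yields (the old value), whereas
// __sync_add_and_fetch(p, v) additionally re-applies the operation:
//   %old = atomicrmw add ptr %p, i32 %v seq_cst
//   %new = add i32 %old, %v        ; returned value
// and __sync_nand_and_fetch uses the Invert flag to flip the combined result.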
459
460/// Utility to insert an atomic cmpxchg instruction.
461///
462/// @param CGF The current codegen function.
463/// @param E Builtin call expression to convert to cmpxchg.
464/// arg0 - address to operate on
465/// arg1 - value to compare with
466/// arg2 - new value
467/// @param ReturnBool Specifies whether to return success flag of
468/// cmpxchg result or the old value.
469///
470/// @returns result of cmpxchg, according to ReturnBool
471///
472/// Note: To lower Microsoft's _InterlockedCompareExchange* intrinsics, invoke
473/// EmitAtomicCmpXchgForMSIntrin instead.
474static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E,
475 bool ReturnBool) {
476 QualType T = ReturnBool ? E->getArg(1)->getType() : E->getType();
477 Address DestAddr = CheckAtomicAlignment(CGF, E);
478
479 llvm::IntegerType *IntType = llvm::IntegerType::get(
480 CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
481
482 Value *Cmp = CGF.EmitScalarExpr(E->getArg(1));
483 llvm::Type *ValueType = Cmp->getType();
484 Cmp = EmitToInt(CGF, Cmp, T, IntType);
485 Value *New = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType);
486
487 Value *Pair = CGF.Builder.CreateAtomicCmpXchg(
488 DestAddr, Cmp, New, llvm::AtomicOrdering::SequentiallyConsistent,
489 llvm::AtomicOrdering::SequentiallyConsistent);
490 if (ReturnBool)
491 // Extract boolean success flag and zext it to int.
492 return CGF.Builder.CreateZExt(CGF.Builder.CreateExtractValue(Pair, 1),
493 CGF.ConvertType(E->getType()));
494 else
495 // Extract old value and emit it using the same type as compare value.
496 return EmitFromInt(CGF, CGF.Builder.CreateExtractValue(Pair, 0), T,
497 ValueType);
498}
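// [Editorial illustration, not part of the original source] With ReturnBool
// set, the helper above yields the zero-extended success flag (as for
// __sync_bool_compare_and_swap); otherwise it yields the old value (as for
// __sync_val_compare_and_swap):
//   %pair = cmpxchg ptr %dst, i32 %cmp, i32 %new seq_cst seq_cst
//   %ok   = extractvalue { i32, i1 } %pair, 1   ; ReturnBool case
//   %old  = extractvalue { i32, i1 } %pair, 0   ; otherwise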
499
500/// This function should be invoked to emit atomic cmpxchg for Microsoft's
501/// _InterlockedCompareExchange* intrinsics which have the following signature:
502/// T _InterlockedCompareExchange(T volatile *Destination,
503/// T Exchange,
504/// T Comparand);
505///
506/// Whereas the llvm 'cmpxchg' instruction has the following syntax:
507/// cmpxchg *Destination, Comparand, Exchange.
508/// So we need to swap Comparand and Exchange when invoking
509/// CreateAtomicCmpXchg. That is the reason we could not use the above utility
510/// function MakeAtomicCmpXchgValue since it expects the arguments to be
511/// already swapped.
512
513static
514Value *EmitAtomicCmpXchgForMSIntrin(CodeGenFunction &CGF, const CallExpr *E,
515 AtomicOrdering SuccessOrdering = AtomicOrdering::SequentiallyConsistent) {
516 assert(E->getArg(0)->getType()->isPointerType());
517 assert(CGF.getContext().hasSameUnqualifiedType(
518 E->getType(), E->getArg(0)->getType()->getPointeeType()));
519 assert(CGF.getContext().hasSameUnqualifiedType(E->getType(),
520 E->getArg(1)->getType()));
521 assert(CGF.getContext().hasSameUnqualifiedType(E->getType(),
522 E->getArg(2)->getType()));
523
524 Address DestAddr = CheckAtomicAlignment(CGF, E);
525
526 auto *Exchange = CGF.EmitScalarExpr(E->getArg(1));
527 auto *RTy = Exchange->getType();
528
529 auto *Comparand = CGF.EmitScalarExpr(E->getArg(2));
530
531 if (RTy->isPointerTy()) {
532 Exchange = CGF.Builder.CreatePtrToInt(Exchange, CGF.IntPtrTy);
533 Comparand = CGF.Builder.CreatePtrToInt(Comparand, CGF.IntPtrTy);
534 }
535
536 // For Release ordering, the failure ordering should be Monotonic.
537 auto FailureOrdering = SuccessOrdering == AtomicOrdering::Release ?
538 AtomicOrdering::Monotonic :
539 SuccessOrdering;
540
541 // The atomic instruction is marked volatile for consistency with MSVC. This
542 // blocks the few atomics optimizations that LLVM has. If we want to optimize
543 // _Interlocked* operations in the future, we will have to remove the volatile
544 // marker.
545 auto *CmpXchg = CGF.Builder.CreateAtomicCmpXchg(
546 DestAddr, Comparand, Exchange, SuccessOrdering, FailureOrdering);
547 CmpXchg->setVolatile(true);
548
549 auto *Result = CGF.Builder.CreateExtractValue(CmpXchg, 0);
550 if (RTy->isPointerTy()) {
551 Result = CGF.Builder.CreateIntToPtr(Result, RTy);
552 }
553
554 return Result;
555}
556
557// 64-bit Microsoft platforms support 128-bit cmpxchg operations. They are
558// prototyped like this:
559//
560// unsigned char _InterlockedCompareExchange128...(
561// __int64 volatile * _Destination,
562// __int64 _ExchangeHigh,
563// __int64 _ExchangeLow,
564// __int64 * _ComparandResult);
565//
566// Note that Destination is assumed to be at least 16-byte aligned, despite
567// being typed int64.
568
569static Value *EmitAtomicCmpXchg128ForMSIntrin(CodeGenFunction &CGF,
570 const CallExpr *E,
571 AtomicOrdering SuccessOrdering) {
572 assert(E->getNumArgs() == 4);
573 llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
574 llvm::Value *ExchangeHigh = CGF.EmitScalarExpr(E->getArg(1));
575 llvm::Value *ExchangeLow = CGF.EmitScalarExpr(E->getArg(2));
576 Address ComparandAddr = CGF.EmitPointerWithAlignment(E->getArg(3));
577
578 assert(DestPtr->getType()->isPointerTy());
579 assert(!ExchangeHigh->getType()->isPointerTy());
580 assert(!ExchangeLow->getType()->isPointerTy());
581
582 // For Release ordering, the failure ordering should be Monotonic.
583 auto FailureOrdering = SuccessOrdering == AtomicOrdering::Release
584 ? AtomicOrdering::Monotonic
585 : SuccessOrdering;
586
587 // Convert to i128 pointers and values. Alignment is also overridden for
588 // destination pointer.
589 llvm::Type *Int128Ty = llvm::IntegerType::get(CGF.getLLVMContext(), 128);
590 Address DestAddr(DestPtr, Int128Ty,
591 CGF.getContext().toCharUnitsFromBits(128));
592 ComparandAddr = ComparandAddr.withElementType(Int128Ty);
593
594 // (((i128)hi) << 64) | ((i128)lo)
595 ExchangeHigh = CGF.Builder.CreateZExt(ExchangeHigh, Int128Ty);
596 ExchangeLow = CGF.Builder.CreateZExt(ExchangeLow, Int128Ty);
597 ExchangeHigh =
598 CGF.Builder.CreateShl(ExchangeHigh, llvm::ConstantInt::get(Int128Ty, 64));
599 llvm::Value *Exchange = CGF.Builder.CreateOr(ExchangeHigh, ExchangeLow);
600
601 // Load the comparand for the instruction.
602 llvm::Value *Comparand = CGF.Builder.CreateLoad(ComparandAddr);
603
604 auto *CXI = CGF.Builder.CreateAtomicCmpXchg(DestAddr, Comparand, Exchange,
605 SuccessOrdering, FailureOrdering);
606
607 // The atomic instruction is marked volatile for consistency with MSVC. This
608 // blocks the few atomics optimizations that LLVM has. If we want to optimize
609 // _Interlocked* operations in the future, we will have to remove the volatile
610 // marker.
611 CXI->setVolatile(true);
612
613 // Store the result as an outparameter.
614 CGF.Builder.CreateStore(CGF.Builder.CreateExtractValue(CXI, 0),
615 ComparandAddr);
616
617 // Get the success boolean and zero extend it to i8.
618 Value *Success = CGF.Builder.CreateExtractValue(CXI, 1);
619 return CGF.Builder.CreateZExt(Success, CGF.Int8Ty);
620}
621
622static Value *EmitAtomicIncrementValue(CodeGenFunction &CGF, const CallExpr *E,
623 AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
624 assert(E->getArg(0)->getType()->isPointerType());
625
626 auto *IntTy = CGF.ConvertType(E->getType());
627 Address DestAddr = CheckAtomicAlignment(CGF, E);
628 auto *Result = CGF.Builder.CreateAtomicRMW(
629 AtomicRMWInst::Add, DestAddr, ConstantInt::get(IntTy, 1), Ordering);
630 return CGF.Builder.CreateAdd(Result, ConstantInt::get(IntTy, 1));
631}
632
633static Value *EmitAtomicDecrementValue(
634 CodeGenFunction &CGF, const CallExpr *E,
635 AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
636 assert(E->getArg(0)->getType()->isPointerType());
637
638 auto *IntTy = CGF.ConvertType(E->getType());
639 Address DestAddr = CheckAtomicAlignment(CGF, E);
640 auto *Result = CGF.Builder.CreateAtomicRMW(
641 AtomicRMWInst::Sub, DestAddr, ConstantInt::get(IntTy, 1), Ordering);
642 return CGF.Builder.CreateSub(Result, ConstantInt::get(IntTy, 1));
643}
644
645// Build a plain volatile load.
646static Value *EmitISOVolatileLoad(CodeGenFunction &CGF, const CallExpr *E) {
647 Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
648 QualType ElTy = E->getArg(0)->getType()->getPointeeType();
649 CharUnits LoadSize = CGF.getContext().getTypeSizeInChars(ElTy);
650 llvm::Type *ITy =
651 llvm::IntegerType::get(CGF.getLLVMContext(), LoadSize.getQuantity() * 8);
652 llvm::LoadInst *Load = CGF.Builder.CreateAlignedLoad(ITy, Ptr, LoadSize);
653 Load->setVolatile(true);
654 return Load;
655}
656
657// Build a plain volatile store.
658static Value *EmitISOVolatileStore(CodeGenFunction &CGF, const CallExpr *E) {
659 Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
660 Value *Value = CGF.EmitScalarExpr(E->getArg(1));
661 QualType ElTy = E->getArg(0)->getType()->getPointeeType();
662 CharUnits StoreSize = CGF.getContext().getTypeSizeInChars(ElTy);
663 llvm::StoreInst *Store =
664 CGF.Builder.CreateAlignedStore(Value, Ptr, StoreSize);
665 Store->setVolatile(true);
666 return Store;
667}
668
669// Emit a simple mangled intrinsic that has 1 argument and a return type
670// matching the argument type. Depending on mode, this may be a constrained
671// floating-point intrinsic.
672static Value *emitUnaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
673 const CallExpr *E, unsigned IntrinsicID,
674 unsigned ConstrainedIntrinsicID) {
675 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
676
677 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
678 if (CGF.Builder.getIsFPConstrained()) {
679 Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());
680 return CGF.Builder.CreateConstrainedFPCall(F, { Src0 });
681 } else {
682 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
683 return CGF.Builder.CreateCall(F, Src0);
684 }
685}
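// [Editorial illustration, not part of the original source] For example,
// __builtin_sqrt(x) would normally emit
//   call double @llvm.sqrt.f64(double %x)
// but in a constrained-FP context (e.g. under "#pragma STDC FENV_ACCESS ON")
// the constrained variant is used instead:
//   call double @llvm.experimental.constrained.sqrt.f64(double %x,
//        metadata !"round.dynamic", metadata !"fpexcept.strict")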
686
687// Emit an intrinsic that has 2 operands of the same type as its result.
688// Depending on mode, this may be a constrained floating-point intrinsic.
689static Value *emitBinaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
690 const CallExpr *E, unsigned IntrinsicID,
691 unsigned ConstrainedIntrinsicID) {
692 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
693 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
694
695 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
696 if (CGF.Builder.getIsFPConstrained()) {
697 Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());
698 return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1 });
699 } else {
700 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
701 return CGF.Builder.CreateCall(F, { Src0, Src1 });
702 }
703}
704
705// Has second type mangled argument.
706static Value *emitBinaryExpMaybeConstrainedFPBuiltin(
707 CodeGenFunction &CGF, const CallExpr *E, llvm::Intrinsic::ID IntrinsicID,
708 llvm::Intrinsic::ID ConstrainedIntrinsicID) {
709 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
710 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
711
712 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
713 if (CGF.Builder.getIsFPConstrained()) {
714 Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID,
715 {Src0->getType(), Src1->getType()});
716 return CGF.Builder.CreateConstrainedFPCall(F, {Src0, Src1});
717 }
718
719 Function *F =
720 CGF.CGM.getIntrinsic(IntrinsicID, {Src0->getType(), Src1->getType()});
721 return CGF.Builder.CreateCall(F, {Src0, Src1});
722}
723
724// Emit an intrinsic that has 3 operands of the same type as its result.
725// Depending on mode, this may be a constrained floating-point intrinsic.
726static Value *emitTernaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
727 const CallExpr *E, unsigned IntrinsicID,
728 unsigned ConstrainedIntrinsicID) {
729 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
730 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
731 llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));
732
733 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
734 if (CGF.Builder.getIsFPConstrained()) {
735 Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());
736 return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1, Src2 });
737 } else {
738 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
739 return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 });
740 }
741}
742
743// Emit an intrinsic where all operands are of the same type as the result.
744// Depending on mode, this may be a constrained floating-point intrinsic.
745static Value *emitCallMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
746 unsigned IntrinsicID,
747 unsigned ConstrainedIntrinsicID,
748 llvm::Type *Ty,
749 ArrayRef<Value *> Args) {
750 Function *F;
751 if (CGF.Builder.getIsFPConstrained())
752 F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Ty);
753 else
754 F = CGF.CGM.getIntrinsic(IntrinsicID, Ty);
755
756 if (CGF.Builder.getIsFPConstrained())
757 return CGF.Builder.CreateConstrainedFPCall(F, Args);
758 else
759 return CGF.Builder.CreateCall(F, Args);
760}
761
762// Emit a simple intrinsic that has N scalar arguments and a return type
763// matching the argument type. It is assumed that only the first argument is
764// overloaded.
765template <unsigned N>
766static Value *emitBuiltinWithOneOverloadedType(CodeGenFunction &CGF,
767 const CallExpr *E,
768 unsigned IntrinsicID,
769 llvm::StringRef Name = "") {
770 static_assert(N, "expect non-empty argument");
771 SmallVector<Value *, N> Args;
772 for (unsigned I = 0; I < N; ++I)
773 Args.push_back(CGF.EmitScalarExpr(E->getArg(I)));
774 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Args[0]->getType());
775 return CGF.Builder.CreateCall(F, Args, Name);
776}
777
778// Emit an intrinsic that has 1 float or double operand, and 1 integer.
779static Value *emitFPIntBuiltin(CodeGenFunction &CGF,
780 const CallExpr *E,
781 unsigned IntrinsicID) {
782 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
783 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
784
785 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
786 return CGF.Builder.CreateCall(F, {Src0, Src1});
787}
788
789// Emit an intrinsic that has overloaded integer result and fp operand.
790static Value *
791emitMaybeConstrainedFPToIntRoundBuiltin(CodeGenFunction &CGF, const CallExpr *E,
792 unsigned IntrinsicID,
793 unsigned ConstrainedIntrinsicID) {
794 llvm::Type *ResultType = CGF.ConvertType(E->getType());
795 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
796
797 if (CGF.Builder.getIsFPConstrained()) {
798 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
799 Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID,
800 {ResultType, Src0->getType()});
801 return CGF.Builder.CreateConstrainedFPCall(F, {Src0});
802 } else {
803 Function *F =
804 CGF.CGM.getIntrinsic(IntrinsicID, {ResultType, Src0->getType()});
805 return CGF.Builder.CreateCall(F, Src0);
806 }
807}
808
809static Value *emitFrexpBuiltin(CodeGenFunction &CGF, const CallExpr *E,
810 llvm::Intrinsic::ID IntrinsicID) {
811 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
812 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
813
814 QualType IntPtrTy = E->getArg(1)->getType()->getPointeeType();
815 llvm::Type *IntTy = CGF.ConvertType(IntPtrTy);
816 llvm::Function *F =
817 CGF.CGM.getIntrinsic(IntrinsicID, {Src0->getType(), IntTy});
818 llvm::Value *Call = CGF.Builder.CreateCall(F, Src0);
819
820 llvm::Value *Exp = CGF.Builder.CreateExtractValue(Call, 1);
821 LValue LV = CGF.MakeNaturalAlignAddrLValue(Src1, IntPtrTy);
822 CGF.EmitStoreOfScalar(Exp, LV);
823
824 return CGF.Builder.CreateExtractValue(Call, 0);
825}
826
827static void emitSincosBuiltin(CodeGenFunction &CGF, const CallExpr *E,
828 llvm::Intrinsic::ID IntrinsicID) {
829 llvm::Value *Val = CGF.EmitScalarExpr(E->getArg(0));
830 llvm::Value *Dest0 = CGF.EmitScalarExpr(E->getArg(1));
831 llvm::Value *Dest1 = CGF.EmitScalarExpr(E->getArg(2));
832
833 llvm::Function *F = CGF.CGM.getIntrinsic(IntrinsicID, {Val->getType()});
834 llvm::Value *Call = CGF.Builder.CreateCall(F, Val);
835
836 llvm::Value *SinResult = CGF.Builder.CreateExtractValue(Call, 0);
837 llvm::Value *CosResult = CGF.Builder.CreateExtractValue(Call, 1);
838
839 QualType DestPtrType = E->getArg(1)->getType()->getPointeeType();
840 LValue SinLV = CGF.MakeNaturalAlignAddrLValue(Dest0, DestPtrType);
841 LValue CosLV = CGF.MakeNaturalAlignAddrLValue(Dest1, DestPtrType);
842
843 llvm::StoreInst *StoreSin =
844 CGF.Builder.CreateStore(SinResult, SinLV.getAddress());
845 llvm::StoreInst *StoreCos =
846 CGF.Builder.CreateStore(CosResult, CosLV.getAddress());
847
848 // Mark the two stores as non-aliasing with each other. The order of stores
849 // emitted by this builtin is arbitrary, enforcing a particular order will
850 // prevent optimizations later on.
851 llvm::MDBuilder MDHelper(CGF.getLLVMContext());
852 MDNode *Domain = MDHelper.createAnonymousAliasScopeDomain();
853 MDNode *AliasScope = MDHelper.createAnonymousAliasScope(Domain);
854 MDNode *AliasScopeList = MDNode::get(Call->getContext(), AliasScope);
855 StoreSin->setMetadata(LLVMContext::MD_alias_scope, AliasScopeList);
856 StoreCos->setMetadata(LLVMContext::MD_noalias, AliasScopeList);
857}
858
859/// EmitFAbs - Emit a call to @llvm.fabs().
860static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) {
861 Function *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType());
862 llvm::CallInst *Call = CGF.Builder.CreateCall(F, V);
863 Call->setDoesNotAccessMemory();
864 return Call;
865}
866
867/// Emit the computation of the sign bit for a floating point value. Returns
868/// the i1 sign bit value.
869static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) {
870 LLVMContext &C = CGF.CGM.getLLVMContext();
871
872 llvm::Type *Ty = V->getType();
873 int Width = Ty->getPrimitiveSizeInBits();
874 llvm::Type *IntTy = llvm::IntegerType::get(C, Width);
875 V = CGF.Builder.CreateBitCast(V, IntTy);
876 if (Ty->isPPC_FP128Ty()) {
877 // We want the sign bit of the higher-order double. The bitcast we just
878 // did works as if the double-double was stored to memory and then
879 // read as an i128. The "store" will put the higher-order double in the
880 // lower address in both little- and big-Endian modes, but the "load"
881 // will treat those bits as a different part of the i128: the low bits in
882 // little-Endian, the high bits in big-Endian. Therefore, on big-Endian
883 // we need to shift the high bits down to the low before truncating.
884 Width >>= 1;
885 if (CGF.getTarget().isBigEndian()) {
886 Value *ShiftCst = llvm::ConstantInt::get(IntTy, Width);
887 V = CGF.Builder.CreateLShr(V, ShiftCst);
888 }
889 // We are truncating value in order to extract the higher-order
890 // double, which we will be using to extract the sign from.
891 IntTy = llvm::IntegerType::get(C, Width);
892 V = CGF.Builder.CreateTrunc(V, IntTy);
893 }
894 Value *Zero = llvm::Constant::getNullValue(IntTy);
895 return CGF.Builder.CreateICmpSLT(V, Zero);
896}
897
898/// Checks no arguments or results are passed indirectly in the ABI (i.e. via a
900/// hidden pointer). This is used to check that annotating FP libcalls (which
901/// could set `errno`) with "int" TBAA metadata is safe. If any floating-point
901/// arguments are passed indirectly, setup for the call could be incorrectly
902/// optimized out.
903static bool HasNoIndirectArgumentsOrResults(CGFunctionInfo const &FnInfo) {
904 auto IsIndirect = [&](ABIArgInfo const &info) {
905 return info.isIndirect() || info.isIndirectAliased() || info.isInAlloca();
906 };
907 return !IsIndirect(FnInfo.getReturnInfo()) &&
908 llvm::none_of(FnInfo.arguments(),
909 [&](CGFunctionInfoArgInfo const &ArgInfo) {
910 return IsIndirect(ArgInfo.info);
911 });
912}
913
914static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD,
915 const CallExpr *E, llvm::Constant *calleeValue) {
916 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
917 CGCallee callee = CGCallee::forDirect(calleeValue, GlobalDecl(FD));
918 llvm::CallBase *callOrInvoke = nullptr;
919 CGFunctionInfo const *FnInfo = nullptr;
920 RValue Call =
921 CGF.EmitCall(E->getCallee()->getType(), callee, E, ReturnValueSlot(),
922 /*Chain=*/nullptr, &callOrInvoke, &FnInfo);
923
924 if (unsigned BuiltinID = FD->getBuiltinID()) {
925 // Check whether this is an FP math builtin function, such as BI__builtin_expf.
926 ASTContext &Context = CGF.getContext();
927 bool ConstWithoutErrnoAndExceptions =
928 Context.BuiltinInfo.isConstWithoutErrnoAndExceptions(BuiltinID);
929 // Restrict to targets with errno; for example, macOS doesn't set errno.
930 // TODO: Support builtin function with complex type returned, eg: cacosh
931 if (ConstWithoutErrnoAndExceptions && CGF.CGM.getLangOpts().MathErrno &&
932 !CGF.Builder.getIsFPConstrained() && Call.isScalar() &&
933 HasNoIndirectArgumentsOrResults(*FnInfo)) {
934 // Emit "int" TBAA metadata on FP math libcalls.
935 clang::QualType IntTy = Context.IntTy;
936 TBAAAccessInfo TBAAInfo = CGF.CGM.getTBAAAccessInfo(IntTy);
937 CGF.CGM.DecorateInstructionWithTBAA(callOrInvoke, TBAAInfo);
938 }
939 }
940 return Call;
941}
942
943/// Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.*
944/// depending on IntrinsicID.
945///
946/// \arg CGF The current codegen function.
947/// \arg IntrinsicID The ID for the Intrinsic we wish to generate.
948/// \arg X The first argument to the llvm.*.with.overflow.*.
949/// \arg Y The second argument to the llvm.*.with.overflow.*.
950/// \arg Carry The carry returned by the llvm.*.with.overflow.*.
951/// \returns The result (i.e. sum/product) returned by the intrinsic.
952static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF,
953 const llvm::Intrinsic::ID IntrinsicID,
954 llvm::Value *X, llvm::Value *Y,
955 llvm::Value *&Carry) {
956 // Make sure we have integers of the same width.
957 assert(X->getType() == Y->getType() &&
958 "Arguments must be the same type. (Did you forget to make sure both "
959 "arguments have the same integer width?)");
960
961 Function *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType());
962 llvm::Value *Tmp = CGF.Builder.CreateCall(Callee, {X, Y});
963 Carry = CGF.Builder.CreateExtractValue(Tmp, 1);
964 return CGF.Builder.CreateExtractValue(Tmp, 0);
965}
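// [Editorial illustration, not part of the original source] A typical use of
// the helper above, e.g. for __builtin_sadd_overflow(x, y, &r):
//   %t     = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %x, i32 %y)
//   %sum   = extractvalue { i32, i1 } %t, 0   ; returned result
//   %carry = extractvalue { i32, i1 } %t, 1   ; reported via the Carry out-param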
966
967static Value *emitRangedBuiltin(CodeGenFunction &CGF, unsigned IntrinsicID,
968 int low, int high) {
969 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, {});
970 llvm::CallInst *Call = CGF.Builder.CreateCall(F);
971 llvm::ConstantRange CR(APInt(32, low), APInt(32, high));
972 Call->addRangeRetAttr(CR);
973 Call->addRetAttr(llvm::Attribute::AttrKind::NoUndef);
974 return Call;
975}
976
977namespace {
978 struct WidthAndSignedness {
979 unsigned Width;
980 bool Signed;
981 };
982}
983
984static WidthAndSignedness
985getIntegerWidthAndSignedness(const clang::ASTContext &context,
986 const clang::QualType Type) {
987 assert(Type->isIntegerType() && "Given type is not an integer.");
988 unsigned Width = context.getIntWidth(Type);
989 bool Signed = Type->isSignedIntegerType();
990 return {Width, Signed};
991}
992
993// Given one or more integer types, this function produces an integer type that
994// encompasses them: any value in one of the given types could be expressed in
995// the encompassing type.
996static struct WidthAndSignedness
997EncompassingIntegerType(ArrayRef<struct WidthAndSignedness> Types) {
998 assert(Types.size() > 0 && "Empty list of types.");
999
1000 // If any of the given types is signed, we must return a signed type.
1001 bool Signed = false;
1002 for (const auto &Type : Types) {
1003 Signed |= Type.Signed;
1004 }
1005
1006 // The encompassing type must have a width greater than or equal to the width
1007 // of the specified types. Additionally, if the encompassing type is signed,
1008 // its width must be strictly greater than the width of any unsigned types
1009 // given.
1010 unsigned Width = 0;
1011 for (const auto &Type : Types) {
1012 unsigned MinWidth = Type.Width + (Signed && !Type.Signed);
1013 if (Width < MinWidth) {
1014 Width = MinWidth;
1015 }
1016 }
1017
1018 return {Width, Signed};
1019}
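// [Editorial illustration, not part of the original source] Worked example of
// the rule above: for {unsigned, 32 bits} and {signed, 32 bits} the result is
// signed, the unsigned input needs 32 + 1 = 33 bits, the signed input needs
// 32, so the encompassing type is {signed, 33 bits}.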
1020
1021Value *CodeGenFunction::EmitVAStartEnd(Value *ArgValue, bool IsStart) {
1022 Intrinsic::ID inst = IsStart ? Intrinsic::vastart : Intrinsic::vaend;
1023 return Builder.CreateCall(CGM.getIntrinsic(inst, {ArgValue->getType()}),
1024 ArgValue);
1025}
1026
1027/// Checks if using the result of __builtin_object_size(p, @p From) in place of
1028/// __builtin_object_size(p, @p To) is correct
1029static bool areBOSTypesCompatible(int From, int To) {
1030 // Note: Our __builtin_object_size implementation currently treats Type=0 and
1031 // Type=2 identically. Encoding this implementation detail here may make
1032 // improving __builtin_object_size difficult in the future, so it's omitted.
1033 return From == To || (From == 0 && To == 1) || (From == 3 && To == 2);
1034}
1035
1036static llvm::Value *
1037getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType) {
1038 return ConstantInt::get(ResType, (Type & 2) ? 0 : -1, /*isSigned=*/true);
1039}
1040
1041llvm::Value *
1042CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type,
1043 llvm::IntegerType *ResType,
1044 llvm::Value *EmittedE,
1045 bool IsDynamic) {
1046 uint64_t ObjectSize;
1047 if (!E->tryEvaluateObjectSize(ObjectSize, getContext(), Type))
1048 return emitBuiltinObjectSize(E, Type, ResType, EmittedE, IsDynamic);
1049 return ConstantInt::get(ResType, ObjectSize, /*isSigned=*/true);
1050}
1051
1052const FieldDecl *CodeGenFunction::FindFlexibleArrayMemberFieldAndOffset(
1053 ASTContext &Ctx, const RecordDecl *RD, const FieldDecl *FAMDecl,
1054 uint64_t &Offset) {
1055 const LangOptions::StrictFlexArraysLevelKind StrictFlexArraysLevel =
1056 getLangOpts().getStrictFlexArraysLevel();
1057 uint32_t FieldNo = 0;
1058
1059 if (RD->isImplicit())
1060 return nullptr;
1061
1062 for (const FieldDecl *FD : RD->fields()) {
1063 if ((!FAMDecl || FD == FAMDecl) &&
1064 Decl::isFlexibleArrayMemberLike(
1065 Ctx, FD, FD->getType(), StrictFlexArraysLevel,
1066 /*IgnoreTemplateOrMacroSubstitution=*/true)) {
1067 const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(RD);
1068 Offset += Layout.getFieldOffset(FieldNo);
1069 return FD;
1070 }
1071
1072 QualType Ty = FD->getType();
1073 if (Ty->isRecordType()) {
1074 if (const FieldDecl *Field = FindFlexibleArrayMemberFieldAndOffset(
1075 Ctx, Ty->getAsRecordDecl(), FAMDecl, Offset)) {
1076 const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(RD);
1077 Offset += Layout.getFieldOffset(FieldNo);
1078 return Field;
1079 }
1080 }
1081
1082 if (!RD->isUnion())
1083 ++FieldNo;
1084 }
1085
1086 return nullptr;
1087}
1088
1089static unsigned CountCountedByAttrs(const RecordDecl *RD) {
1090 unsigned Num = 0;
1091
1092 for (const FieldDecl *FD : RD->fields()) {
1093 if (FD->getType()->isCountAttributedType())
1094 return ++Num;
1095
1096 QualType Ty = FD->getType();
1097 if (Ty->isRecordType())
1098 Num += CountCountedByAttrs(Ty->getAsRecordDecl());
1099 }
1100
1101 return Num;
1102}
1103
1104llvm::Value *
1105CodeGenFunction::emitFlexibleArrayMemberSize(const Expr *E, unsigned Type,
1106 llvm::IntegerType *ResType) {
1107 // The code generated here calculates the size of a struct with a flexible
1108 // array member that uses the counted_by attribute. There are three cases
1109 // we handle:
1110 //
1111 // struct s {
1112 // unsigned long flags;
1113 // int count;
1114 // int array[] __attribute__((counted_by(count)));
1115 // }
1116 //
1117 // 1) bdos of the flexible array itself:
1118 //
1119 // __builtin_dynamic_object_size(p->array, 1) ==
1120 // p->count * sizeof(*p->array)
1121 //
1122 // 2) bdos of a pointer into the flexible array:
1123 //
1124 // __builtin_dynamic_object_size(&p->array[42], 1) ==
1125 // (p->count - 42) * sizeof(*p->array)
1126 //
1127 // 3) bdos of the whole struct, including the flexible array:
1128 //
1129 // __builtin_dynamic_object_size(p, 1) ==
1130 // max(sizeof(struct s),
1131 // offsetof(struct s, array) + p->count * sizeof(*p->array))
1132 //
1133 ASTContext &Ctx = getContext();
1134 const Expr *Base = E->IgnoreParenImpCasts();
1135 const Expr *Idx = nullptr;
1136
1137 if (const auto *UO = dyn_cast<UnaryOperator>(Base);
1138 UO && UO->getOpcode() == UO_AddrOf) {
1139 Expr *SubExpr = UO->getSubExpr()->IgnoreParenImpCasts();
1140 if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(SubExpr)) {
1141 Base = ASE->getBase()->IgnoreParenImpCasts();
1142 Idx = ASE->getIdx()->IgnoreParenImpCasts();
1143
1144 if (const auto *IL = dyn_cast<IntegerLiteral>(Idx)) {
1145 int64_t Val = IL->getValue().getSExtValue();
1146 if (Val < 0)
1147 return getDefaultBuiltinObjectSizeResult(Type, ResType);
1148
1149 if (Val == 0)
1150 // The index is 0, so we don't need to take it into account.
1151 Idx = nullptr;
1152 }
1153 } else {
1154 // Potential pointer to another element in the struct.
1155 Base = SubExpr;
1156 }
1157 }
1158
1159 // Get the flexible array member Decl.
1160 const RecordDecl *OuterRD = nullptr;
1161 const FieldDecl *FAMDecl = nullptr;
1162 if (const auto *ME = dyn_cast<MemberExpr>(Base)) {
1163 // Check if \p Base is referencing the FAM itself.
1164 const ValueDecl *VD = ME->getMemberDecl();
1165 OuterRD = VD->getDeclContext()->getOuterLexicalRecordContext();
1166 FAMDecl = dyn_cast<FieldDecl>(VD);
1167 if (!FAMDecl)
1168 return nullptr;
1169 } else if (const auto *DRE = dyn_cast<DeclRefExpr>(Base)) {
1170 // Check if we're pointing to the whole struct.
1171 QualType Ty = DRE->getDecl()->getType();
1172 if (Ty->isPointerType())
1173 Ty = Ty->getPointeeType();
1174 OuterRD = Ty->getAsRecordDecl();
1175
1176 // If we have a situation like this:
1177 //
1178 // struct union_of_fams {
1179 // int flags;
1180 // union {
1181 // signed char normal_field;
1182 // struct {
1183 // int count1;
1184 // int arr1[] __counted_by(count1);
1185 // };
1186 // struct {
1187 // signed char count2;
1188 // int arr2[] __counted_by(count2);
1189 // };
1190 // };
1191 // };
1192 //
1193 // We don't know which 'count' to use in this scenario:
1194 //
1195 // size_t get_size(struct union_of_fams *p) {
1196 // return __builtin_dynamic_object_size(p, 1);
1197 // }
1198 //
1199 // Instead of calculating a wrong number, we give up.
1200 if (OuterRD && CountCountedByAttrs(OuterRD) > 1)
1201 return nullptr;
1202 }
1203
1204 if (!OuterRD)
1205 return nullptr;
1206
1207 // We call FindFlexibleArrayMemberAndOffset even if FAMDecl is non-null to
1208 // get its offset.
1209 uint64_t Offset = 0;
1210 FAMDecl =
1211 FindFlexibleArrayMemberFieldAndOffset(Ctx, OuterRD, FAMDecl, Offset);
1212 Offset = Ctx.toCharUnitsFromBits(Offset).getQuantity();
1213
1214 if (!FAMDecl || !FAMDecl->getType()->isCountAttributedType())
1215 // No flexible array member found or it doesn't have the "counted_by"
1216 // attribute.
1217 return nullptr;
1218
1219 const FieldDecl *CountedByFD = FAMDecl->findCountedByField();
1220 if (!CountedByFD)
1221 // Can't find the field referenced by the "counted_by" attribute.
1222 return nullptr;
1223
1224 if (isa<DeclRefExpr>(Base))
1225 // The whole struct is specified in the __bdos. The calculation of the
1226 // whole size of the structure can be done in two ways:
1227 //
1228 // 1) sizeof(struct S) + count * sizeof(typeof(fam))
1229 // 2) offsetof(struct S, fam) + count * sizeof(typeof(fam))
1230 //
1231 // The first will add additional padding after the end of the array
1232 // allocation, while the second method is more precise but not quite what
1233 // programmers expect. See
1234 // https://lore.kernel.org/lkml/ZvV6X5FPBBW7CO1f@archlinux/ for a
1235 // discussion of the topic.
1236 //
1237 // GCC isn't (currently) able to calculate __bdos on a pointer to the whole
1238 // structure. Therefore, because of the above issue, we'll choose to match
1239 // what GCC does for consistency's sake.
1240 return nullptr;
1241
1242 // Build a load of the counted_by field.
1243 bool IsSigned = CountedByFD->getType()->isSignedIntegerType();
1244 Value *CountedByInst = EmitLoadOfCountedByField(Base, FAMDecl, CountedByFD);
1245 if (!CountedByInst)
1246 return getDefaultBuiltinObjectSizeResult(Type, ResType);
1247
1248 CountedByInst = Builder.CreateIntCast(CountedByInst, ResType, IsSigned);
1249
1250 // Build a load of the index and subtract it from the count.
1251 Value *IdxInst = nullptr;
1252 if (Idx) {
1253 if (Idx->HasSideEffects(getContext()))
1254 // We can't have side-effects.
1255 return getDefaultBuiltinObjectSizeResult(Type, ResType);
1256
1257 bool IdxSigned = Idx->getType()->isSignedIntegerType();
1258 IdxInst = EmitAnyExprToTemp(Idx).getScalarVal();
1259 IdxInst = Builder.CreateIntCast(IdxInst, ResType, IdxSigned);
1260
1261 // We go ahead with the calculation here. If the index turns out to be
1262 // negative, we'll catch it at the end.
1263 CountedByInst =
1264 Builder.CreateSub(CountedByInst, IdxInst, "", !IsSigned, IsSigned);
1265 }
1266
1267 // Calculate how large the flexible array member is in bytes.
1268 const ArrayType *ArrayTy = Ctx.getAsArrayType(FAMDecl->getType());
1269 CharUnits Size = Ctx.getTypeSizeInChars(ArrayTy->getElementType());
1270 llvm::Constant *ElemSize =
1271 llvm::ConstantInt::get(ResType, Size.getQuantity(), IsSigned);
1272 Value *Res =
1273 Builder.CreateMul(CountedByInst, ElemSize, "", !IsSigned, IsSigned);
1274 Res = Builder.CreateIntCast(Res, ResType, IsSigned);
1275
1276 // A negative \p IdxInst or \p CountedByInst means that the index lands
1277 // outside of the flexible array member. If that's the case, we want to
1278 // return 0.
1279 Value *Cmp = Builder.CreateIsNotNeg(CountedByInst);
1280 if (IdxInst)
1281 Cmp = Builder.CreateAnd(Builder.CreateIsNotNeg(IdxInst), Cmp);
1282
1283 return Builder.CreateSelect(Cmp, Res, ConstantInt::get(ResType, 0, IsSigned));
1284}
1285
1286/// Returns a Value corresponding to the size of the given expression.
1287/// This Value may be either of the following:
1288/// - A llvm::Argument (if E is a param with the pass_object_size attribute on
1289/// it)
1290/// - A call to the @llvm.objectsize intrinsic
1291///
1292/// EmittedE is the result of emitting `E` as a scalar expr. If it's non-null
1293/// and we wouldn't otherwise try to reference a pass_object_size parameter,
1294/// we'll call @llvm.objectsize on EmittedE, rather than emitting E.
1295llvm::Value *
1296CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type,
1297 llvm::IntegerType *ResType,
1298 llvm::Value *EmittedE, bool IsDynamic) {
1299 // We need to reference an argument if the pointer is a parameter with the
1300 // pass_object_size attribute.
1301 if (auto *D = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts())) {
1302 auto *Param = dyn_cast<ParmVarDecl>(D->getDecl());
1303 auto *PS = D->getDecl()->getAttr<PassObjectSizeAttr>();
1304 if (Param != nullptr && PS != nullptr &&
1305 areBOSTypesCompatible(PS->getType(), Type)) {
1306 auto Iter = SizeArguments.find(Param);
1307 assert(Iter != SizeArguments.end());
1308
1309 const ImplicitParamDecl *D = Iter->second;
1310 auto DIter = LocalDeclMap.find(D);
1311 assert(DIter != LocalDeclMap.end());
1312
1313 return EmitLoadOfScalar(DIter->second, /*Volatile=*/false,
1314 getContext().getSizeType(), E->getBeginLoc());
1315 }
1316 }
1317
1318 if (IsDynamic) {
1319 // Emit special code for a flexible array member with the "counted_by"
1320 // attribute.
1321 if (Value *V = emitFlexibleArrayMemberSize(E, Type, ResType))
1322 return V;
1323 }
1324
1325 // LLVM can't handle Type=3 appropriately, and __builtin_object_size shouldn't
1326 // evaluate E for side-effects. In either case, we shouldn't lower to
1327 // @llvm.objectsize.
1328 if (Type == 3 || (!EmittedE && E->HasSideEffects(getContext())))
1329 return getDefaultBuiltinObjectSizeResult(Type, ResType);
1330
1331 Value *Ptr = EmittedE ? EmittedE : EmitScalarExpr(E);
1332 assert(Ptr->getType()->isPointerTy() &&
1333 "Non-pointer passed to __builtin_object_size?");
1334
1335 Function *F =
1336 CGM.getIntrinsic(Intrinsic::objectsize, {ResType, Ptr->getType()});
1337
1338 // LLVM only supports 0 and 2, make sure that we pass along that as a boolean.
1339 Value *Min = Builder.getInt1((Type & 2) != 0);
1340 // For GCC compatibility, __builtin_object_size treats NULL as unknown size.
1341 Value *NullIsUnknown = Builder.getTrue();
1342 Value *Dynamic = Builder.getInt1(IsDynamic);
1343 return Builder.CreateCall(F, {Ptr, Min, NullIsUnknown, Dynamic});
1344}
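// [Editorial illustration, not part of the original source] For instance,
// __builtin_object_size(p, 0) with no pass_object_size parameter lowers to
//   call i64 @llvm.objectsize.i64.p0(ptr %p, i1 false, i1 true, i1 false)
// i.e. Min = false (Type & 2 is 0), NullIsUnknown = true, Dynamic = false.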
1345
1346namespace {
1347/// A struct to generically describe a bit test intrinsic.
1348struct BitTest {
1349 enum ActionKind : uint8_t { TestOnly, Complement, Reset, Set };
1350 enum InterlockingKind : uint8_t {
1351 Unlocked,
1352 Sequential,
1353 Acquire,
1354 Release,
1355 NoFence
1356 };
1357
1358 ActionKind Action;
1359 InterlockingKind Interlocking;
1360 bool Is64Bit;
1361
1362 static BitTest decodeBitTestBuiltin(unsigned BuiltinID);
1363};
1364
1365} // namespace
1366
1367BitTest BitTest::decodeBitTestBuiltin(unsigned BuiltinID) {
1368 switch (BuiltinID) {
1369 // Main portable variants.
1370 case Builtin::BI_bittest:
1371 return {TestOnly, Unlocked, false};
1372 case Builtin::BI_bittestandcomplement:
1373 return {Complement, Unlocked, false};
1374 case Builtin::BI_bittestandreset:
1375 return {Reset, Unlocked, false};
1376 case Builtin::BI_bittestandset:
1377 return {Set, Unlocked, false};
1378 case Builtin::BI_interlockedbittestandreset:
1379 return {Reset, Sequential, false};
1380 case Builtin::BI_interlockedbittestandset:
1381 return {Set, Sequential, false};
1382
1383 // X86-specific 64-bit variants.
1384 case Builtin::BI_bittest64:
1385 return {TestOnly, Unlocked, true};
1386 case Builtin::BI_bittestandcomplement64:
1387 return {Complement, Unlocked, true};
1388 case Builtin::BI_bittestandreset64:
1389 return {Reset, Unlocked, true};
1390 case Builtin::BI_bittestandset64:
1391 return {Set, Unlocked, true};
1392 case Builtin::BI_interlockedbittestandreset64:
1393 return {Reset, Sequential, true};
1394 case Builtin::BI_interlockedbittestandset64:
1395 return {Set, Sequential, true};
1396
1397 // ARM/AArch64-specific ordering variants.
1398 case Builtin::BI_interlockedbittestandset_acq:
1399 return {Set, Acquire, false};
1400 case Builtin::BI_interlockedbittestandset_rel:
1401 return {Set, Release, false};
1402 case Builtin::BI_interlockedbittestandset_nf:
1403 return {Set, NoFence, false};
1404 case Builtin::BI_interlockedbittestandreset_acq:
1405 return {Reset, Acquire, false};
1406 case Builtin::BI_interlockedbittestandreset_rel:
1407 return {Reset, Release, false};
1408 case Builtin::BI_interlockedbittestandreset_nf:
1409 return {Reset, NoFence, false};
1410 }
1411 llvm_unreachable("expected only bittest intrinsics");
1412}
1413
1414static char bitActionToX86BTCode(BitTest::ActionKind A) {
1415 switch (A) {
1416 case BitTest::TestOnly: return '\0';
1417 case BitTest::Complement: return 'c';
1418 case BitTest::Reset: return 'r';
1419 case BitTest::Set: return 's';
1420 }
1421 llvm_unreachable("invalid action");
1422}
1423
1424static llvm::Value *EmitX86BitTestIntrinsic(CodeGenFunction &CGF,
1425 BitTest BT,
1426 const CallExpr *E, Value *BitBase,
1427 Value *BitPos) {
1428 char Action = bitActionToX86BTCode(BT.Action);
1429 char SizeSuffix = BT.Is64Bit ? 'q' : 'l';
1430
1431 // Build the assembly.
1432 SmallString<64> Asm;
1433 raw_svector_ostream AsmOS(Asm);
1434 if (BT.Interlocking != BitTest::Unlocked)
1435 AsmOS << "lock ";
1436 AsmOS << "bt";
1437 if (Action)
1438 AsmOS << Action;
1439 AsmOS << SizeSuffix << " $2, ($1)";
1440
1441 // Build the constraints. FIXME: We should support immediates when possible.
1442 std::string Constraints = "={@ccc},r,r,~{cc},~{memory}";
1443 std::string_view MachineClobbers = CGF.getTarget().getClobbers();
1444 if (!MachineClobbers.empty()) {
1445 Constraints += ',';
1446 Constraints += MachineClobbers;
1447 }
1448 llvm::IntegerType *IntType = llvm::IntegerType::get(
1449 CGF.getLLVMContext(),
1450 CGF.getContext().getTypeSize(E->getArg(1)->getType()));
1451 llvm::FunctionType *FTy =
1452 llvm::FunctionType::get(CGF.Int8Ty, {CGF.UnqualPtrTy, IntType}, false);
1453
1454 llvm::InlineAsm *IA =
1455 llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true);
1456 return CGF.Builder.CreateCall(IA, {BitBase, BitPos});
1457}
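// [Editorial illustration, not part of the original source] For
// _interlockedbittestandset the helper above builds the inline-asm string
// "lock btsl $2, ($1)" with constraints "={@ccc},r,r,~{cc},~{memory}" (plus
// any target clobbers), so the carry flag produced by the bt instruction is
// returned as an i8.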
1458
1459static llvm::AtomicOrdering
1460getBitTestAtomicOrdering(BitTest::InterlockingKind I) {
1461 switch (I) {
1462 case BitTest::Unlocked: return llvm::AtomicOrdering::NotAtomic;
1463 case BitTest::Sequential: return llvm::AtomicOrdering::SequentiallyConsistent;
1464 case BitTest::Acquire: return llvm::AtomicOrdering::Acquire;
1465 case BitTest::Release: return llvm::AtomicOrdering::Release;
1466 case BitTest::NoFence: return llvm::AtomicOrdering::Monotonic;
1467 }
1468 llvm_unreachable("invalid interlocking");
1469}
1470
1471/// Emit a _bittest* intrinsic. These intrinsics take a pointer to an array of
1472/// bits and a bit position and read and optionally modify the bit at that
1473/// position. The position index can be arbitrarily large, i.e. it can be larger
1474/// than 31 or 63, so we need an indexed load in the general case.
1475static llvm::Value *EmitBitTestIntrinsic(CodeGenFunction &CGF,
1476 unsigned BuiltinID,
1477 const CallExpr *E) {
1478 Value *BitBase = CGF.EmitScalarExpr(E->getArg(0));
1479 Value *BitPos = CGF.EmitScalarExpr(E->getArg(1));
1480
1481 BitTest BT = BitTest::decodeBitTestBuiltin(BuiltinID);
1482
1483 // X86 has special BT, BTC, BTR, and BTS instructions that handle the array
1484 // indexing operation internally. Use them if possible.
1485 if (CGF.getTarget().getTriple().isX86())
1486 return EmitX86BitTestIntrinsic(CGF, BT, E, BitBase, BitPos);
1487
1488 // Otherwise, use generic code to load one byte and test the bit. Use all but
1489 // the bottom three bits as the array index, and the bottom three bits to form
1490 // a mask.
1491 // Bit = BitBaseI8[BitPos >> 3] & (1 << (BitPos & 0x7)) != 0;
1492 Value *ByteIndex = CGF.Builder.CreateAShr(
1493 BitPos, llvm::ConstantInt::get(BitPos->getType(), 3), "bittest.byteidx");
1494 Address ByteAddr(CGF.Builder.CreateInBoundsGEP(CGF.Int8Ty, BitBase, ByteIndex,
1495 "bittest.byteaddr"),
1496 CGF.Int8Ty, CharUnits::One());
1497 Value *PosLow =
1498 CGF.Builder.CreateAnd(CGF.Builder.CreateTrunc(BitPos, CGF.Int8Ty),
1499 llvm::ConstantInt::get(CGF.Int8Ty, 0x7));
1500
1501 // The updating instructions will need a mask.
1502 Value *Mask = nullptr;
1503 if (BT.Action != BitTest::TestOnly) {
1504 Mask = CGF.Builder.CreateShl(llvm::ConstantInt::get(CGF.Int8Ty, 1), PosLow,
1505 "bittest.mask");
1506 }
1507
1508 // Check the action and ordering of the interlocked intrinsics.
1509 llvm::AtomicOrdering Ordering = getBitTestAtomicOrdering(BT.Interlocking);
1510
1511 Value *OldByte = nullptr;
1512 if (Ordering != llvm::AtomicOrdering::NotAtomic) {
1513 // Emit a combined atomicrmw load/store operation for the interlocked
1514 // intrinsics.
1515 llvm::AtomicRMWInst::BinOp RMWOp = llvm::AtomicRMWInst::Or;
1516 if (BT.Action == BitTest::Reset) {
1517 Mask = CGF.Builder.CreateNot(Mask);
1518 RMWOp = llvm::AtomicRMWInst::And;
1519 }
1520 OldByte = CGF.Builder.CreateAtomicRMW(RMWOp, ByteAddr, Mask, Ordering);
1521 } else {
1522 // Emit a plain load for the non-interlocked intrinsics.
1523 OldByte = CGF.Builder.CreateLoad(ByteAddr, "bittest.byte");
1524 Value *NewByte = nullptr;
1525 switch (BT.Action) {
1526 case BitTest::TestOnly:
1527 // Don't store anything.
1528 break;
1529 case BitTest::Complement:
1530 NewByte = CGF.Builder.CreateXor(OldByte, Mask);
1531 break;
1532 case BitTest::Reset:
1533 NewByte = CGF.Builder.CreateAnd(OldByte, CGF.Builder.CreateNot(Mask));
1534 break;
1535 case BitTest::Set:
1536 NewByte = CGF.Builder.CreateOr(OldByte, Mask);
1537 break;
1538 }
1539 if (NewByte)
1540 CGF.Builder.CreateStore(NewByte, ByteAddr);
1541 }
1542
1543 // However we loaded the old byte, either by plain load or atomicrmw, shift
1544 // the bit into the low position and mask it to 0 or 1.
1545 Value *ShiftedByte = CGF.Builder.CreateLShr(OldByte, PosLow, "bittest.shr");
1546 return CGF.Builder.CreateAnd(
1547 ShiftedByte, llvm::ConstantInt::get(CGF.Int8Ty, 1), "bittest.res");
1548}
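// Illustrative usage sketch (assumes an MSVC-compatible target and <intrin.h>;
// not part of this file):
//
//   long Flags[2] = {0};
//   unsigned char Old = _interlockedbittestandset(Flags, 70);
//
// Bit position 70 gives byte index 70 >> 3 == 8 and mask 1 << (70 & 0x7) ==
// 0x40. On x86 this goes through EmitX86BitTestIntrinsic above and becomes
// "lock bts"-style inline asm; on other targets the generic path loads the
// byte, applies the mask with an atomicrmw 'or', and returns bit 6 of the old
// byte.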
1549
1550static Value *EmitPPCLoadReserveIntrinsic(CodeGenFunction &CGF,
1551 unsigned BuiltinID,
1552 const CallExpr *E) {
1553 Value *Addr = CGF.EmitScalarExpr(E->getArg(0));
1554
1555 SmallString<64> Asm;
1556 raw_svector_ostream AsmOS(Asm);
1557 llvm::IntegerType *RetType = CGF.Int32Ty;
1558
1559 switch (BuiltinID) {
1560 case clang::PPC::BI__builtin_ppc_ldarx:
1561 AsmOS << "ldarx ";
1562 RetType = CGF.Int64Ty;
1563 break;
1564 case clang::PPC::BI__builtin_ppc_lwarx:
1565 AsmOS << "lwarx ";
1566 RetType = CGF.Int32Ty;
1567 break;
1568 case clang::PPC::BI__builtin_ppc_lharx:
1569 AsmOS << "lharx ";
1570 RetType = CGF.Int16Ty;
1571 break;
1572 case clang::PPC::BI__builtin_ppc_lbarx:
1573 AsmOS << "lbarx ";
1574 RetType = CGF.Int8Ty;
1575 break;
1576 default:
1577 llvm_unreachable("Expected only PowerPC load reserve intrinsics");
1578 }
1579
1580 AsmOS << "$0, ${1:y}";
1581
1582 std::string Constraints = "=r,*Z,~{memory}";
1583 std::string_view MachineClobbers = CGF.getTarget().getClobbers();
1584 if (!MachineClobbers.empty()) {
1585 Constraints += ',';
1586 Constraints += MachineClobbers;
1587 }
1588
1589 llvm::Type *PtrType = CGF.UnqualPtrTy;
1590 llvm::FunctionType *FTy = llvm::FunctionType::get(RetType, {PtrType}, false);
1591
1592 llvm::InlineAsm *IA =
1593 llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true);
1594 llvm::CallInst *CI = CGF.Builder.CreateCall(IA, {Addr});
1595 CI->addParamAttr(
1596 0, Attribute::get(CGF.getLLVMContext(), Attribute::ElementType, RetType));
1597 return CI;
1598}
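// Illustrative usage (assumes a PowerPC target; not part of this file):
//
//   int v = __builtin_ppc_lwarx(p);   // p is an int*
//
// reaches the lwarx case above, so the emitted inline asm is
// "lwarx $0, ${1:y}" with constraints "=r,*Z,~{memory}", an i32 result, and an
// elementtype(i32) attribute attached to the pointer operand.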
1599
1600namespace {
1601enum class MSVCSetJmpKind {
1602 _setjmpex,
1603 _setjmp3,
1604 _setjmp
1605};
1606}
1607
1608/// MSVC handles setjmp a bit differently on different platforms. On every
1609/// architecture except 32-bit x86, the frame address is passed. On x86, extra
1610/// parameters can be passed as variadic arguments, but we always pass none.
1611static RValue EmitMSVCRTSetJmp(CodeGenFunction &CGF, MSVCSetJmpKind SJKind,
1612 const CallExpr *E) {
1613 llvm::Value *Arg1 = nullptr;
1614 llvm::Type *Arg1Ty = nullptr;
1615 StringRef Name;
1616 bool IsVarArg = false;
1617 if (SJKind == MSVCSetJmpKind::_setjmp3) {
1618 Name = "_setjmp3";
1619 Arg1Ty = CGF.Int32Ty;
1620 Arg1 = llvm::ConstantInt::get(CGF.IntTy, 0);
1621 IsVarArg = true;
1622 } else {
1623 Name = SJKind == MSVCSetJmpKind::_setjmp ? "_setjmp" : "_setjmpex";
1624 Arg1Ty = CGF.Int8PtrTy;
1625 if (CGF.getTarget().getTriple().getArch() == llvm::Triple::aarch64) {
1626 Arg1 = CGF.Builder.CreateCall(
1627 CGF.CGM.getIntrinsic(Intrinsic::sponentry, CGF.AllocaInt8PtrTy));
1628 } else
1629 Arg1 = CGF.Builder.CreateCall(
1630 CGF.CGM.getIntrinsic(Intrinsic::frameaddress, CGF.AllocaInt8PtrTy),
1631 llvm::ConstantInt::get(CGF.Int32Ty, 0));
1632 }
1633
1634 // Mark the call site and declaration with ReturnsTwice.
1635 llvm::Type *ArgTypes[2] = {CGF.Int8PtrTy, Arg1Ty};
1636 llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get(
1637 CGF.getLLVMContext(), llvm::AttributeList::FunctionIndex,
1638 llvm::Attribute::ReturnsTwice);
1639 llvm::FunctionCallee SetJmpFn = CGF.CGM.CreateRuntimeFunction(
1640 llvm::FunctionType::get(CGF.IntTy, ArgTypes, IsVarArg), Name,
1641 ReturnsTwiceAttr, /*Local=*/true);
1642
1643 llvm::Value *Buf = CGF.Builder.CreateBitOrPointerCast(
1644 CGF.EmitScalarExpr(E->getArg(0)), CGF.Int8PtrTy);
1645 llvm::Value *Args[] = {Buf, Arg1};
1646 llvm::CallBase *CB = CGF.EmitRuntimeCallOrInvoke(SetJmpFn, Args);
1647 CB->setAttributes(ReturnsTwiceAttr);
1648 return RValue::get(CB);
1649}
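// Usage sketch (assumes an MSVC environment and <setjmp.h>; illustrative only):
//
//   jmp_buf Buf;
//   if (_setjmp(Buf) == 0)
//     longjmp(Buf, 1);
//
// On 32-bit x86 the _setjmp call becomes a variadic call to "_setjmp3" with an
// extra i32 0; on AArch64 the buffer plus @llvm.sponentry() go to "_setjmpex";
// elsewhere @llvm.frameaddress(0) is passed. Both the runtime function and the
// call site are marked 'returns_twice'.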
1650
1651 // Many of the MSVC builtins are available on x64, ARM, and AArch64; to avoid repeating code,
1652// we handle them here.
1693 __fastfail,
1694};
1695
1696static std::optional<CodeGenFunction::MSVCIntrin>
1697translateArmToMsvcIntrin(unsigned BuiltinID) {
1698 using MSVCIntrin = CodeGenFunction::MSVCIntrin;
1699 switch (BuiltinID) {
1700 default:
1701 return std::nullopt;
1702 case clang::ARM::BI_BitScanForward:
1703 case clang::ARM::BI_BitScanForward64:
1704 return MSVCIntrin::_BitScanForward;
1705 case clang::ARM::BI_BitScanReverse:
1706 case clang::ARM::BI_BitScanReverse64:
1707 return MSVCIntrin::_BitScanReverse;
1708 case clang::ARM::BI_InterlockedAnd64:
1709 return MSVCIntrin::_InterlockedAnd;
1710 case clang::ARM::BI_InterlockedExchange64:
1711 return MSVCIntrin::_InterlockedExchange;
1712 case clang::ARM::BI_InterlockedExchangeAdd64:
1713 return MSVCIntrin::_InterlockedExchangeAdd;
1714 case clang::ARM::BI_InterlockedExchangeSub64:
1715 return MSVCIntrin::_InterlockedExchangeSub;
1716 case clang::ARM::BI_InterlockedOr64:
1717 return MSVCIntrin::_InterlockedOr;
1718 case clang::ARM::BI_InterlockedXor64:
1719 return MSVCIntrin::_InterlockedXor;
1720 case clang::ARM::BI_InterlockedDecrement64:
1721 return MSVCIntrin::_InterlockedDecrement;
1722 case clang::ARM::BI_InterlockedIncrement64:
1723 return MSVCIntrin::_InterlockedIncrement;
1724 case clang::ARM::BI_InterlockedExchangeAdd8_acq:
1725 case clang::ARM::BI_InterlockedExchangeAdd16_acq:
1726 case clang::ARM::BI_InterlockedExchangeAdd_acq:
1727 case clang::ARM::BI_InterlockedExchangeAdd64_acq:
1728 return MSVCIntrin::_InterlockedExchangeAdd_acq;
1729 case clang::ARM::BI_InterlockedExchangeAdd8_rel:
1730 case clang::ARM::BI_InterlockedExchangeAdd16_rel:
1731 case clang::ARM::BI_InterlockedExchangeAdd_rel:
1732 case clang::ARM::BI_InterlockedExchangeAdd64_rel:
1733 return MSVCIntrin::_InterlockedExchangeAdd_rel;
1734 case clang::ARM::BI_InterlockedExchangeAdd8_nf:
1735 case clang::ARM::BI_InterlockedExchangeAdd16_nf:
1736 case clang::ARM::BI_InterlockedExchangeAdd_nf:
1737 case clang::ARM::BI_InterlockedExchangeAdd64_nf:
1738 return MSVCIntrin::_InterlockedExchangeAdd_nf;
1739 case clang::ARM::BI_InterlockedExchange8_acq:
1740 case clang::ARM::BI_InterlockedExchange16_acq:
1741 case clang::ARM::BI_InterlockedExchange_acq:
1742 case clang::ARM::BI_InterlockedExchange64_acq:
1743 case clang::ARM::BI_InterlockedExchangePointer_acq:
1744 return MSVCIntrin::_InterlockedExchange_acq;
1745 case clang::ARM::BI_InterlockedExchange8_rel:
1746 case clang::ARM::BI_InterlockedExchange16_rel:
1747 case clang::ARM::BI_InterlockedExchange_rel:
1748 case clang::ARM::BI_InterlockedExchange64_rel:
1749 case clang::ARM::BI_InterlockedExchangePointer_rel:
1750 return MSVCIntrin::_InterlockedExchange_rel;
1751 case clang::ARM::BI_InterlockedExchange8_nf:
1752 case clang::ARM::BI_InterlockedExchange16_nf:
1753 case clang::ARM::BI_InterlockedExchange_nf:
1754 case clang::ARM::BI_InterlockedExchange64_nf:
1755 case clang::ARM::BI_InterlockedExchangePointer_nf:
1756 return MSVCIntrin::_InterlockedExchange_nf;
1757 case clang::ARM::BI_InterlockedCompareExchange8_acq:
1758 case clang::ARM::BI_InterlockedCompareExchange16_acq:
1759 case clang::ARM::BI_InterlockedCompareExchange_acq:
1760 case clang::ARM::BI_InterlockedCompareExchange64_acq:
1761 case clang::ARM::BI_InterlockedCompareExchangePointer_acq:
1762 return MSVCIntrin::_InterlockedCompareExchange_acq;
1763 case clang::ARM::BI_InterlockedCompareExchange8_rel:
1764 case clang::ARM::BI_InterlockedCompareExchange16_rel:
1765 case clang::ARM::BI_InterlockedCompareExchange_rel:
1766 case clang::ARM::BI_InterlockedCompareExchange64_rel:
1767 case clang::ARM::BI_InterlockedCompareExchangePointer_rel:
1768 return MSVCIntrin::_InterlockedCompareExchange_rel;
1769 case clang::ARM::BI_InterlockedCompareExchange8_nf:
1770 case clang::ARM::BI_InterlockedCompareExchange16_nf:
1771 case clang::ARM::BI_InterlockedCompareExchange_nf:
1772 case clang::ARM::BI_InterlockedCompareExchange64_nf:
1773 return MSVCIntrin::_InterlockedCompareExchange_nf;
1774 case clang::ARM::BI_InterlockedOr8_acq:
1775 case clang::ARM::BI_InterlockedOr16_acq:
1776 case clang::ARM::BI_InterlockedOr_acq:
1777 case clang::ARM::BI_InterlockedOr64_acq:
1778 return MSVCIntrin::_InterlockedOr_acq;
1779 case clang::ARM::BI_InterlockedOr8_rel:
1780 case clang::ARM::BI_InterlockedOr16_rel:
1781 case clang::ARM::BI_InterlockedOr_rel:
1782 case clang::ARM::BI_InterlockedOr64_rel:
1783 return MSVCIntrin::_InterlockedOr_rel;
1784 case clang::ARM::BI_InterlockedOr8_nf:
1785 case clang::ARM::BI_InterlockedOr16_nf:
1786 case clang::ARM::BI_InterlockedOr_nf:
1787 case clang::ARM::BI_InterlockedOr64_nf:
1788 return MSVCIntrin::_InterlockedOr_nf;
1789 case clang::ARM::BI_InterlockedXor8_acq:
1790 case clang::ARM::BI_InterlockedXor16_acq:
1791 case clang::ARM::BI_InterlockedXor_acq:
1792 case clang::ARM::BI_InterlockedXor64_acq:
1793 return MSVCIntrin::_InterlockedXor_acq;
1794 case clang::ARM::BI_InterlockedXor8_rel:
1795 case clang::ARM::BI_InterlockedXor16_rel:
1796 case clang::ARM::BI_InterlockedXor_rel:
1797 case clang::ARM::BI_InterlockedXor64_rel:
1798 return MSVCIntrin::_InterlockedXor_rel;
1799 case clang::ARM::BI_InterlockedXor8_nf:
1800 case clang::ARM::BI_InterlockedXor16_nf:
1801 case clang::ARM::BI_InterlockedXor_nf:
1802 case clang::ARM::BI_InterlockedXor64_nf:
1803 return MSVCIntrin::_InterlockedXor_nf;
1804 case clang::ARM::BI_InterlockedAnd8_acq:
1805 case clang::ARM::BI_InterlockedAnd16_acq:
1806 case clang::ARM::BI_InterlockedAnd_acq:
1807 case clang::ARM::BI_InterlockedAnd64_acq:
1808 return MSVCIntrin::_InterlockedAnd_acq;
1809 case clang::ARM::BI_InterlockedAnd8_rel:
1810 case clang::ARM::BI_InterlockedAnd16_rel:
1811 case clang::ARM::BI_InterlockedAnd_rel:
1812 case clang::ARM::BI_InterlockedAnd64_rel:
1813 return MSVCIntrin::_InterlockedAnd_rel;
1814 case clang::ARM::BI_InterlockedAnd8_nf:
1815 case clang::ARM::BI_InterlockedAnd16_nf:
1816 case clang::ARM::BI_InterlockedAnd_nf:
1817 case clang::ARM::BI_InterlockedAnd64_nf:
1818 return MSVCIntrin::_InterlockedAnd_nf;
1819 case clang::ARM::BI_InterlockedIncrement16_acq:
1820 case clang::ARM::BI_InterlockedIncrement_acq:
1821 case clang::ARM::BI_InterlockedIncrement64_acq:
1822 return MSVCIntrin::_InterlockedIncrement_acq;
1823 case clang::ARM::BI_InterlockedIncrement16_rel:
1824 case clang::ARM::BI_InterlockedIncrement_rel:
1825 case clang::ARM::BI_InterlockedIncrement64_rel:
1826 return MSVCIntrin::_InterlockedIncrement_rel;
1827 case clang::ARM::BI_InterlockedIncrement16_nf:
1828 case clang::ARM::BI_InterlockedIncrement_nf:
1829 case clang::ARM::BI_InterlockedIncrement64_nf:
1830 return MSVCIntrin::_InterlockedIncrement_nf;
1831 case clang::ARM::BI_InterlockedDecrement16_acq:
1832 case clang::ARM::BI_InterlockedDecrement_acq:
1833 case clang::ARM::BI_InterlockedDecrement64_acq:
1834 return MSVCIntrin::_InterlockedDecrement_acq;
1835 case clang::ARM::BI_InterlockedDecrement16_rel:
1836 case clang::ARM::BI_InterlockedDecrement_rel:
1837 case clang::ARM::BI_InterlockedDecrement64_rel:
1838 return MSVCIntrin::_InterlockedDecrement_rel;
1839 case clang::ARM::BI_InterlockedDecrement16_nf:
1840 case clang::ARM::BI_InterlockedDecrement_nf:
1841 case clang::ARM::BI_InterlockedDecrement64_nf:
1842 return MSVCIntrin::_InterlockedDecrement_nf;
1843 }
1844 llvm_unreachable("must return from switch");
1845}
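// For example, clang::ARM::BI_InterlockedExchangeAdd64_acq maps to
// MSVCIntrin::_InterlockedExchangeAdd_acq here, and EmitMSVCBuiltinExpr below
// then emits an 'atomicrmw add' with acquire ordering; the operand width comes
// from the call's argument types rather than from the builtin name.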
1846
1847static std::optional<CodeGenFunction::MSVCIntrin>
1848translateAarch64ToMsvcIntrin(unsigned BuiltinID) {
1849 using MSVCIntrin = CodeGenFunction::MSVCIntrin;
1850 switch (BuiltinID) {
1851 default:
1852 return std::nullopt;
1853 case clang::AArch64::BI_BitScanForward:
1854 case clang::AArch64::BI_BitScanForward64:
1855 return MSVCIntrin::_BitScanForward;
1856 case clang::AArch64::BI_BitScanReverse:
1857 case clang::AArch64::BI_BitScanReverse64:
1858 return MSVCIntrin::_BitScanReverse;
1859 case clang::AArch64::BI_InterlockedAnd64:
1860 return MSVCIntrin::_InterlockedAnd;
1861 case clang::AArch64::BI_InterlockedExchange64:
1862 return MSVCIntrin::_InterlockedExchange;
1863 case clang::AArch64::BI_InterlockedExchangeAdd64:
1864 return MSVCIntrin::_InterlockedExchangeAdd;
1865 case clang::AArch64::BI_InterlockedExchangeSub64:
1866 return MSVCIntrin::_InterlockedExchangeSub;
1867 case clang::AArch64::BI_InterlockedOr64:
1868 return MSVCIntrin::_InterlockedOr;
1869 case clang::AArch64::BI_InterlockedXor64:
1870 return MSVCIntrin::_InterlockedXor;
1871 case clang::AArch64::BI_InterlockedDecrement64:
1872 return MSVCIntrin::_InterlockedDecrement;
1873 case clang::AArch64::BI_InterlockedIncrement64:
1874 return MSVCIntrin::_InterlockedIncrement;
1875 case clang::AArch64::BI_InterlockedExchangeAdd8_acq:
1876 case clang::AArch64::BI_InterlockedExchangeAdd16_acq:
1877 case clang::AArch64::BI_InterlockedExchangeAdd_acq:
1878 case clang::AArch64::BI_InterlockedExchangeAdd64_acq:
1879 return MSVCIntrin::_InterlockedExchangeAdd_acq;
1880 case clang::AArch64::BI_InterlockedExchangeAdd8_rel:
1881 case clang::AArch64::BI_InterlockedExchangeAdd16_rel:
1882 case clang::AArch64::BI_InterlockedExchangeAdd_rel:
1883 case clang::AArch64::BI_InterlockedExchangeAdd64_rel:
1884 return MSVCIntrin::_InterlockedExchangeAdd_rel;
1885 case clang::AArch64::BI_InterlockedExchangeAdd8_nf:
1886 case clang::AArch64::BI_InterlockedExchangeAdd16_nf:
1887 case clang::AArch64::BI_InterlockedExchangeAdd_nf:
1888 case clang::AArch64::BI_InterlockedExchangeAdd64_nf:
1889 return MSVCIntrin::_InterlockedExchangeAdd_nf;
1890 case clang::AArch64::BI_InterlockedExchange8_acq:
1891 case clang::AArch64::BI_InterlockedExchange16_acq:
1892 case clang::AArch64::BI_InterlockedExchange_acq:
1893 case clang::AArch64::BI_InterlockedExchange64_acq:
1894 case clang::AArch64::BI_InterlockedExchangePointer_acq:
1895 return MSVCIntrin::_InterlockedExchange_acq;
1896 case clang::AArch64::BI_InterlockedExchange8_rel:
1897 case clang::AArch64::BI_InterlockedExchange16_rel:
1898 case clang::AArch64::BI_InterlockedExchange_rel:
1899 case clang::AArch64::BI_InterlockedExchange64_rel:
1900 case clang::AArch64::BI_InterlockedExchangePointer_rel:
1901 return MSVCIntrin::_InterlockedExchange_rel;
1902 case clang::AArch64::BI_InterlockedExchange8_nf:
1903 case clang::AArch64::BI_InterlockedExchange16_nf:
1904 case clang::AArch64::BI_InterlockedExchange_nf:
1905 case clang::AArch64::BI_InterlockedExchange64_nf:
1906 case clang::AArch64::BI_InterlockedExchangePointer_nf:
1907 return MSVCIntrin::_InterlockedExchange_nf;
1908 case clang::AArch64::BI_InterlockedCompareExchange8_acq:
1909 case clang::AArch64::BI_InterlockedCompareExchange16_acq:
1910 case clang::AArch64::BI_InterlockedCompareExchange_acq:
1911 case clang::AArch64::BI_InterlockedCompareExchange64_acq:
1912 case clang::AArch64::BI_InterlockedCompareExchangePointer_acq:
1913 return MSVCIntrin::_InterlockedCompareExchange_acq;
1914 case clang::AArch64::BI_InterlockedCompareExchange8_rel:
1915 case clang::AArch64::BI_InterlockedCompareExchange16_rel:
1916 case clang::AArch64::BI_InterlockedCompareExchange_rel:
1917 case clang::AArch64::BI_InterlockedCompareExchange64_rel:
1918 case clang::AArch64::BI_InterlockedCompareExchangePointer_rel:
1919 return MSVCIntrin::_InterlockedCompareExchange_rel;
1920 case clang::AArch64::BI_InterlockedCompareExchange8_nf:
1921 case clang::AArch64::BI_InterlockedCompareExchange16_nf:
1922 case clang::AArch64::BI_InterlockedCompareExchange_nf:
1923 case clang::AArch64::BI_InterlockedCompareExchange64_nf:
1924 return MSVCIntrin::_InterlockedCompareExchange_nf;
1925 case clang::AArch64::BI_InterlockedCompareExchange128:
1926 return MSVCIntrin::_InterlockedCompareExchange128;
1927 case clang::AArch64::BI_InterlockedCompareExchange128_acq:
1928 return MSVCIntrin::_InterlockedCompareExchange128_acq;
1929 case clang::AArch64::BI_InterlockedCompareExchange128_nf:
1930 return MSVCIntrin::_InterlockedCompareExchange128_nf;
1931 case clang::AArch64::BI_InterlockedCompareExchange128_rel:
1932 return MSVCIntrin::_InterlockedCompareExchange128_rel;
1933 case clang::AArch64::BI_InterlockedOr8_acq:
1934 case clang::AArch64::BI_InterlockedOr16_acq:
1935 case clang::AArch64::BI_InterlockedOr_acq:
1936 case clang::AArch64::BI_InterlockedOr64_acq:
1937 return MSVCIntrin::_InterlockedOr_acq;
1938 case clang::AArch64::BI_InterlockedOr8_rel:
1939 case clang::AArch64::BI_InterlockedOr16_rel:
1940 case clang::AArch64::BI_InterlockedOr_rel:
1941 case clang::AArch64::BI_InterlockedOr64_rel:
1942 return MSVCIntrin::_InterlockedOr_rel;
1943 case clang::AArch64::BI_InterlockedOr8_nf:
1944 case clang::AArch64::BI_InterlockedOr16_nf:
1945 case clang::AArch64::BI_InterlockedOr_nf:
1946 case clang::AArch64::BI_InterlockedOr64_nf:
1947 return MSVCIntrin::_InterlockedOr_nf;
1948 case clang::AArch64::BI_InterlockedXor8_acq:
1949 case clang::AArch64::BI_InterlockedXor16_acq:
1950 case clang::AArch64::BI_InterlockedXor_acq:
1951 case clang::AArch64::BI_InterlockedXor64_acq:
1952 return MSVCIntrin::_InterlockedXor_acq;
1953 case clang::AArch64::BI_InterlockedXor8_rel:
1954 case clang::AArch64::BI_InterlockedXor16_rel:
1955 case clang::AArch64::BI_InterlockedXor_rel:
1956 case clang::AArch64::BI_InterlockedXor64_rel:
1957 return MSVCIntrin::_InterlockedXor_rel;
1958 case clang::AArch64::BI_InterlockedXor8_nf:
1959 case clang::AArch64::BI_InterlockedXor16_nf:
1960 case clang::AArch64::BI_InterlockedXor_nf:
1961 case clang::AArch64::BI_InterlockedXor64_nf:
1962 return MSVCIntrin::_InterlockedXor_nf;
1963 case clang::AArch64::BI_InterlockedAnd8_acq:
1964 case clang::AArch64::BI_InterlockedAnd16_acq:
1965 case clang::AArch64::BI_InterlockedAnd_acq:
1966 case clang::AArch64::BI_InterlockedAnd64_acq:
1967 return MSVCIntrin::_InterlockedAnd_acq;
1968 case clang::AArch64::BI_InterlockedAnd8_rel:
1969 case clang::AArch64::BI_InterlockedAnd16_rel:
1970 case clang::AArch64::BI_InterlockedAnd_rel:
1971 case clang::AArch64::BI_InterlockedAnd64_rel:
1972 return MSVCIntrin::_InterlockedAnd_rel;
1973 case clang::AArch64::BI_InterlockedAnd8_nf:
1974 case clang::AArch64::BI_InterlockedAnd16_nf:
1975 case clang::AArch64::BI_InterlockedAnd_nf:
1976 case clang::AArch64::BI_InterlockedAnd64_nf:
1977 return MSVCIntrin::_InterlockedAnd_nf;
1978 case clang::AArch64::BI_InterlockedIncrement16_acq:
1979 case clang::AArch64::BI_InterlockedIncrement_acq:
1980 case clang::AArch64::BI_InterlockedIncrement64_acq:
1981 return MSVCIntrin::_InterlockedIncrement_acq;
1982 case clang::AArch64::BI_InterlockedIncrement16_rel:
1983 case clang::AArch64::BI_InterlockedIncrement_rel:
1984 case clang::AArch64::BI_InterlockedIncrement64_rel:
1985 return MSVCIntrin::_InterlockedIncrement_rel;
1986 case clang::AArch64::BI_InterlockedIncrement16_nf:
1987 case clang::AArch64::BI_InterlockedIncrement_nf:
1988 case clang::AArch64::BI_InterlockedIncrement64_nf:
1989 return MSVCIntrin::_InterlockedIncrement_nf;
1990 case clang::AArch64::BI_InterlockedDecrement16_acq:
1991 case clang::AArch64::BI_InterlockedDecrement_acq:
1992 case clang::AArch64::BI_InterlockedDecrement64_acq:
1993 return MSVCIntrin::_InterlockedDecrement_acq;
1994 case clang::AArch64::BI_InterlockedDecrement16_rel:
1995 case clang::AArch64::BI_InterlockedDecrement_rel:
1996 case clang::AArch64::BI_InterlockedDecrement64_rel:
1997 return MSVCIntrin::_InterlockedDecrement_rel;
1998 case clang::AArch64::BI_InterlockedDecrement16_nf:
1999 case clang::AArch64::BI_InterlockedDecrement_nf:
2000 case clang::AArch64::BI_InterlockedDecrement64_nf:
2001 return MSVCIntrin::_InterlockedDecrement_nf;
2002 }
2003 llvm_unreachable("must return from switch");
2004}
2005
2006static std::optional<CodeGenFunction::MSVCIntrin>
2007translateX86ToMsvcIntrin(unsigned BuiltinID) {
2008 using MSVCIntrin = CodeGenFunction::MSVCIntrin;
2009 switch (BuiltinID) {
2010 default:
2011 return std::nullopt;
2012 case clang::X86::BI_BitScanForward:
2013 case clang::X86::BI_BitScanForward64:
2014 return MSVCIntrin::_BitScanForward;
2015 case clang::X86::BI_BitScanReverse:
2016 case clang::X86::BI_BitScanReverse64:
2017 return MSVCIntrin::_BitScanReverse;
2018 case clang::X86::BI_InterlockedAnd64:
2019 return MSVCIntrin::_InterlockedAnd;
2020 case clang::X86::BI_InterlockedCompareExchange128:
2021 return MSVCIntrin::_InterlockedCompareExchange128;
2022 case clang::X86::BI_InterlockedExchange64:
2023 return MSVCIntrin::_InterlockedExchange;
2024 case clang::X86::BI_InterlockedExchangeAdd64:
2025 return MSVCIntrin::_InterlockedExchangeAdd;
2026 case clang::X86::BI_InterlockedExchangeSub64:
2027 return MSVCIntrin::_InterlockedExchangeSub;
2028 case clang::X86::BI_InterlockedOr64:
2029 return MSVCIntrin::_InterlockedOr;
2030 case clang::X86::BI_InterlockedXor64:
2031 return MSVCIntrin::_InterlockedXor;
2032 case clang::X86::BI_InterlockedDecrement64:
2033 return MSVCIntrin::_InterlockedDecrement;
2034 case clang::X86::BI_InterlockedIncrement64:
2035 return MSVCIntrin::_InterlockedIncrement;
2036 }
2037 llvm_unreachable("must return from switch");
2038}
2039
2040// Emit an MSVC intrinsic. Assumes that arguments have *not* been evaluated.
2041Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID,
2042 const CallExpr *E) {
2043 switch (BuiltinID) {
2044 case MSVCIntrin::_BitScanForward:
2045 case MSVCIntrin::_BitScanReverse: {
2046 Address IndexAddress(EmitPointerWithAlignment(E->getArg(0)));
2047 Value *ArgValue = EmitScalarExpr(E->getArg(1));
2048
2049 llvm::Type *ArgType = ArgValue->getType();
2050 llvm::Type *IndexType = IndexAddress.getElementType();
2051 llvm::Type *ResultType = ConvertType(E->getType());
2052
2053 Value *ArgZero = llvm::Constant::getNullValue(ArgType);
2054 Value *ResZero = llvm::Constant::getNullValue(ResultType);
2055 Value *ResOne = llvm::ConstantInt::get(ResultType, 1);
2056
2057 BasicBlock *Begin = Builder.GetInsertBlock();
2058 BasicBlock *End = createBasicBlock("bitscan_end", this->CurFn);
2059 Builder.SetInsertPoint(End);
2060 PHINode *Result = Builder.CreatePHI(ResultType, 2, "bitscan_result");
2061
2062 Builder.SetInsertPoint(Begin);
2063 Value *IsZero = Builder.CreateICmpEQ(ArgValue, ArgZero);
2064 BasicBlock *NotZero = createBasicBlock("bitscan_not_zero", this->CurFn);
2065 Builder.CreateCondBr(IsZero, End, NotZero);
2066 Result->addIncoming(ResZero, Begin);
2067
2068 Builder.SetInsertPoint(NotZero);
2069
2070 if (BuiltinID == MSVCIntrin::_BitScanForward) {
2071 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
2072 Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
2073 ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
2074 Builder.CreateStore(ZeroCount, IndexAddress, false);
2075 } else {
2076 unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
2077 Value *ArgTypeLastIndex = llvm::ConstantInt::get(IndexType, ArgWidth - 1);
2078
2079 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
2080 Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
2081 ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
2082 Value *Index = Builder.CreateNSWSub(ArgTypeLastIndex, ZeroCount);
2083 Builder.CreateStore(Index, IndexAddress, false);
2084 }
2085 Builder.CreateBr(End);
2086 Result->addIncoming(ResOne, NotZero);
2087
2088 Builder.SetInsertPoint(End);
2089 return Result;
2090 }
2091 case MSVCIntrin::_InterlockedAnd:
2092 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E);
2093 case MSVCIntrin::_InterlockedExchange:
2094 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E);
2095 case MSVCIntrin::_InterlockedExchangeAdd:
2096 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E);
2097 case MSVCIntrin::_InterlockedExchangeSub:
2098 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Sub, E);
2099 case MSVCIntrin::_InterlockedOr:
2100 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E);
2101 case MSVCIntrin::_InterlockedXor:
2102 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E);
2103 case MSVCIntrin::_InterlockedExchangeAdd_acq:
2104 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
2105 AtomicOrdering::Acquire);
2106 case MSVCIntrin::_InterlockedExchangeAdd_rel:
2107 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
2108 AtomicOrdering::Release);
2109 case MSVCIntrin::_InterlockedExchangeAdd_nf:
2110 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
2111 AtomicOrdering::Monotonic);
2112 case MSVCIntrin::_InterlockedExchange_acq:
2113 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
2114 AtomicOrdering::Acquire);
2115 case MSVCIntrin::_InterlockedExchange_rel:
2116 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
2117 AtomicOrdering::Release);
2118 case MSVCIntrin::_InterlockedExchange_nf:
2119 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
2120 AtomicOrdering::Monotonic);
2121 case MSVCIntrin::_InterlockedCompareExchange:
2122 return EmitAtomicCmpXchgForMSIntrin(*this, E);
2123 case MSVCIntrin::_InterlockedCompareExchange_acq:
2124 return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Acquire);
2125 case MSVCIntrin::_InterlockedCompareExchange_rel:
2126 return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Release);
2127 case MSVCIntrin::_InterlockedCompareExchange_nf:
2128 return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Monotonic);
2129 case MSVCIntrin::_InterlockedCompareExchange128:
2130 return EmitAtomicCmpXchg128ForMSIntrin(
2131 *this, E, AtomicOrdering::SequentiallyConsistent);
2132 case MSVCIntrin::_InterlockedCompareExchange128_acq:
2133 return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Acquire);
2134 case MSVCIntrin::_InterlockedCompareExchange128_rel:
2135 return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Release);
2136 case MSVCIntrin::_InterlockedCompareExchange128_nf:
2137 return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Monotonic);
2138 case MSVCIntrin::_InterlockedOr_acq:
2139 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
2140 AtomicOrdering::Acquire);
2141 case MSVCIntrin::_InterlockedOr_rel:
2142 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
2143 AtomicOrdering::Release);
2144 case MSVCIntrin::_InterlockedOr_nf:
2145 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
2146 AtomicOrdering::Monotonic);
2147 case MSVCIntrin::_InterlockedXor_acq:
2148 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
2149 AtomicOrdering::Acquire);
2150 case MSVCIntrin::_InterlockedXor_rel:
2151 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
2152 AtomicOrdering::Release);
2153 case MSVCIntrin::_InterlockedXor_nf:
2154 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
2155 AtomicOrdering::Monotonic);
2156 case MSVCIntrin::_InterlockedAnd_acq:
2157 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
2158 AtomicOrdering::Acquire);
2159 case MSVCIntrin::_InterlockedAnd_rel:
2160 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
2161 AtomicOrdering::Release);
2162 case MSVCIntrin::_InterlockedAnd_nf:
2163 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
2164 AtomicOrdering::Monotonic);
2165 case MSVCIntrin::_InterlockedIncrement_acq:
2166 return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Acquire);
2167 case MSVCIntrin::_InterlockedIncrement_rel:
2168 return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Release);
2169 case MSVCIntrin::_InterlockedIncrement_nf:
2170 return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Monotonic);
2171 case MSVCIntrin::_InterlockedDecrement_acq:
2172 return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Acquire);
2173 case MSVCIntrin::_InterlockedDecrement_rel:
2174 return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Release);
2175 case MSVCIntrin::_InterlockedDecrement_nf:
2176 return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Monotonic);
2177
2178 case MSVCIntrin::_InterlockedDecrement:
2179 return EmitAtomicDecrementValue(*this, E);
2180 case MSVCIntrin::_InterlockedIncrement:
2181 return EmitAtomicIncrementValue(*this, E);
2182
2183 case MSVCIntrin::__fastfail: {
2184 // Request immediate process termination from the kernel. The instruction
2185 // sequences to do this are documented on MSDN:
2186 // https://msdn.microsoft.com/en-us/library/dn774154.aspx
2187 llvm::Triple::ArchType ISA = getTarget().getTriple().getArch();
2188 StringRef Asm, Constraints;
2189 switch (ISA) {
2190 default:
2191 ErrorUnsupported(E, "__fastfail call for this architecture");
2192 break;
2193 case llvm::Triple::x86:
2194 case llvm::Triple::x86_64:
2195 Asm = "int $$0x29";
2196 Constraints = "{cx}";
2197 break;
2198 case llvm::Triple::thumb:
2199 Asm = "udf #251";
2200 Constraints = "{r0}";
2201 break;
2202 case llvm::Triple::aarch64:
2203 Asm = "brk #0xF003";
2204 Constraints = "{w0}";
2205 }
2206 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, {Int32Ty}, false);
2207 llvm::InlineAsm *IA =
2208 llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true);
2209 llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
2210 getLLVMContext(), llvm::AttributeList::FunctionIndex,
2211 llvm::Attribute::NoReturn);
2212 llvm::CallInst *CI = Builder.CreateCall(IA, EmitScalarExpr(E->getArg(0)));
2213 CI->setAttributes(NoReturnAttr);
2214 return CI;
2215 }
2216 }
2217 llvm_unreachable("Incorrect MSVC intrinsic!");
2218}
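// Illustrative semantics of the cases above (not part of this file):
//
//   unsigned long Index;
//   unsigned char Found = _BitScanReverse(&Index, 0x10); // Found == 1, Index == 4
//   unsigned char None  = _BitScanForward(&Index, 0);    // None == 0, Index untouched
//
// matches the ctlz/cttz plus PHI structure emitted above, and
//
//   __fastfail(7);  // e.g. FAST_FAIL_FATAL_APP_EXIT
//
// becomes "int $$0x29" with 7 in (e)cx on x86/x86-64, "udf #251" on Thumb, or
// "brk #0xF003" on AArch64.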
2219
2220namespace {
2221// ARC cleanup for __builtin_os_log_format
2222struct CallObjCArcUse final : EHScopeStack::Cleanup {
2223 CallObjCArcUse(llvm::Value *object) : object(object) {}
2224 llvm::Value *object;
2225
2226 void Emit(CodeGenFunction &CGF, Flags flags) override {
2227 CGF.EmitARCIntrinsicUse(object);
2228 }
2229};
2230}
2231
2232Value *CodeGenFunction::EmitCheckedArgForBuiltin(const Expr *E,
2233 BuiltinCheckKind Kind) {
2234 assert((Kind == BCK_CLZPassedZero || Kind == BCK_CTZPassedZero) &&
2235 "Unsupported builtin check kind");
2236
2237 Value *ArgValue = EmitScalarExpr(E);
2238 if (!SanOpts.has(SanitizerKind::Builtin))
2239 return ArgValue;
2240
2241 SanitizerScope SanScope(this);
2242 Value *Cond = Builder.CreateICmpNE(
2243 ArgValue, llvm::Constant::getNullValue(ArgValue->getType()));
2244 EmitCheck(std::make_pair(Cond, SanitizerKind::SO_Builtin),
2245 SanitizerHandler::InvalidBuiltin,
2246 {EmitCheckSourceLocation(E->getExprLoc()),
2247 llvm::ConstantInt::get(Builder.getInt8Ty(), Kind)},
2248 {});
2249 return ArgValue;
2250}
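// For illustration: with -fsanitize=builtin, a call such as
//
//   int n = __builtin_clz(x);   // undefined when x == 0
//
// is routed through the check above, which compares the argument against zero
// and invokes the InvalidBuiltin handler with kind BCK_CLZPassedZero when the
// argument is zero.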
2251
2252Value *CodeGenFunction::EmitCheckedArgForAssume(const Expr *E) {
2253 Value *ArgValue = EvaluateExprAsBool(E);
2254 if (!SanOpts.has(SanitizerKind::Builtin))
2255 return ArgValue;
2256
2257 SanitizerScope SanScope(this);
2258 EmitCheck(
2259 std::make_pair(ArgValue, SanitizerKind::SO_Builtin),
2260 SanitizerHandler::InvalidBuiltin,
2261 {EmitCheckSourceLocation(E->getExprLoc()),
2262 llvm::ConstantInt::get(Builder.getInt8Ty(), BCK_AssumePassedFalse)},
2263 std::nullopt);
2264 return ArgValue;
2265}
2266
2267static Value *EmitAbs(CodeGenFunction &CGF, Value *ArgValue, bool HasNSW) {
2268 return CGF.Builder.CreateBinaryIntrinsic(
2269 Intrinsic::abs, ArgValue,
2270 ConstantInt::get(CGF.Builder.getInt1Ty(), HasNSW));
2271}
2272
2273static Value *EmitOverflowCheckedAbs(CodeGenFunction &CGF, const CallExpr *E,
2274 bool SanitizeOverflow) {
2275 Value *ArgValue = CGF.EmitScalarExpr(E->getArg(0));
2276
2277 // Try to eliminate overflow check.
2278 if (const auto *VCI = dyn_cast<llvm::ConstantInt>(ArgValue)) {
2279 if (!VCI->isMinSignedValue())
2280 return EmitAbs(CGF, ArgValue, true);
2281 }
2282
2283 CodeGenFunction::SanitizerScope SanScope(&CGF);
2284
2285 Constant *Zero = Constant::getNullValue(ArgValue->getType());
2286 Value *ResultAndOverflow = CGF.Builder.CreateBinaryIntrinsic(
2287 Intrinsic::ssub_with_overflow, Zero, ArgValue);
2288 Value *Result = CGF.Builder.CreateExtractValue(ResultAndOverflow, 0);
2289 Value *NotOverflow = CGF.Builder.CreateNot(
2290 CGF.Builder.CreateExtractValue(ResultAndOverflow, 1));
2291
2292 // TODO: support -ftrapv-handler.
2293 if (SanitizeOverflow) {
2294 CGF.EmitCheck({{NotOverflow, SanitizerKind::SO_SignedIntegerOverflow}},
2295 SanitizerHandler::NegateOverflow,
2296 {CGF.EmitCheckSourceLocation(E->getArg(0)->getExprLoc()),
2297 CGF.EmitCheckTypeDescriptor(E->getType())},
2298 {ArgValue});
2299 } else
2300 CGF.EmitTrapCheck(NotOverflow, SanitizerHandler::SubOverflow);
2301
2302 Value *CmpResult = CGF.Builder.CreateICmpSLT(ArgValue, Zero, "abscond");
2303 return CGF.Builder.CreateSelect(CmpResult, Result, ArgValue, "abs");
2304}
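// Worked example for the path above: for __builtin_abs(x) with x == INT_MIN
// the result is not representable, so 0 - x is computed via
// llvm.ssub.with.overflow.i32 and the overflow bit fires; depending on
// SanitizeOverflow that either calls the NegateOverflow handler or traps. For
// any other x the final select simply yields |x|.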
2305
2306/// Get the argument type for arguments to os_log_helper.
2307static CanQualType getOSLogArgType(ASTContext &C, int Size) {
2308 QualType UnsignedTy = C.getIntTypeForBitwidth(Size * 8, /*Signed=*/false);
2309 return C.getCanonicalType(UnsignedTy);
2310}
2311
2312llvm::Function *CodeGenFunction::generateBuiltinOSLogHelperFunction(
2313 const analyze_os_log::OSLogBufferLayout &Layout,
2314 CharUnits BufferAlignment) {
2315 ASTContext &Ctx = getContext();
2316
2317 SmallString<64> Name;
2318 {
2319 raw_svector_ostream OS(Name);
2320 OS << "__os_log_helper";
2321 OS << "_" << BufferAlignment.getQuantity();
2322 OS << "_" << int(Layout.getSummaryByte());
2323 OS << "_" << int(Layout.getNumArgsByte());
2324 for (const auto &Item : Layout.Items)
2325 OS << "_" << int(Item.getSizeByte()) << "_"
2326 << int(Item.getDescriptorByte());
2327 }
2328
2329 if (llvm::Function *F = CGM.getModule().getFunction(Name))
2330 return F;
2331
2332 llvm::SmallVector<QualType, 4> ArgTys;
2333 FunctionArgList Args;
2334 Args.push_back(ImplicitParamDecl::Create(
2335 Ctx, nullptr, SourceLocation(), &Ctx.Idents.get("buffer"), Ctx.VoidPtrTy,
2336 ImplicitParamKind::Other));
2337 ArgTys.emplace_back(Ctx.VoidPtrTy);
2338
2339 for (unsigned int I = 0, E = Layout.Items.size(); I < E; ++I) {
2340 char Size = Layout.Items[I].getSizeByte();
2341 if (!Size)
2342 continue;
2343
2344 QualType ArgTy = getOSLogArgType(Ctx, Size);
2345 Args.push_back(ImplicitParamDecl::Create(
2346 Ctx, nullptr, SourceLocation(),
2347 &Ctx.Idents.get(std::string("arg") + llvm::to_string(I)), ArgTy,
2348 ImplicitParamKind::Other));
2349 ArgTys.emplace_back(ArgTy);
2350 }
2351
2352 QualType ReturnTy = Ctx.VoidTy;
2353
2354 // The helper function has linkonce_odr linkage to enable the linker to merge
2355 // identical functions. To ensure the merging always happens, 'noinline' is
2356 // attached to the function when compiling with -Oz.
2357 const CGFunctionInfo &FI =
2358 CGM.getTypes().arrangeBuiltinFunctionDeclaration(ReturnTy, Args);
2359 llvm::FunctionType *FuncTy = CGM.getTypes().GetFunctionType(FI);
2360 llvm::Function *Fn = llvm::Function::Create(
2361 FuncTy, llvm::GlobalValue::LinkOnceODRLinkage, Name, &CGM.getModule());
2362 Fn->setVisibility(llvm::GlobalValue::HiddenVisibility);
2363 CGM.SetLLVMFunctionAttributes(GlobalDecl(), FI, Fn, /*IsThunk=*/false);
2364 CGM.SetLLVMFunctionAttributesForDefinition(nullptr, Fn);
2365 Fn->setDoesNotThrow();
2366
2367 // Attach 'noinline' at -Oz.
2368 if (CGM.getCodeGenOpts().OptimizeSize == 2)
2369 Fn->addFnAttr(llvm::Attribute::NoInline);
2370
2371 auto NL = ApplyDebugLocation::CreateEmpty(*this);
2372 StartFunction(GlobalDecl(), ReturnTy, Fn, FI, Args);
2373
2374 // Create a scope with an artificial location for the body of this function.
2375 auto AL = ApplyDebugLocation::CreateArtificial(*this);
2376
2377 CharUnits Offset;
2378 Address BufAddr = makeNaturalAddressForPointer(
2379 Builder.CreateLoad(GetAddrOfLocalVar(Args[0]), "buf"), Ctx.VoidTy,
2380 BufferAlignment);
2381 Builder.CreateStore(Builder.getInt8(Layout.getSummaryByte()),
2382 Builder.CreateConstByteGEP(BufAddr, Offset++, "summary"));
2383 Builder.CreateStore(Builder.getInt8(Layout.getNumArgsByte()),
2384 Builder.CreateConstByteGEP(BufAddr, Offset++, "numArgs"));
2385
2386 unsigned I = 1;
2387 for (const auto &Item : Layout.Items) {
2388 Builder.CreateStore(
2389 Builder.getInt8(Item.getDescriptorByte()),
2390 Builder.CreateConstByteGEP(BufAddr, Offset++, "argDescriptor"));
2391 Builder.CreateStore(
2392 Builder.getInt8(Item.getSizeByte()),
2393 Builder.CreateConstByteGEP(BufAddr, Offset++, "argSize"));
2394
2395 CharUnits Size = Item.size();
2396 if (!Size.getQuantity())
2397 continue;
2398
2399 Address Arg = GetAddrOfLocalVar(Args[I]);
2400 Address Addr = Builder.CreateConstByteGEP(BufAddr, Offset, "argData");
2401 Addr = Addr.withElementType(Arg.getElementType());
2402 Builder.CreateMemCpy(Addr, Arg, Size.getQuantity());
2403 Offset += Size;
2404 ++I;
2405 }
2406
2407 FinishFunction();
2408
2409 return Fn;
2410}
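// The helper generated above writes a byte-oriented buffer. For a layout with
// a single 4-byte argument it stores, in order:
//
//   buf[0]    summary byte
//   buf[1]    number-of-arguments byte
//   buf[2]    descriptor byte for item 0
//   buf[3]    size byte for item 0 (4)
//   buf[4..7] the 32-bit argument value
//
// which is the os_log buffer format that emitBuiltinOSLogFormat below fills in
// by calling this helper.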
2411
2412RValue CodeGenFunction::emitBuiltinOSLogFormat(const CallExpr &E) {
2413 assert(E.getNumArgs() >= 2 &&
2414 "__builtin_os_log_format takes at least 2 arguments");
2415 ASTContext &Ctx = getContext();
2416 analyze_os_log::OSLogBufferLayout Layout;
2417 analyze_os_log::computeOSLogBufferLayout(Ctx, &E, Layout);
2418 Address BufAddr = EmitPointerWithAlignment(E.getArg(0));
2419 llvm::SmallVector<llvm::Value *, 4> RetainableOperands;
2420
2421 // Ignore argument 1, the format string. It is not currently used.
2422 CallArgList Args;
2423 Args.add(RValue::get(BufAddr.emitRawPointer(*this)), Ctx.VoidPtrTy);
2424
2425 for (const auto &Item : Layout.Items) {
2426 int Size = Item.getSizeByte();
2427 if (!Size)
2428 continue;
2429
2430 llvm::Value *ArgVal;
2431
2432 if (Item.getKind() == analyze_os_log::OSLogBufferItem::MaskKind) {
2433 uint64_t Val = 0;
2434 for (unsigned I = 0, E = Item.getMaskType().size(); I < E; ++I)
2435 Val |= ((uint64_t)Item.getMaskType()[I]) << I * 8;
2436 ArgVal = llvm::Constant::getIntegerValue(Int64Ty, llvm::APInt(64, Val));
2437 } else if (const Expr *TheExpr = Item.getExpr()) {
2438 ArgVal = EmitScalarExpr(TheExpr, /*Ignore*/ false);
2439
2440 // If a temporary object that requires destruction after the full
2441 // expression is passed, push a lifetime-extended cleanup to extend its
2442 // lifetime to the end of the enclosing block scope.
2443 auto LifetimeExtendObject = [&](const Expr *E) {
2444 E = E->IgnoreParenCasts();
2445 // Extend lifetimes of objects returned by function calls and message
2446 // sends.
2447
2448 // FIXME: We should do this in other cases in which temporaries are
2449 // created including arguments of non-ARC types (e.g., C++
2450 // temporaries).
2451 if (isa<CallExpr>(E) || isa<ObjCMessageExpr>(E))
2452 return true;
2453 return false;
2454 };
2455
2456 if (TheExpr->getType()->isObjCRetainableType() &&
2457 getLangOpts().ObjCAutoRefCount && LifetimeExtendObject(TheExpr)) {
2458 assert(getEvaluationKind(TheExpr->getType()) == TEK_Scalar &&
2459 "Only scalar can be a ObjC retainable type");
2460 if (!isa<Constant>(ArgVal)) {
2461 CleanupKind Cleanup = getARCCleanupKind();
2462 QualType Ty = TheExpr->getType();
2463 RawAddress Alloca = RawAddress::invalid();
2464 RawAddress Addr = CreateMemTemp(Ty, "os.log.arg", &Alloca);
2465 ArgVal = EmitARCRetain(Ty, ArgVal);
2466 Builder.CreateStore(ArgVal, Addr);
2467 pushLifetimeExtendedDestroy(Cleanup, Alloca, Ty,
2468 destroyARCStrongPrecise,
2469 Cleanup & EHCleanup);
2470
2471 // Push a clang.arc.use call to ensure ARC optimizer knows that the
2472 // argument has to be alive.
2473 if (CGM.getCodeGenOpts().OptimizationLevel != 0)
2474 pushCleanupAfterFullExpr<CallObjCArcUse>(Cleanup, ArgVal);
2475 }
2476 }
2477 } else {
2478 ArgVal = Builder.getInt32(Item.getConstValue().getQuantity());
2479 }
2480
2481 unsigned ArgValSize =
2482 CGM.getDataLayout().getTypeSizeInBits(ArgVal->getType());
2483 llvm::IntegerType *IntTy = llvm::Type::getIntNTy(getLLVMContext(),
2484 ArgValSize);
2485 ArgVal = Builder.CreateBitOrPointerCast(ArgVal, IntTy);
2486 CanQualType ArgTy = getOSLogArgType(Ctx, Size);
2487 // If ArgVal has type x86_fp80, zero-extend ArgVal.
2488 ArgVal = Builder.CreateZExtOrBitCast(ArgVal, ConvertType(ArgTy));
2489 Args.add(RValue::get(ArgVal), ArgTy);
2490 }
2491
2492 const CGFunctionInfo &FI =
2493 CGM.getTypes().arrangeBuiltinFunctionCall(Ctx.VoidTy, Args);
2494 llvm::Function *F = CodeGenFunction(CGM).generateBuiltinOSLogHelperFunction(
2495 Layout, BufAddr.getAlignment());
2496 EmitCall(FI, CGCallee::forDirect(F), ReturnValueSlot(), Args);
2497 return RValue::get(BufAddr, *this);
2498}
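// Caller-side sketch (illustrative; Apple os_log support):
//
//   char buf[__builtin_os_log_format_buffer_size("%d: %s", x, str)];
//   __builtin_os_log_format(buf, "%d: %s", x, str);
//
// The second builtin is what lands here: the format string itself is ignored,
// each scalar argument is widened to the size recorded in the layout, and the
// shared __os_log_helper_* function generated above is called to fill the
// buffer.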
2499
2500static bool isSpecialUnsignedMultiplySignedResult(
2501 unsigned BuiltinID, WidthAndSignedness Op1Info, WidthAndSignedness Op2Info,
2502 WidthAndSignedness ResultInfo) {
2503 return BuiltinID == Builtin::BI__builtin_mul_overflow &&
2504 Op1Info.Width == Op2Info.Width && Op2Info.Width == ResultInfo.Width &&
2505 !Op1Info.Signed && !Op2Info.Signed && ResultInfo.Signed;
2506}
2507
2508static RValue EmitCheckedUnsignedMultiplySignedResult(
2509 CodeGenFunction &CGF, const clang::Expr *Op1, WidthAndSignedness Op1Info,
2510 const clang::Expr *Op2, WidthAndSignedness Op2Info,
2511 const clang::Expr *ResultArg, QualType ResultQTy,
2512 WidthAndSignedness ResultInfo) {
2513 assert(isSpecialUnsignedMultiplySignedResult(
2514 Builtin::BI__builtin_mul_overflow, Op1Info, Op2Info, ResultInfo) &&
2515 "Cannot specialize this multiply");
2516
2517 llvm::Value *V1 = CGF.EmitScalarExpr(Op1);
2518 llvm::Value *V2 = CGF.EmitScalarExpr(Op2);
2519
2520 llvm::Value *HasOverflow;
2521 llvm::Value *Result = EmitOverflowIntrinsic(
2522 CGF, llvm::Intrinsic::umul_with_overflow, V1, V2, HasOverflow);
2523
2524 // The intrinsic call will detect overflow when the value is > UINT_MAX;
2525 // however, since the original builtin had a signed result, we need to report
2526 // an overflow when the result is greater than INT_MAX.
2527 auto IntMax = llvm::APInt::getSignedMaxValue(ResultInfo.Width);
2528 llvm::Value *IntMaxValue = llvm::ConstantInt::get(Result->getType(), IntMax);
2529
2530 llvm::Value *IntMaxOverflow = CGF.Builder.CreateICmpUGT(Result, IntMaxValue);
2531 HasOverflow = CGF.Builder.CreateOr(HasOverflow, IntMaxOverflow);
2532
2533 bool isVolatile =
2534 ResultArg->getType()->getPointeeType().isVolatileQualified();
2535 Address ResultPtr = CGF.EmitPointerWithAlignment(ResultArg);
2536 CGF.Builder.CreateStore(CGF.EmitToMemory(Result, ResultQTy), ResultPtr,
2537 isVolatile);
2538 return RValue::get(HasOverflow);
2539}
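// Example for the specialization above: __builtin_mul_overflow(a, b, &res)
// with 'unsigned a, b' and 'int res' uses umul.with.overflow and additionally
// reports overflow whenever the product exceeds INT_MAX; e.g. 0x80000000u * 1u
// fits in 32 unsigned bits but not in the signed result.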
2540
2541/// Determine if a binop is a checked mixed-sign multiply we can specialize.
2542static bool isSpecialMixedSignMultiply(unsigned BuiltinID,
2543 WidthAndSignedness Op1Info,
2544 WidthAndSignedness Op2Info,
2545 WidthAndSignedness ResultInfo) {
2546 return BuiltinID == Builtin::BI__builtin_mul_overflow &&
2547 std::max(Op1Info.Width, Op2Info.Width) >= ResultInfo.Width &&
2548 Op1Info.Signed != Op2Info.Signed;
2549}
2550
2551/// Emit a checked mixed-sign multiply. This is a cheaper specialization of
2552/// the generic checked-binop irgen.
2553static RValue
2554EmitCheckedMixedSignMultiply(CodeGenFunction &CGF, const clang::Expr *Op1,
2555 WidthAndSignedness Op1Info, const clang::Expr *Op2,
2556 WidthAndSignedness Op2Info,
2557 const clang::Expr *ResultArg, QualType ResultQTy,
2558 WidthAndSignedness ResultInfo) {
2559 assert(isSpecialMixedSignMultiply(Builtin::BI__builtin_mul_overflow, Op1Info,
2560 Op2Info, ResultInfo) &&
2561 "Not a mixed-sign multiplication we can specialize");
2562
2563 // Emit the signed and unsigned operands.
2564 const clang::Expr *SignedOp = Op1Info.Signed ? Op1 : Op2;
2565 const clang::Expr *UnsignedOp = Op1Info.Signed ? Op2 : Op1;
2566 llvm::Value *Signed = CGF.EmitScalarExpr(SignedOp);
2567 llvm::Value *Unsigned = CGF.EmitScalarExpr(UnsignedOp);
2568 unsigned SignedOpWidth = Op1Info.Signed ? Op1Info.Width : Op2Info.Width;
2569 unsigned UnsignedOpWidth = Op1Info.Signed ? Op2Info.Width : Op1Info.Width;
2570
2571 // One of the operands may be smaller than the other. If so, [s|z]ext it.
2572 if (SignedOpWidth < UnsignedOpWidth)
2573 Signed = CGF.Builder.CreateSExt(Signed, Unsigned->getType(), "op.sext");
2574 if (UnsignedOpWidth < SignedOpWidth)
2575 Unsigned = CGF.Builder.CreateZExt(Unsigned, Signed->getType(), "op.zext");
2576
2577 llvm::Type *OpTy = Signed->getType();
2578 llvm::Value *Zero = llvm::Constant::getNullValue(OpTy);
2579 Address ResultPtr = CGF.EmitPointerWithAlignment(ResultArg);
2580 llvm::Type *ResTy = ResultPtr.getElementType();
2581 unsigned OpWidth = std::max(Op1Info.Width, Op2Info.Width);
2582
2583 // Take the absolute value of the signed operand.
2584 llvm::Value *IsNegative = CGF.Builder.CreateICmpSLT(Signed, Zero);
2585 llvm::Value *AbsOfNegative = CGF.Builder.CreateSub(Zero, Signed);
2586 llvm::Value *AbsSigned =
2587 CGF.Builder.CreateSelect(IsNegative, AbsOfNegative, Signed);
2588
2589 // Perform a checked unsigned multiplication.
2590 llvm::Value *UnsignedOverflow;
2591 llvm::Value *UnsignedResult =
2592 EmitOverflowIntrinsic(CGF, llvm::Intrinsic::umul_with_overflow, AbsSigned,
2593 Unsigned, UnsignedOverflow);
2594
2595 llvm::Value *Overflow, *Result;
2596 if (ResultInfo.Signed) {
2597 // Signed overflow occurs if the result is greater than INT_MAX or less
2598 // than INT_MIN, i.e., when |Result| > (INT_MAX + IsNegative).
2599 auto IntMax =
2600 llvm::APInt::getSignedMaxValue(ResultInfo.Width).zext(OpWidth);
2601 llvm::Value *MaxResult =
2602 CGF.Builder.CreateAdd(llvm::ConstantInt::get(OpTy, IntMax),
2603 CGF.Builder.CreateZExt(IsNegative, OpTy));
2604 llvm::Value *SignedOverflow =
2605 CGF.Builder.CreateICmpUGT(UnsignedResult, MaxResult);
2606 Overflow = CGF.Builder.CreateOr(UnsignedOverflow, SignedOverflow);
2607
2608 // Prepare the signed result (possibly by negating it).
2609 llvm::Value *NegativeResult = CGF.Builder.CreateNeg(UnsignedResult);
2610 llvm::Value *SignedResult =
2611 CGF.Builder.CreateSelect(IsNegative, NegativeResult, UnsignedResult);
2612 Result = CGF.Builder.CreateTrunc(SignedResult, ResTy);
2613 } else {
2614 // Unsigned overflow occurs if the result is < 0 or greater than UINT_MAX.
2615 llvm::Value *Underflow = CGF.Builder.CreateAnd(
2616 IsNegative, CGF.Builder.CreateIsNotNull(UnsignedResult));
2617 Overflow = CGF.Builder.CreateOr(UnsignedOverflow, Underflow);
2618 if (ResultInfo.Width < OpWidth) {
2619 auto IntMax =
2620 llvm::APInt::getMaxValue(ResultInfo.Width).zext(OpWidth);
2621 llvm::Value *TruncOverflow = CGF.Builder.CreateICmpUGT(
2622 UnsignedResult, llvm::ConstantInt::get(OpTy, IntMax));
2623 Overflow = CGF.Builder.CreateOr(Overflow, TruncOverflow);
2624 }
2625
2626 // Negate the product if it would be negative in infinite precision.
2627 Result = CGF.Builder.CreateSelect(
2628 IsNegative, CGF.Builder.CreateNeg(UnsignedResult), UnsignedResult);
2629
2630 Result = CGF.Builder.CreateTrunc(Result, ResTy);
2631 }
2632 assert(Overflow && Result && "Missing overflow or result");
2633
2634 bool isVolatile =
2635 ResultArg->getType()->getPointeeType().isVolatileQualified();
2636 CGF.Builder.CreateStore(CGF.EmitToMemory(Result, ResultQTy), ResultPtr,
2637 isVolatile);
2638 return RValue::get(Overflow);
2639}
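// Worked example for the mixed-sign path: __builtin_mul_overflow(-3, 5u, &res)
// with 'int res' takes |-3| == 3, computes 3 * 5 == 15 with umul.with.overflow
// (no overflow), compares 15 against INT_MAX + 1 because the signed operand
// was negative, then negates, stores -15, and returns false.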
2640
2641static bool
2642TypeRequiresBuiltinLaunderImp(const ASTContext &Ctx, QualType Ty,
2643 llvm::SmallPtrSetImpl<const Decl *> &Seen) {
2644 if (const auto *Arr = Ctx.getAsArrayType(Ty))
2645 Ty = Ctx.getBaseElementType(Arr);
2646
2647 const auto *Record = Ty->getAsCXXRecordDecl();
2648 if (!Record)
2649 return false;
2650
2651 // We've already checked this type, or are in the process of checking it.
2652 if (!Seen.insert(Record).second)
2653 return false;
2654
2655 assert(Record->hasDefinition() &&
2656 "Incomplete types should already be diagnosed");
2657
2658 if (Record->isDynamicClass())
2659 return true;
2660
2661 for (FieldDecl *F : Record->fields()) {
2662 if (TypeRequiresBuiltinLaunderImp(Ctx, F->getType(), Seen))
2663 return true;
2664 }
2665 return false;
2666}
2667
2668/// Determine if the specified type requires laundering by checking if it is a
2669/// dynamic class type or contains a subobject which is a dynamic class type.
2670static bool TypeRequiresBuiltinLaunder(CodeGenModule &CGM, QualType Ty) {
2671 if (!CGM.getCodeGenOpts().StrictVTablePointers)
2672 return false;
2673 llvm::SmallPtrSet<const Decl *, 16> Seen;
2674 return TypeRequiresBuiltinLaunderImp(CGM.getContext(), Ty, Seen);
2675}
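// For illustration, under -fstrict-vtable-pointers:
//
//   struct Base   { virtual ~Base(); };
//   struct Holder { int i; Base b[2]; };  // requires laundering
//   struct Plain  { int i; };             // does not
//
// so __builtin_launder(p) emits @llvm.launder.invariant.group only for types
// like Base or Holder; for Plain it is a no-op.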
2676
2677RValue CodeGenFunction::emitRotate(const CallExpr *E, bool IsRotateRight) {
2678 llvm::Value *Src = EmitScalarExpr(E->getArg(0));
2679 llvm::Value *ShiftAmt = EmitScalarExpr(E->getArg(1));
2680
2681 // The builtin's shift arg may have a different type than the source arg and
2682 // result, but the LLVM intrinsic uses the same type for all values.
2683 llvm::Type *Ty = Src->getType();
2684 ShiftAmt = Builder.CreateIntCast(ShiftAmt, Ty, false);
2685
2686 // Rotate is a special case of LLVM funnel shift - 1st 2 args are the same.
2687 unsigned IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
2688 Function *F = CGM.getIntrinsic(IID, Ty);
2689 return RValue::get(Builder.CreateCall(F, { Src, Src, ShiftAmt }));
2690}
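// For example, __builtin_rotateleft32(x, n) is emitted as
// @llvm.fshl.i32(x, x, n): a funnel shift whose two inputs are the same value
// is exactly a rotate, and fshr gives the rotate-right builtins the same way.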
2691
2692// Map math builtins for long-double to f128 version.
2693static unsigned mutateLongDoubleBuiltin(unsigned BuiltinID) {
2694 switch (BuiltinID) {
2695#define MUTATE_LDBL(func) \
2696 case Builtin::BI__builtin_##func##l: \
2697 return Builtin::BI__builtin_##func##f128;
2728 MUTATE_LDBL(nans)
2729 MUTATE_LDBL(inf)
2748 MUTATE_LDBL(huge_val)
2758#undef MUTATE_LDBL
2759 default:
2760 return BuiltinID;
2761 }
2762}
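// Example: when long double on PPC64 is IEEE binary128 (e.g. with
// -mabi=ieeelongdouble), a call to __builtin_huge_vall is rewritten by this
// mapping to __builtin_huge_valf128 before the normal lowering below runs.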
2763
2764static Value *tryUseTestFPKind(CodeGenFunction &CGF, unsigned BuiltinID,
2765 Value *V) {
2766 if (CGF.Builder.getIsFPConstrained() &&
2767 CGF.Builder.getDefaultConstrainedExcept() != fp::ebIgnore) {
2768 if (Value *Result =
2769 CGF.getTargetHooks().testFPKind(V, BuiltinID, CGF.Builder, CGF.CGM))
2770 return Result;
2771 }
2772 return nullptr;
2773}
2774
2775static RValue EmitHipStdParUnsupportedBuiltin(CodeGenFunction *CGF,
2776 const FunctionDecl *FD) {
2777 auto Name = FD->getNameAsString() + "__hipstdpar_unsupported";
2778 auto FnTy = CGF->CGM.getTypes().GetFunctionType(FD);
2779 auto UBF = CGF->CGM.getModule().getOrInsertFunction(Name, FnTy);
2780
2781 SmallVector<Value *, 16> Args;
2782 for (auto &&FormalTy : FnTy->params())
2783 Args.push_back(llvm::PoisonValue::get(FormalTy));
2784
2785 return RValue::get(CGF->Builder.CreateCall(UBF, Args));
2786}
2787
2788RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
2789 const CallExpr *E,
2790 ReturnValueSlot ReturnValue) {
2791 assert(!getContext().BuiltinInfo.isImmediate(BuiltinID) &&
2792 "Should not codegen for consteval builtins");
2793
2794 const FunctionDecl *FD = GD.getDecl()->getAsFunction();
2795 // See if we can constant fold this builtin. If so, don't emit it at all.
2796 // TODO: Extend this handling to all builtin calls that we can constant-fold.
2797 Expr::EvalResult Result;
2798 if (E->isPRValue() && E->EvaluateAsRValue(Result, CGM.getContext()) &&
2799 !Result.hasSideEffects()) {
2800 if (Result.Val.isInt())
2801 return RValue::get(llvm::ConstantInt::get(getLLVMContext(),
2802 Result.Val.getInt()));
2803 if (Result.Val.isFloat())
2804 return RValue::get(llvm::ConstantFP::get(getLLVMContext(),
2805 Result.Val.getFloat()));
2806 }
2807
2808 // If current long-double semantics is IEEE 128-bit, replace math builtins
2809 // of long-double with f128 equivalent.
2810 // TODO: This mutation should also be applied to targets other than PPC,
2811 // after backend supports IEEE 128-bit style libcalls.
2812 if (getTarget().getTriple().isPPC64() &&
2813 &getTarget().getLongDoubleFormat() == &llvm::APFloat::IEEEquad())
2814 BuiltinID = mutateLongDoubleBuiltin(BuiltinID);
2815
2816 // If the builtin has been declared explicitly with an assembler label,
2817 // disable the specialized emitting below. Ideally we should communicate the
2818 // rename in IR, or at least avoid generating the intrinsic calls that are
2819 // likely to get lowered to the renamed library functions.
2820 const unsigned BuiltinIDIfNoAsmLabel =
2821 FD->hasAttr<AsmLabelAttr>() ? 0 : BuiltinID;
2822
2823 std::optional<bool> ErrnoOverriden;
2824 // ErrnoOverriden is true if math-errno is overridden via the
2825 // '#pragma float_control(precise, on)'. This pragma disables fast-math,
2826 // which implies math-errno.
2827 if (E->hasStoredFPFeatures()) {
2828 FPOptionsOverride OP = E->getFPFeatures();
2829 if (OP.hasMathErrnoOverride())
2830 ErrnoOverriden = OP.getMathErrnoOverride();
2831 }
2832 // True if '__attribute__((optnone))' is used. This attribute overrides
2833 // fast-math which implies math-errno.
2834 bool OptNone = CurFuncDecl && CurFuncDecl->hasAttr<OptimizeNoneAttr>();
2835
2836 // True if we are compiling at -O2 and errno has been disabled
2837 // using the '#pragma float_control(precise, off)', and
2838 // attribute opt-none hasn't been seen.
2839 bool ErrnoOverridenToFalseWithOpt =
2840 ErrnoOverriden.has_value() && !ErrnoOverriden.value() && !OptNone &&
2841 CGM.getCodeGenOpts().OptimizationLevel != 0;
2842
2843 // There are LLVM math intrinsics/instructions corresponding to math library
2844 // functions except the LLVM op will never set errno while the math library
2845 // might. Also, math builtins have the same semantics as their math library
2846 // twins. Thus, we can transform math library and builtin calls to their
2847 // LLVM counterparts if the call is marked 'const' (known to never set errno).
2848 // In case FP exceptions are enabled, the experimental versions of the
2849 // intrinsics model those.
2850 bool ConstAlways =
2851 getContext().BuiltinInfo.isConst(BuiltinID);
2852
2853 // There's a special case with the fma builtins where they are always const
2854 // if the target environment is GNU or the target OS is Windows and we're
2855 // targeting the MSVCRT.dll environment.
2856 // FIXME: This list can become outdated. Need to find a way to get it some
2857 // other way.
2858 switch (BuiltinID) {
2859 case Builtin::BI__builtin_fma:
2860 case Builtin::BI__builtin_fmaf:
2861 case Builtin::BI__builtin_fmal:
2862 case Builtin::BI__builtin_fmaf16:
2863 case Builtin::BIfma:
2864 case Builtin::BIfmaf:
2865 case Builtin::BIfmal: {
2866 auto &Trip = CGM.getTriple();
2867 if (Trip.isGNUEnvironment() || Trip.isOSMSVCRT())
2868 ConstAlways = true;
2869 break;
2870 }
2871 default:
2872 break;
2873 }
2874
2875 bool ConstWithoutErrnoAndExceptions =
2876 getContext().BuiltinInfo.isConstWithoutErrnoAndExceptions(BuiltinID);
2877 bool ConstWithoutExceptions =
2878 getContext().BuiltinInfo.isConstWithoutExceptions(BuiltinID);
2879
2880 // ConstAttr is enabled in fast-math mode. In fast-math mode, math-errno is
2881 // disabled.
2882 // Math intrinsics are generated only when math-errno is disabled. Any pragmas
2883 // or attributes that affect math-errno should prevent or allow math
2884 // intrinsics to be generated. Intrinsics are generated:
2885 // 1- In fast math mode, unless math-errno is overridden
2886 // via '#pragma float_control(precise, on)', or via an
2887 // '__attribute__((optnone))'.
2888 // 2- If math-errno was enabled on the command line but overridden
2889 // to false via '#pragma float_control(precise, off)' and
2890 // '__attribute__((optnone))' hasn't been used.
2891 // 3- If we are compiling with optimization and errno has been disabled
2892 // via '#pragma float_control(precise, off)', and
2893 // '__attribute__((optnone))' hasn't been used.
2894
2895 bool ConstWithoutErrnoOrExceptions =
2896 ConstWithoutErrnoAndExceptions || ConstWithoutExceptions;
2897 bool GenerateIntrinsics =
2898 (ConstAlways && !OptNone) ||
2899 (!getLangOpts().MathErrno &&
2900 !(ErrnoOverriden.has_value() && ErrnoOverriden.value()) && !OptNone);
2901 if (!GenerateIntrinsics) {
2902 GenerateIntrinsics =
2903 ConstWithoutErrnoOrExceptions && !ConstWithoutErrnoAndExceptions;
2904 if (!GenerateIntrinsics)
2905 GenerateIntrinsics =
2906 ConstWithoutErrnoOrExceptions &&
2907 (!getLangOpts().MathErrno &&
2908 !(ErrnoOverriden.has_value() && ErrnoOverriden.value()) && !OptNone);
2909 if (!GenerateIntrinsics)
2910 GenerateIntrinsics =
2911 ConstWithoutErrnoOrExceptions && ErrnoOverridenToFalseWithOpt;
2912 }
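  // For instance, sqrt(x) compiled with -fno-math-errno (or a 'const' libm
  // declaration) takes the intrinsic path below and becomes @llvm.sqrt.f64,
  // which can be constant-folded and vectorized; with math-errno in effect and
  // no overriding pragma, GenerateIntrinsics stays false and a plain libcall
  // is emitted instead.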
2913 if (GenerateIntrinsics) {
2914 switch (BuiltinIDIfNoAsmLabel) {
2915 case Builtin::BIacos:
2916 case Builtin::BIacosf:
2917 case Builtin::BIacosl:
2918 case Builtin::BI__builtin_acos:
2919 case Builtin::BI__builtin_acosf:
2920 case Builtin::BI__builtin_acosf16:
2921 case Builtin::BI__builtin_acosl:
2922 case Builtin::BI__builtin_acosf128:
2924 *this, E, Intrinsic::acos, Intrinsic::experimental_constrained_acos));
2925
2926 case Builtin::BIasin:
2927 case Builtin::BIasinf:
2928 case Builtin::BIasinl:
2929 case Builtin::BI__builtin_asin:
2930 case Builtin::BI__builtin_asinf:
2931 case Builtin::BI__builtin_asinf16:
2932 case Builtin::BI__builtin_asinl:
2933 case Builtin::BI__builtin_asinf128:
2935 *this, E, Intrinsic::asin, Intrinsic::experimental_constrained_asin));
2936
2937 case Builtin::BIatan:
2938 case Builtin::BIatanf:
2939 case Builtin::BIatanl:
2940 case Builtin::BI__builtin_atan:
2941 case Builtin::BI__builtin_atanf:
2942 case Builtin::BI__builtin_atanf16:
2943 case Builtin::BI__builtin_atanl:
2944 case Builtin::BI__builtin_atanf128:
2946 *this, E, Intrinsic::atan, Intrinsic::experimental_constrained_atan));
2947
2948 case Builtin::BIatan2:
2949 case Builtin::BIatan2f:
2950 case Builtin::BIatan2l:
2951 case Builtin::BI__builtin_atan2:
2952 case Builtin::BI__builtin_atan2f:
2953 case Builtin::BI__builtin_atan2f16:
2954 case Builtin::BI__builtin_atan2l:
2955 case Builtin::BI__builtin_atan2f128:
2957 *this, E, Intrinsic::atan2,
2958 Intrinsic::experimental_constrained_atan2));
2959
2960 case Builtin::BIceil:
2961 case Builtin::BIceilf:
2962 case Builtin::BIceill:
2963 case Builtin::BI__builtin_ceil:
2964 case Builtin::BI__builtin_ceilf:
2965 case Builtin::BI__builtin_ceilf16:
2966 case Builtin::BI__builtin_ceill:
2967 case Builtin::BI__builtin_ceilf128:
2969 Intrinsic::ceil,
2970 Intrinsic::experimental_constrained_ceil));
2971
2972 case Builtin::BIcopysign:
2973 case Builtin::BIcopysignf:
2974 case Builtin::BIcopysignl:
2975 case Builtin::BI__builtin_copysign:
2976 case Builtin::BI__builtin_copysignf:
2977 case Builtin::BI__builtin_copysignf16:
2978 case Builtin::BI__builtin_copysignl:
2979 case Builtin::BI__builtin_copysignf128:
2980 return RValue::get(
2981 emitBuiltinWithOneOverloadedType<2>(*this, E, Intrinsic::copysign));
2982
2983 case Builtin::BIcos:
2984 case Builtin::BIcosf:
2985 case Builtin::BIcosl:
2986 case Builtin::BI__builtin_cos:
2987 case Builtin::BI__builtin_cosf:
2988 case Builtin::BI__builtin_cosf16:
2989 case Builtin::BI__builtin_cosl:
2990 case Builtin::BI__builtin_cosf128:
2991 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2992 Intrinsic::cos,
2993 Intrinsic::experimental_constrained_cos));
2994
2995 case Builtin::BIcosh:
2996 case Builtin::BIcoshf:
2997 case Builtin::BIcoshl:
2998 case Builtin::BI__builtin_cosh:
2999 case Builtin::BI__builtin_coshf:
3000 case Builtin::BI__builtin_coshf16:
3001 case Builtin::BI__builtin_coshl:
3002 case Builtin::BI__builtin_coshf128:
3003 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
3004 *this, E, Intrinsic::cosh, Intrinsic::experimental_constrained_cosh));
3005
3006 case Builtin::BIexp:
3007 case Builtin::BIexpf:
3008 case Builtin::BIexpl:
3009 case Builtin::BI__builtin_exp:
3010 case Builtin::BI__builtin_expf:
3011 case Builtin::BI__builtin_expf16:
3012 case Builtin::BI__builtin_expl:
3013 case Builtin::BI__builtin_expf128:
3014 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
3015 Intrinsic::exp,
3016 Intrinsic::experimental_constrained_exp));
3017
3018 case Builtin::BIexp2:
3019 case Builtin::BIexp2f:
3020 case Builtin::BIexp2l:
3021 case Builtin::BI__builtin_exp2:
3022 case Builtin::BI__builtin_exp2f:
3023 case Builtin::BI__builtin_exp2f16:
3024 case Builtin::BI__builtin_exp2l:
3025 case Builtin::BI__builtin_exp2f128:
3026 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
3027 Intrinsic::exp2,
3028 Intrinsic::experimental_constrained_exp2));
3029 case Builtin::BI__builtin_exp10:
3030 case Builtin::BI__builtin_exp10f:
3031 case Builtin::BI__builtin_exp10f16:
3032 case Builtin::BI__builtin_exp10l:
3033 case Builtin::BI__builtin_exp10f128: {
3034 // TODO: strictfp support
3035 if (Builder.getIsFPConstrained())
3036 break;
3037 return RValue::get(
3038 emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::exp10));
3039 }
3040 case Builtin::BIfabs:
3041 case Builtin::BIfabsf:
3042 case Builtin::BIfabsl:
3043 case Builtin::BI__builtin_fabs:
3044 case Builtin::BI__builtin_fabsf:
3045 case Builtin::BI__builtin_fabsf16:
3046 case Builtin::BI__builtin_fabsl:
3047 case Builtin::BI__builtin_fabsf128:
3048 return RValue::get(
3049 emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::fabs));
3050
3051 case Builtin::BIfloor:
3052 case Builtin::BIfloorf:
3053 case Builtin::BIfloorl:
3054 case Builtin::BI__builtin_floor:
3055 case Builtin::BI__builtin_floorf:
3056 case Builtin::BI__builtin_floorf16:
3057 case Builtin::BI__builtin_floorl:
3058 case Builtin::BI__builtin_floorf128:
3059 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
3060 Intrinsic::floor,
3061 Intrinsic::experimental_constrained_floor));
3062
3063 case Builtin::BIfma:
3064 case Builtin::BIfmaf:
3065 case Builtin::BIfmal:
3066 case Builtin::BI__builtin_fma:
3067 case Builtin::BI__builtin_fmaf:
3068 case Builtin::BI__builtin_fmaf16:
3069 case Builtin::BI__builtin_fmal:
3070 case Builtin::BI__builtin_fmaf128:
3071 return RValue::get(emitTernaryMaybeConstrainedFPBuiltin(*this, E,
3072 Intrinsic::fma,
3073 Intrinsic::experimental_constrained_fma));
3074
3075 case Builtin::BIfmax:
3076 case Builtin::BIfmaxf:
3077 case Builtin::BIfmaxl:
3078 case Builtin::BI__builtin_fmax:
3079 case Builtin::BI__builtin_fmaxf:
3080 case Builtin::BI__builtin_fmaxf16:
3081 case Builtin::BI__builtin_fmaxl:
3082 case Builtin::BI__builtin_fmaxf128:
3083 return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E,
3084 Intrinsic::maxnum,
3085 Intrinsic::experimental_constrained_maxnum));
3086
3087 case Builtin::BIfmin:
3088 case Builtin::BIfminf:
3089 case Builtin::BIfminl:
3090 case Builtin::BI__builtin_fmin:
3091 case Builtin::BI__builtin_fminf:
3092 case Builtin::BI__builtin_fminf16:
3093 case Builtin::BI__builtin_fminl:
3094 case Builtin::BI__builtin_fminf128:
3095 return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E,
3096 Intrinsic::minnum,
3097 Intrinsic::experimental_constrained_minnum));
3098
3099 case Builtin::BIfmaximum_num:
3100 case Builtin::BIfmaximum_numf:
3101 case Builtin::BIfmaximum_numl:
3102 case Builtin::BI__builtin_fmaximum_num:
3103 case Builtin::BI__builtin_fmaximum_numf:
3104 case Builtin::BI__builtin_fmaximum_numf16:
3105 case Builtin::BI__builtin_fmaximum_numl:
3106 case Builtin::BI__builtin_fmaximum_numf128:
3107 return RValue::get(
3108 emitBuiltinWithOneOverloadedType<2>(*this, E, Intrinsic::maximumnum));
3109
3110 case Builtin::BIfminimum_num:
3111 case Builtin::BIfminimum_numf:
3112 case Builtin::BIfminimum_numl:
3113 case Builtin::BI__builtin_fminimum_num:
3114 case Builtin::BI__builtin_fminimum_numf:
3115 case Builtin::BI__builtin_fminimum_numf16:
3116 case Builtin::BI__builtin_fminimum_numl:
3117 case Builtin::BI__builtin_fminimum_numf128:
3118 return RValue::get(
3119 emitBuiltinWithOneOverloadedType<2>(*this, E, Intrinsic::minimumnum));
3120
3121 // fmod() is a special-case. It maps to the frem instruction rather than an
3122 // LLVM intrinsic.
3123 case Builtin::BIfmod:
3124 case Builtin::BIfmodf:
3125 case Builtin::BIfmodl:
3126 case Builtin::BI__builtin_fmod:
3127 case Builtin::BI__builtin_fmodf:
3128 case Builtin::BI__builtin_fmodf16:
3129 case Builtin::BI__builtin_fmodl:
3130 case Builtin::BI__builtin_fmodf128:
3131 case Builtin::BI__builtin_elementwise_fmod: {
3132 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3133 Value *Arg1 = EmitScalarExpr(E->getArg(0));
3134 Value *Arg2 = EmitScalarExpr(E->getArg(1));
3135 return RValue::get(Builder.CreateFRem(Arg1, Arg2, "fmod"));
3136 }
3137
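// Illustrative sketch (editorial, not part of the original source): for
// 'double' operands, 'fmod(x, y)' handled by the block above is expected to
// appear in IR as
//
//   %fmod = frem double %x, %y
//
// rather than as a call to an llvm.* intrinsic or to the libm routine.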
3138 case Builtin::BIlog:
3139 case Builtin::BIlogf:
3140 case Builtin::BIlogl:
3141 case Builtin::BI__builtin_log:
3142 case Builtin::BI__builtin_logf:
3143 case Builtin::BI__builtin_logf16:
3144 case Builtin::BI__builtin_logl:
3145 case Builtin::BI__builtin_logf128:
3146 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
3147 Intrinsic::log,
3148 Intrinsic::experimental_constrained_log));
3149
3150 case Builtin::BIlog10:
3151 case Builtin::BIlog10f:
3152 case Builtin::BIlog10l:
3153 case Builtin::BI__builtin_log10:
3154 case Builtin::BI__builtin_log10f:
3155 case Builtin::BI__builtin_log10f16:
3156 case Builtin::BI__builtin_log10l:
3157 case Builtin::BI__builtin_log10f128:
3158 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
3159 Intrinsic::log10,
3160 Intrinsic::experimental_constrained_log10));
3161
3162 case Builtin::BIlog2:
3163 case Builtin::BIlog2f:
3164 case Builtin::BIlog2l:
3165 case Builtin::BI__builtin_log2:
3166 case Builtin::BI__builtin_log2f:
3167 case Builtin::BI__builtin_log2f16:
3168 case Builtin::BI__builtin_log2l:
3169 case Builtin::BI__builtin_log2f128:
3170 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
3171 Intrinsic::log2,
3172 Intrinsic::experimental_constrained_log2));
3173
3174 case Builtin::BInearbyint:
3175 case Builtin::BInearbyintf:
3176 case Builtin::BInearbyintl:
3177 case Builtin::BI__builtin_nearbyint:
3178 case Builtin::BI__builtin_nearbyintf:
3179 case Builtin::BI__builtin_nearbyintl:
3180 case Builtin::BI__builtin_nearbyintf128:
3181 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
3182 Intrinsic::nearbyint,
3183 Intrinsic::experimental_constrained_nearbyint));
3184
3185 case Builtin::BIpow:
3186 case Builtin::BIpowf:
3187 case Builtin::BIpowl:
3188 case Builtin::BI__builtin_pow:
3189 case Builtin::BI__builtin_powf:
3190 case Builtin::BI__builtin_powf16:
3191 case Builtin::BI__builtin_powl:
3192 case Builtin::BI__builtin_powf128:
3193 return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E,
3194 Intrinsic::pow,
3195 Intrinsic::experimental_constrained_pow));
3196
3197 case Builtin::BIrint:
3198 case Builtin::BIrintf:
3199 case Builtin::BIrintl:
3200 case Builtin::BI__builtin_rint:
3201 case Builtin::BI__builtin_rintf:
3202 case Builtin::BI__builtin_rintf16:
3203 case Builtin::BI__builtin_rintl:
3204 case Builtin::BI__builtin_rintf128:
3205 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
3206 Intrinsic::rint,
3207 Intrinsic::experimental_constrained_rint));
3208
3209 case Builtin::BIround:
3210 case Builtin::BIroundf:
3211 case Builtin::BIroundl:
3212 case Builtin::BI__builtin_round:
3213 case Builtin::BI__builtin_roundf:
3214 case Builtin::BI__builtin_roundf16:
3215 case Builtin::BI__builtin_roundl:
3216 case Builtin::BI__builtin_roundf128:
3217 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
3218 Intrinsic::round,
3219 Intrinsic::experimental_constrained_round));
3220
3221 case Builtin::BIroundeven:
3222 case Builtin::BIroundevenf:
3223 case Builtin::BIroundevenl:
3224 case Builtin::BI__builtin_roundeven:
3225 case Builtin::BI__builtin_roundevenf:
3226 case Builtin::BI__builtin_roundevenf16:
3227 case Builtin::BI__builtin_roundevenl:
3228 case Builtin::BI__builtin_roundevenf128:
3229 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
3230 Intrinsic::roundeven,
3231 Intrinsic::experimental_constrained_roundeven));
3232
3233 case Builtin::BIsin:
3234 case Builtin::BIsinf:
3235 case Builtin::BIsinl:
3236 case Builtin::BI__builtin_sin:
3237 case Builtin::BI__builtin_sinf:
3238 case Builtin::BI__builtin_sinf16:
3239 case Builtin::BI__builtin_sinl:
3240 case Builtin::BI__builtin_sinf128:
3241 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
3242 Intrinsic::sin,
3243 Intrinsic::experimental_constrained_sin));
3244
3245 case Builtin::BIsinh:
3246 case Builtin::BIsinhf:
3247 case Builtin::BIsinhl:
3248 case Builtin::BI__builtin_sinh:
3249 case Builtin::BI__builtin_sinhf:
3250 case Builtin::BI__builtin_sinhf16:
3251 case Builtin::BI__builtin_sinhl:
3252 case Builtin::BI__builtin_sinhf128:
3253 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
3254 *this, E, Intrinsic::sinh, Intrinsic::experimental_constrained_sinh));
3255
3256 case Builtin::BI__builtin_sincos:
3257 case Builtin::BI__builtin_sincosf:
3258 case Builtin::BI__builtin_sincosf16:
3259 case Builtin::BI__builtin_sincosl:
3260 case Builtin::BI__builtin_sincosf128:
3261 emitSincosBuiltin(*this, E, Intrinsic::sincos);
3262 return RValue::get(nullptr);
3263
3264 case Builtin::BIsqrt:
3265 case Builtin::BIsqrtf:
3266 case Builtin::BIsqrtl:
3267 case Builtin::BI__builtin_sqrt:
3268 case Builtin::BI__builtin_sqrtf:
3269 case Builtin::BI__builtin_sqrtf16:
3270 case Builtin::BI__builtin_sqrtl:
3271 case Builtin::BI__builtin_sqrtf128:
3272 case Builtin::BI__builtin_elementwise_sqrt: {
3273 llvm::Value *Call = emitUnaryMaybeConstrainedFPBuiltin(
3274 *this, E, Intrinsic::sqrt, Intrinsic::experimental_constrained_sqrt);
3275 SetSqrtFPAccuracy(Call);
3276 return RValue::get(Call);
3277 }
3278
3279 case Builtin::BItan:
3280 case Builtin::BItanf:
3281 case Builtin::BItanl:
3282 case Builtin::BI__builtin_tan:
3283 case Builtin::BI__builtin_tanf:
3284 case Builtin::BI__builtin_tanf16:
3285 case Builtin::BI__builtin_tanl:
3286 case Builtin::BI__builtin_tanf128:
3287 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
3288 *this, E, Intrinsic::tan, Intrinsic::experimental_constrained_tan));
3289
3290 case Builtin::BItanh:
3291 case Builtin::BItanhf:
3292 case Builtin::BItanhl:
3293 case Builtin::BI__builtin_tanh:
3294 case Builtin::BI__builtin_tanhf:
3295 case Builtin::BI__builtin_tanhf16:
3296 case Builtin::BI__builtin_tanhl:
3297 case Builtin::BI__builtin_tanhf128:
3298 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
3299 *this, E, Intrinsic::tanh, Intrinsic::experimental_constrained_tanh));
3300
3301 case Builtin::BItrunc:
3302 case Builtin::BItruncf:
3303 case Builtin::BItruncl:
3304 case Builtin::BI__builtin_trunc:
3305 case Builtin::BI__builtin_truncf:
3306 case Builtin::BI__builtin_truncf16:
3307 case Builtin::BI__builtin_truncl:
3308 case Builtin::BI__builtin_truncf128:
3309 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
3310 Intrinsic::trunc,
3311 Intrinsic::experimental_constrained_trunc));
3312
3313 case Builtin::BIlround:
3314 case Builtin::BIlroundf:
3315 case Builtin::BIlroundl:
3316 case Builtin::BI__builtin_lround:
3317 case Builtin::BI__builtin_lroundf:
3318 case Builtin::BI__builtin_lroundl:
3319 case Builtin::BI__builtin_lroundf128:
3320 return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
3321 *this, E, Intrinsic::lround,
3322 Intrinsic::experimental_constrained_lround));
3323
3324 case Builtin::BIllround:
3325 case Builtin::BIllroundf:
3326 case Builtin::BIllroundl:
3327 case Builtin::BI__builtin_llround:
3328 case Builtin::BI__builtin_llroundf:
3329 case Builtin::BI__builtin_llroundl:
3330 case Builtin::BI__builtin_llroundf128:
3331 return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
3332 *this, E, Intrinsic::llround,
3333 Intrinsic::experimental_constrained_llround));
3334
3335 case Builtin::BIlrint:
3336 case Builtin::BIlrintf:
3337 case Builtin::BIlrintl:
3338 case Builtin::BI__builtin_lrint:
3339 case Builtin::BI__builtin_lrintf:
3340 case Builtin::BI__builtin_lrintl:
3341 case Builtin::BI__builtin_lrintf128:
3342 return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
3343 *this, E, Intrinsic::lrint,
3344 Intrinsic::experimental_constrained_lrint));
3345
3346 case Builtin::BIllrint:
3347 case Builtin::BIllrintf:
3348 case Builtin::BIllrintl:
3349 case Builtin::BI__builtin_llrint:
3350 case Builtin::BI__builtin_llrintf:
3351 case Builtin::BI__builtin_llrintl:
3352 case Builtin::BI__builtin_llrintf128:
3353 return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
3354 *this, E, Intrinsic::llrint,
3355 Intrinsic::experimental_constrained_llrint));
3356 case Builtin::BI__builtin_ldexp:
3357 case Builtin::BI__builtin_ldexpf:
3358 case Builtin::BI__builtin_ldexpl:
3359 case Builtin::BI__builtin_ldexpf16:
3360 case Builtin::BI__builtin_ldexpf128: {
3361 return RValue::get(emitBinaryExpMaybeConstrainedFPBuiltin(
3362 *this, E, Intrinsic::ldexp,
3363 Intrinsic::experimental_constrained_ldexp));
3364 }
3365 default:
3366 break;
3367 }
3368 }
3369
3370 // Check NonnullAttribute/NullabilityArg and Alignment.
3371 auto EmitArgCheck = [&](TypeCheckKind Kind, Address A, const Expr *Arg,
3372 unsigned ParmNum) {
3373 Value *Val = A.emitRawPointer(*this);
3374 EmitNonNullArgCheck(RValue::get(Val), Arg->getType(), Arg->getExprLoc(), FD,
3375 ParmNum);
3376
3377 if (SanOpts.has(SanitizerKind::Alignment)) {
3378 SanitizerSet SkippedChecks;
3379 SkippedChecks.set(SanitizerKind::All);
3380 SkippedChecks.clear(SanitizerKind::Alignment);
3381 SourceLocation Loc = Arg->getExprLoc();
3382 // Strip an implicit cast.
3383 if (auto *CE = dyn_cast<ImplicitCastExpr>(Arg))
3384 if (CE->getCastKind() == CK_BitCast)
3385 Arg = CE->getSubExpr();
3386 EmitTypeCheck(Kind, Loc, Val, Arg->getType(), A.getAlignment(),
3387 SkippedChecks);
3388 }
3389 };
3390
3391 switch (BuiltinIDIfNoAsmLabel) {
3392 default: break;
3393 case Builtin::BI__builtin___CFStringMakeConstantString:
3394 case Builtin::BI__builtin___NSStringMakeConstantString:
3395 return RValue::get(ConstantEmitter(*this).emitAbstract(E, E->getType()));
3396 case Builtin::BI__builtin_stdarg_start:
3397 case Builtin::BI__builtin_va_start:
3398 case Builtin::BI__va_start:
3399 case Builtin::BI__builtin_va_end:
3400 EmitVAStartEnd(BuiltinID == Builtin::BI__va_start
3401 ? EmitScalarExpr(E->getArg(0))
3402 : EmitVAListRef(E->getArg(0)).emitRawPointer(*this),
3403 BuiltinID != Builtin::BI__builtin_va_end);
3404 return RValue::get(nullptr);
3405 case Builtin::BI__builtin_va_copy: {
3406 Value *DstPtr = EmitVAListRef(E->getArg(0)).emitRawPointer(*this);
3407 Value *SrcPtr = EmitVAListRef(E->getArg(1)).emitRawPointer(*this);
3408 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy, {DstPtr->getType()}),
3409 {DstPtr, SrcPtr});
3410 return RValue::get(nullptr);
3411 }
3412 case Builtin::BIabs:
3413 case Builtin::BIlabs:
3414 case Builtin::BIllabs:
3415 case Builtin::BI__builtin_abs:
3416 case Builtin::BI__builtin_labs:
3417 case Builtin::BI__builtin_llabs: {
3418 bool SanitizeOverflow = SanOpts.has(SanitizerKind::SignedIntegerOverflow);
3419
3420 Value *Result;
3421 switch (getLangOpts().getSignedOverflowBehavior()) {
3422 case LangOptions::SOB_Defined:
3423 Result = EmitAbs(*this, EmitScalarExpr(E->getArg(0)), false);
3424 break;
3425 case LangOptions::SOB_Undefined:
3426 if (!SanitizeOverflow) {
3427 Result = EmitAbs(*this, EmitScalarExpr(E->getArg(0)), true);
3428 break;
3429 }
3430 [[fallthrough]];
3431 case LangOptions::SOB_Trapping:
3432 // TODO: Somehow handle the corner case when the address of abs is taken.
3433 Result = EmitOverflowCheckedAbs(*this, E, SanitizeOverflow);
3434 break;
3435 }
3436 return RValue::get(Result);
3437 }
3438 case Builtin::BI__builtin_complex: {
3439 Value *Real = EmitScalarExpr(E->getArg(0));
3440 Value *Imag = EmitScalarExpr(E->getArg(1));
3441 return RValue::getComplex({Real, Imag});
3442 }
3443 case Builtin::BI__builtin_conj:
3444 case Builtin::BI__builtin_conjf:
3445 case Builtin::BI__builtin_conjl:
3446 case Builtin::BIconj:
3447 case Builtin::BIconjf:
3448 case Builtin::BIconjl: {
3449 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
3450 Value *Real = ComplexVal.first;
3451 Value *Imag = ComplexVal.second;
3452 Imag = Builder.CreateFNeg(Imag, "neg");
3453 return RValue::getComplex(std::make_pair(Real, Imag));
3454 }
3455 case Builtin::BI__builtin_creal:
3456 case Builtin::BI__builtin_crealf:
3457 case Builtin::BI__builtin_creall:
3458 case Builtin::BIcreal:
3459 case Builtin::BIcrealf:
3460 case Builtin::BIcreall: {
3461 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
3462 return RValue::get(ComplexVal.first);
3463 }
3464
3465 case Builtin::BI__builtin_preserve_access_index: {
3466 // Only enabled preserved access index region when debuginfo
3467 // is available as debuginfo is needed to preserve user-level
3468 // access pattern.
3469 if (!getDebugInfo()) {
3470 CGM.Error(E->getExprLoc(), "using builtin_preserve_access_index() without -g");
3471 return RValue::get(EmitScalarExpr(E->getArg(0)));
3472 }
3473
3474 // Nested builtin_preserve_access_index() not supported
3475 if (IsInPreservedAIRegion) {
3476 CGM.Error(E->getExprLoc(), "nested builtin_preserve_access_index() not supported");
3477 return RValue::get(EmitScalarExpr(E->getArg(0)));
3478 }
3479
3480 IsInPreservedAIRegion = true;
3481 Value *Res = EmitScalarExpr(E->getArg(0));
3482 IsInPreservedAIRegion = false;
3483 return RValue::get(Res);
3484 }
3485
3486 case Builtin::BI__builtin_cimag:
3487 case Builtin::BI__builtin_cimagf:
3488 case Builtin::BI__builtin_cimagl:
3489 case Builtin::BIcimag:
3490 case Builtin::BIcimagf:
3491 case Builtin::BIcimagl: {
3492 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
3493 return RValue::get(ComplexVal.second);
3494 }
3495
3496 case Builtin::BI__builtin_clrsb:
3497 case Builtin::BI__builtin_clrsbl:
3498 case Builtin::BI__builtin_clrsbll: {
3499 // clrsb(x) -> clz(x < 0 ? ~x : x) - 1 or
3500 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3501
3502 llvm::Type *ArgType = ArgValue->getType();
3503 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
3504
3505 llvm::Type *ResultType = ConvertType(E->getType());
3506 Value *Zero = llvm::Constant::getNullValue(ArgType);
3507 Value *IsNeg = Builder.CreateICmpSLT(ArgValue, Zero, "isneg");
3508 Value *Inverse = Builder.CreateNot(ArgValue, "not");
3509 Value *Tmp = Builder.CreateSelect(IsNeg, Inverse, ArgValue);
3510 Value *Ctlz = Builder.CreateCall(F, {Tmp, Builder.getFalse()});
3511 Value *Result = Builder.CreateSub(Ctlz, llvm::ConstantInt::get(ArgType, 1));
3512 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3513 "cast");
3514 return RValue::get(Result);
3515 }
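// Illustrative sketch (editorial, not part of the original source): for a
// 32-bit 'int' the sequence built above is roughly
//
//   %isneg = icmp slt i32 %x, 0
//   %not   = xor i32 %x, -1
//   %sel   = select i1 %isneg, i32 %not, i32 %x
//   %ctlz  = call i32 @llvm.ctlz.i32(i32 %sel, i1 false)
//   %res   = sub i32 %ctlz, 1
//
// so, e.g., __builtin_clrsb(0) and __builtin_clrsb(-1) both evaluate to 31.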
3516 case Builtin::BI__builtin_ctzs:
3517 case Builtin::BI__builtin_ctz:
3518 case Builtin::BI__builtin_ctzl:
3519 case Builtin::BI__builtin_ctzll:
3520 case Builtin::BI__builtin_ctzg: {
3521 bool HasFallback = BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_ctzg &&
3522 E->getNumArgs() > 1;
3523
3524 Value *ArgValue =
3525 HasFallback ? EmitScalarExpr(E->getArg(0))
3526 : EmitCheckedArgForBuiltin(E->getArg(0), BCK_CTZPassedZero);
3527
3528 llvm::Type *ArgType = ArgValue->getType();
3529 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
3530
3531 llvm::Type *ResultType = ConvertType(E->getType());
3532 Value *ZeroUndef =
3533 Builder.getInt1(HasFallback || getTarget().isCLZForZeroUndef());
3534 Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
3535 if (Result->getType() != ResultType)
3536 Result =
3537 Builder.CreateIntCast(Result, ResultType, /*isSigned*/ false, "cast");
3538 if (!HasFallback)
3539 return RValue::get(Result);
3540
3541 Value *Zero = Constant::getNullValue(ArgType);
3542 Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
3543 Value *FallbackValue = EmitScalarExpr(E->getArg(1));
3544 Value *ResultOrFallback =
3545 Builder.CreateSelect(IsZero, FallbackValue, Result, "ctzg");
3546 return RValue::get(ResultOrFallback);
3547 }
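// Illustrative sketch (editorial, not part of the original source): with a
// fallback argument, '__builtin_ctzg(x, 32)' on a 32-bit unsigned 'x' is
// expected to become roughly
//
//   %r = call i32 @llvm.cttz.i32(i32 %x, i1 true)
//   %z = icmp eq i32 %x, 0
//   %ctzg = select i1 %z, i32 32, i32 %r
//
// so the fallback value (here 32) is returned when the argument is zero.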
3548 case Builtin::BI__builtin_clzs:
3549 case Builtin::BI__builtin_clz:
3550 case Builtin::BI__builtin_clzl:
3551 case Builtin::BI__builtin_clzll:
3552 case Builtin::BI__builtin_clzg: {
3553 bool HasFallback = BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_clzg &&
3554 E->getNumArgs() > 1;
3555
3556 Value *ArgValue =
3557 HasFallback ? EmitScalarExpr(E->getArg(0))
3558 : EmitCheckedArgForBuiltin(E->getArg(0), BCK_CLZPassedZero);
3559
3560 llvm::Type *ArgType = ArgValue->getType();
3561 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
3562
3563 llvm::Type *ResultType = ConvertType(E->getType());
3564 Value *ZeroUndef =
3565 Builder.getInt1(HasFallback || getTarget().isCLZForZeroUndef());
3566 Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
3567 if (Result->getType() != ResultType)
3568 Result =
3569 Builder.CreateIntCast(Result, ResultType, /*isSigned*/ false, "cast");
3570 if (!HasFallback)
3571 return RValue::get(Result);
3572
3573 Value *Zero = Constant::getNullValue(ArgType);
3574 Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
3575 Value *FallbackValue = EmitScalarExpr(E->getArg(1));
3576 Value *ResultOrFallback =
3577 Builder.CreateSelect(IsZero, FallbackValue, Result, "clzg");
3578 return RValue::get(ResultOrFallback);
3579 }
3580 case Builtin::BI__builtin_ffs:
3581 case Builtin::BI__builtin_ffsl:
3582 case Builtin::BI__builtin_ffsll: {
3583 // ffs(x) -> x ? cttz(x) + 1 : 0
3584 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3585
3586 llvm::Type *ArgType = ArgValue->getType();
3587 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
3588
3589 llvm::Type *ResultType = ConvertType(E->getType());
3590 Value *Tmp =
3591 Builder.CreateAdd(Builder.CreateCall(F, {ArgValue, Builder.getTrue()}),
3592 llvm::ConstantInt::get(ArgType, 1));
3593 Value *Zero = llvm::Constant::getNullValue(ArgType);
3594 Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
3595 Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs");
3596 if (Result->getType() != ResultType)
3597 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3598 "cast");
3599 return RValue::get(Result);
3600 }
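// Illustrative sketch (editorial, not part of the original source): the
// select above gives the usual ffs() semantics, e.g. __builtin_ffs(0) == 0
// and __builtin_ffs(8) == 4 (bit 3 is the lowest set bit, reported 1-based).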
3601 case Builtin::BI__builtin_parity:
3602 case Builtin::BI__builtin_parityl:
3603 case Builtin::BI__builtin_parityll: {
3604 // parity(x) -> ctpop(x) & 1
3605 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3606
3607 llvm::Type *ArgType = ArgValue->getType();
3608 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
3609
3610 llvm::Type *ResultType = ConvertType(E->getType());
3611 Value *Tmp = Builder.CreateCall(F, ArgValue);
3612 Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
3613 if (Result->getType() != ResultType)
3614 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3615 "cast");
3616 return RValue::get(Result);
3617 }
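// Illustrative sketch (editorial, not part of the original source):
// __builtin_parity(7) == 1 (three bits set) and __builtin_parity(3) == 0,
// i.e. the low bit of @llvm.ctpop applied to the argument.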
3618 case Builtin::BI__lzcnt16:
3619 case Builtin::BI__lzcnt:
3620 case Builtin::BI__lzcnt64: {
3621 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3622
3623 llvm::Type *ArgType = ArgValue->getType();
3624 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
3625
3626 llvm::Type *ResultType = ConvertType(E->getType());
3627 Value *Result = Builder.CreateCall(F, {ArgValue, Builder.getFalse()});
3628 if (Result->getType() != ResultType)
3629 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3630 "cast");
3631 return RValue::get(Result);
3632 }
3633 case Builtin::BI__popcnt16:
3634 case Builtin::BI__popcnt:
3635 case Builtin::BI__popcnt64:
3636 case Builtin::BI__builtin_popcount:
3637 case Builtin::BI__builtin_popcountl:
3638 case Builtin::BI__builtin_popcountll:
3639 case Builtin::BI__builtin_popcountg: {
3640 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3641
3642 llvm::Type *ArgType = ArgValue->getType();
3643 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
3644
3645 llvm::Type *ResultType = ConvertType(E->getType());
3646 Value *Result = Builder.CreateCall(F, ArgValue);
3647 if (Result->getType() != ResultType)
3648 Result =
3649 Builder.CreateIntCast(Result, ResultType, /*isSigned*/ false, "cast");
3650 return RValue::get(Result);
3651 }
3652 case Builtin::BI__builtin_unpredictable: {
3653 // Always return the argument of __builtin_unpredictable. LLVM does not
3654 // handle this builtin. Metadata for this builtin should be added directly
3655 // to instructions such as branches or switches that use it.
3656 return RValue::get(EmitScalarExpr(E->getArg(0)));
3657 }
3658 case Builtin::BI__builtin_expect: {
3659 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3660 llvm::Type *ArgType = ArgValue->getType();
3661
3662 Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
3663 // Don't generate llvm.expect on -O0 as the backend won't use it for
3664 // anything.
3665 // Note, we still IRGen ExpectedValue because it could have side-effects.
3666 if (CGM.getCodeGenOpts().OptimizationLevel == 0)
3667 return RValue::get(ArgValue);
3668
3669 Function *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType);
3670 Value *Result =
3671 Builder.CreateCall(FnExpect, {ArgValue, ExpectedValue}, "expval");
3672 return RValue::get(Result);
3673 }
3674 case Builtin::BI__builtin_expect_with_probability: {
3675 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3676 llvm::Type *ArgType = ArgValue->getType();
3677
3678 Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
3679 llvm::APFloat Probability(0.0);
3680 const Expr *ProbArg = E->getArg(2);
3681 bool EvalSucceed = ProbArg->EvaluateAsFloat(Probability, CGM.getContext());
3682 assert(EvalSucceed && "probability should be able to evaluate as float");
3683 (void)EvalSucceed;
3684 bool LoseInfo = false;
3685 Probability.convert(llvm::APFloat::IEEEdouble(),
3686 llvm::RoundingMode::Dynamic, &LoseInfo);
3687 llvm::Type *Ty = ConvertType(ProbArg->getType());
3688 Constant *Confidence = ConstantFP::get(Ty, Probability);
3689 // Don't generate llvm.expect.with.probability on -O0 as the backend
3690 // won't use it for anything.
3691 // Note, we still IRGen ExpectedValue because it could have side-effects.
3692 if (CGM.getCodeGenOpts().OptimizationLevel == 0)
3693 return RValue::get(ArgValue);
3694
3695 Function *FnExpect =
3696 CGM.getIntrinsic(Intrinsic::expect_with_probability, ArgType);
3697 Value *Result = Builder.CreateCall(
3698 FnExpect, {ArgValue, ExpectedValue, Confidence}, "expval");
3699 return RValue::get(Result);
3700 }
3701 case Builtin::BI__builtin_assume_aligned: {
3702 const Expr *Ptr = E->getArg(0);
3703 Value *PtrValue = EmitScalarExpr(Ptr);
3704 Value *OffsetValue =
3705 (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : nullptr;
3706
3707 Value *AlignmentValue = EmitScalarExpr(E->getArg(1));
3708 ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue);
3709 if (AlignmentCI->getValue().ugt(llvm::Value::MaximumAlignment))
3710 AlignmentCI = ConstantInt::get(AlignmentCI->getIntegerType(),
3711 llvm::Value::MaximumAlignment);
3712
3713 emitAlignmentAssumption(PtrValue, Ptr,
3714 /*The expr loc is sufficient.*/ SourceLocation(),
3715 AlignmentCI, OffsetValue);
3716 return RValue::get(PtrValue);
3717 }
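// Illustrative sketch (editorial, not part of the original source):
// 'p = __builtin_assume_aligned(p, 64)' is expected to emit an alignment
// assumption roughly of the form
//
//   call void @llvm.assume(i1 true) [ "align"(ptr %p, i64 64) ]
//
// while the builtin itself simply returns the pointer operand.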
3718 case Builtin::BI__assume:
3719 case Builtin::BI__builtin_assume: {
3720 if (E->getArg(0)->HasSideEffects(getContext()))
3721 return RValue::get(nullptr);
3722
3723 Value *ArgValue = EmitCheckedArgForAssume(E->getArg(0));
3724 Function *FnAssume = CGM.getIntrinsic(Intrinsic::assume);
3725 Builder.CreateCall(FnAssume, ArgValue);
3726 return RValue::get(nullptr);
3727 }
3728 case Builtin::BI__builtin_assume_separate_storage: {
3729 const Expr *Arg0 = E->getArg(0);
3730 const Expr *Arg1 = E->getArg(1);
3731
3732 Value *Value0 = EmitScalarExpr(Arg0);
3733 Value *Value1 = EmitScalarExpr(Arg1);
3734
3735 Value *Values[] = {Value0, Value1};
3736 OperandBundleDefT<Value *> OBD("separate_storage", Values);
3737 Builder.CreateAssumption(ConstantInt::getTrue(getLLVMContext()), {OBD});
3738 return RValue::get(nullptr);
3739 }
3740 case Builtin::BI__builtin_allow_runtime_check: {
3741 StringRef Kind =
3742 cast<StringLiteral>(E->getArg(0)->IgnoreParenCasts())->getString();
3743 LLVMContext &Ctx = CGM.getLLVMContext();
3744 llvm::Value *Allow = Builder.CreateCall(
3745 CGM.getIntrinsic(llvm::Intrinsic::allow_runtime_check),
3746 llvm::MetadataAsValue::get(Ctx, llvm::MDString::get(Ctx, Kind)));
3747 return RValue::get(Allow);
3748 }
3749 case Builtin::BI__arithmetic_fence: {
3750 // Create the builtin call if FastMath is selected, and the target
3751 // supports the builtin, otherwise just return the argument.
3752 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3753 llvm::FastMathFlags FMF = Builder.getFastMathFlags();
3754 bool isArithmeticFenceEnabled =
3755 FMF.allowReassoc() &&
3756 getContext().getTargetInfo().checkArithmeticFenceSupported();
3757 QualType ArgType = E->getArg(0)->getType();
3758 if (ArgType->isComplexType()) {
3759 if (isArithmeticFenceEnabled) {
3760 QualType ElementType = ArgType->castAs<ComplexType>()->getElementType();
3761 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
3762 Value *Real = Builder.CreateArithmeticFence(ComplexVal.first,
3763 ConvertType(ElementType));
3764 Value *Imag = Builder.CreateArithmeticFence(ComplexVal.second,
3765 ConvertType(ElementType));
3766 return RValue::getComplex(std::make_pair(Real, Imag));
3767 }
3768 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
3769 Value *Real = ComplexVal.first;
3770 Value *Imag = ComplexVal.second;
3771 return RValue::getComplex(std::make_pair(Real, Imag));
3772 }
3773 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3774 if (isArithmeticFenceEnabled)
3775 return RValue::get(
3776 Builder.CreateArithmeticFence(ArgValue, ConvertType(ArgType)));
3777 return RValue::get(ArgValue);
3778 }
3779 case Builtin::BI__builtin_bswap16:
3780 case Builtin::BI__builtin_bswap32:
3781 case Builtin::BI__builtin_bswap64:
3782 case Builtin::BI_byteswap_ushort:
3783 case Builtin::BI_byteswap_ulong:
3784 case Builtin::BI_byteswap_uint64: {
3785 return RValue::get(
3786 emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::bswap));
3787 }
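// Illustrative sketch (editorial, not part of the original source):
// __builtin_bswap32(0x12345678) lowers to @llvm.bswap.i32 and evaluates to
// 0x78563412.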
3788 case Builtin::BI__builtin_bitreverse8:
3789 case Builtin::BI__builtin_bitreverse16:
3790 case Builtin::BI__builtin_bitreverse32:
3791 case Builtin::BI__builtin_bitreverse64: {
3792 return RValue::get(
3793 emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::bitreverse));
3794 }
3795 case Builtin::BI__builtin_rotateleft8:
3796 case Builtin::BI__builtin_rotateleft16:
3797 case Builtin::BI__builtin_rotateleft32:
3798 case Builtin::BI__builtin_rotateleft64:
3799 case Builtin::BI_rotl8: // Microsoft variants of rotate left
3800 case Builtin::BI_rotl16:
3801 case Builtin::BI_rotl:
3802 case Builtin::BI_lrotl:
3803 case Builtin::BI_rotl64:
3804 return emitRotate(E, false);
3805
3806 case Builtin::BI__builtin_rotateright8:
3807 case Builtin::BI__builtin_rotateright16:
3808 case Builtin::BI__builtin_rotateright32:
3809 case Builtin::BI__builtin_rotateright64:
3810 case Builtin::BI_rotr8: // Microsoft variants of rotate right
3811 case Builtin::BI_rotr16:
3812 case Builtin::BI_rotr:
3813 case Builtin::BI_lrotr:
3814 case Builtin::BI_rotr64:
3815 return emitRotate(E, true);
3816
3817 case Builtin::BI__builtin_constant_p: {
3818 llvm::Type *ResultType = ConvertType(E->getType());
3819
3820 const Expr *Arg = E->getArg(0);
3821 QualType ArgType = Arg->getType();
3822 // FIXME: The allowance for Obj-C pointers and block pointers is historical
3823 // and likely a mistake.
3824 if (!ArgType->isIntegralOrEnumerationType() && !ArgType->isFloatingType() &&
3825 !ArgType->isObjCObjectPointerType() && !ArgType->isBlockPointerType())
3826 // Per the GCC documentation, only numeric constants are recognized after
3827 // inlining.
3828 return RValue::get(ConstantInt::get(ResultType, 0));
3829
3830 if (Arg->HasSideEffects(getContext()))
3831 // The argument is unevaluated, so be conservative if it might have
3832 // side-effects.
3833 return RValue::get(ConstantInt::get(ResultType, 0));
3834
3835 Value *ArgValue = EmitScalarExpr(Arg);
3836 if (ArgType->isObjCObjectPointerType()) {
3837 // Convert Objective-C objects to id because we cannot distinguish between
3838 // LLVM types for Obj-C classes as they are opaque.
3839 ArgType = CGM.getContext().getObjCIdType();
3840 ArgValue = Builder.CreateBitCast(ArgValue, ConvertType(ArgType));
3841 }
3842 Function *F =
3843 CGM.getIntrinsic(Intrinsic::is_constant, ConvertType(ArgType));
3844 Value *Result = Builder.CreateCall(F, ArgValue);
3845 if (Result->getType() != ResultType)
3846 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/false);
3847 return RValue::get(Result);
3848 }
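// Illustrative sketch (editorial, not part of the original source): for an
// 'int' argument this emits 'call i1 @llvm.is.constant.i32(i32 %x)', which
// later passes fold to 1 if the operand becomes a constant after
// optimization and to 0 otherwise.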
3849 case Builtin::BI__builtin_dynamic_object_size:
3850 case Builtin::BI__builtin_object_size: {
3851 unsigned Type =
3852 E->getArg(1)->EvaluateKnownConstInt(getContext()).getZExtValue();
3853 auto *ResType = cast<llvm::IntegerType>(ConvertType(E->getType()));
3854
3855 // We pass this builtin onto the optimizer so that it can figure out the
3856 // object size in more complex cases.
3857 bool IsDynamic = BuiltinID == Builtin::BI__builtin_dynamic_object_size;
3858 return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType,
3859 /*EmittedE=*/nullptr, IsDynamic));
3860 }
3861 case Builtin::BI__builtin_counted_by_ref: {
3862 // Default to returning '(void *) 0'.
3863 llvm::Value *Result = llvm::ConstantPointerNull::get(
3864 llvm::PointerType::getUnqual(getLLVMContext()));
3865
3866 const Expr *Arg = E->getArg(0)->IgnoreParenImpCasts();
3867
3868 if (auto *UO = dyn_cast<UnaryOperator>(Arg);
3869 UO && UO->getOpcode() == UO_AddrOf) {
3870 Arg = UO->getSubExpr()->IgnoreParenImpCasts();
3871
3872 if (auto *ASE = dyn_cast<ArraySubscriptExpr>(Arg))
3873 Arg = ASE->getBase()->IgnoreParenImpCasts();
3874 }
3875
3876 if (const MemberExpr *ME = dyn_cast_if_present<MemberExpr>(Arg)) {
3877 if (auto *CATy =
3878 ME->getMemberDecl()->getType()->getAs<CountAttributedType>();
3879 CATy && CATy->getKind() == CountAttributedType::CountedBy) {
3880 const auto *FAMDecl = cast<FieldDecl>(ME->getMemberDecl());
3881 if (const FieldDecl *CountFD = FAMDecl->findCountedByField())
3882 Result = GetCountedByFieldExprGEP(Arg, FAMDecl, CountFD);
3883 else
3884 llvm::report_fatal_error("Cannot find the counted_by 'count' field");
3885 }
3886 }
3887
3888 return RValue::get(Result);
3889 }
3890 case Builtin::BI__builtin_prefetch: {
3891 Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0));
3892 // FIXME: Technically these constants should of type 'int', yes?
3893 RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) :
3894 llvm::ConstantInt::get(Int32Ty, 0);
3895 Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) :
3896 llvm::ConstantInt::get(Int32Ty, 3);
3897 Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
3898 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
3899 Builder.CreateCall(F, {Address, RW, Locality, Data});
3900 return RValue::get(nullptr);
3901 }
3902 case Builtin::BI__builtin_readcyclecounter: {
3903 Function *F = CGM.getIntrinsic(Intrinsic::readcyclecounter);
3904 return RValue::get(Builder.CreateCall(F));
3905 }
3906 case Builtin::BI__builtin_readsteadycounter: {
3907 Function *F = CGM.getIntrinsic(Intrinsic::readsteadycounter);
3908 return RValue::get(Builder.CreateCall(F));
3909 }
3910 case Builtin::BI__builtin___clear_cache: {
3911 Value *Begin = EmitScalarExpr(E->getArg(0));
3912 Value *End = EmitScalarExpr(E->getArg(1));
3913 Function *F = CGM.getIntrinsic(Intrinsic::clear_cache);
3914 return RValue::get(Builder.CreateCall(F, {Begin, End}));
3915 }
3916 case Builtin::BI__builtin_trap:
3917 EmitTrapCall(Intrinsic::trap);
3918 return RValue::get(nullptr);
3919 case Builtin::BI__builtin_verbose_trap: {
3920 llvm::DILocation *TrapLocation = Builder.getCurrentDebugLocation();
3921 if (getDebugInfo()) {
3922 TrapLocation = getDebugInfo()->CreateTrapFailureMessageFor(
3923 TrapLocation, *E->getArg(0)->tryEvaluateString(getContext()),
3924 *E->getArg(1)->tryEvaluateString(getContext()));
3925 }
3926 ApplyDebugLocation ApplyTrapDI(*this, TrapLocation);
3927 // Currently no attempt is made to prevent traps from being merged.
3928 EmitTrapCall(Intrinsic::trap);
3929 return RValue::get(nullptr);
3930 }
3931 case Builtin::BI__debugbreak:
3932 EmitTrapCall(Intrinsic::debugtrap);
3933 return RValue::get(nullptr);
3934 case Builtin::BI__builtin_unreachable: {
3935 EmitUnreachable(E->getExprLoc());
3936
3937 // We do need to preserve an insertion point.
3938 EmitBlock(createBasicBlock("unreachable.cont"));
3939
3940 return RValue::get(nullptr);
3941 }
3942
3943 case Builtin::BI__builtin_powi:
3944 case Builtin::BI__builtin_powif:
3945 case Builtin::BI__builtin_powil: {
3946 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
3947 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
3948
3949 if (Builder.getIsFPConstrained()) {
3950 // FIXME: llvm.powi has 2 mangling types,
3951 // llvm.experimental.constrained.powi has one.
3952 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3953 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_powi,
3954 Src0->getType());
3955 return RValue::get(Builder.CreateConstrainedFPCall(F, { Src0, Src1 }));
3956 }
3957
3958 Function *F = CGM.getIntrinsic(Intrinsic::powi,
3959 { Src0->getType(), Src1->getType() });
3960 return RValue::get(Builder.CreateCall(F, { Src0, Src1 }));
3961 }
3962 case Builtin::BI__builtin_frexpl: {
3963 // Linux PPC will not be adding additional PPCDoubleDouble support.
3964 // WIP to switch default to IEEE long double. Will emit libcall for
3965 // frexpl instead of legalizing this type in the BE.
3966 if (&getTarget().getLongDoubleFormat() == &llvm::APFloat::PPCDoubleDouble())
3967 break;
3968 [[fallthrough]];
3969 }
3970 case Builtin::BI__builtin_frexp:
3971 case Builtin::BI__builtin_frexpf:
3972 case Builtin::BI__builtin_frexpf128:
3973 case Builtin::BI__builtin_frexpf16:
3974 return RValue::get(emitFrexpBuiltin(*this, E, Intrinsic::frexp));
3975 case Builtin::BI__builtin_isgreater:
3976 case Builtin::BI__builtin_isgreaterequal:
3977 case Builtin::BI__builtin_isless:
3978 case Builtin::BI__builtin_islessequal:
3979 case Builtin::BI__builtin_islessgreater:
3980 case Builtin::BI__builtin_isunordered: {
3981 // Ordered comparisons: we know the arguments to these are matching scalar
3982 // floating point values.
3983 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3984 Value *LHS = EmitScalarExpr(E->getArg(0));
3985 Value *RHS = EmitScalarExpr(E->getArg(1));
3986
3987 switch (BuiltinID) {
3988 default: llvm_unreachable("Unknown ordered comparison");
3989 case Builtin::BI__builtin_isgreater:
3990 LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp");
3991 break;
3992 case Builtin::BI__builtin_isgreaterequal:
3993 LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp");
3994 break;
3995 case Builtin::BI__builtin_isless:
3996 LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp");
3997 break;
3998 case Builtin::BI__builtin_islessequal:
3999 LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp");
4000 break;
4001 case Builtin::BI__builtin_islessgreater:
4002 LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp");
4003 break;
4004 case Builtin::BI__builtin_isunordered:
4005 LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp");
4006 break;
4007 }
4008 // ZExt bool to int type.
4009 return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType())));
4010 }
4011
4012 case Builtin::BI__builtin_isnan: {
4013 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
4014 Value *V = EmitScalarExpr(E->getArg(0));
4015 if (Value *Result = tryUseTestFPKind(*this, BuiltinID, V))
4016 return RValue::get(Result);
4017 return RValue::get(
4018 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcNan),
4019 ConvertType(E->getType())));
4020 }
4021
4022 case Builtin::BI__builtin_issignaling: {
4023 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
4024 Value *V = EmitScalarExpr(E->getArg(0));
4025 return RValue::get(
4026 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcSNan),
4027 ConvertType(E->getType())));
4028 }
4029
4030 case Builtin::BI__builtin_isinf: {
4031 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
4032 Value *V = EmitScalarExpr(E->getArg(0));
4033 if (Value *Result = tryUseTestFPKind(*this, BuiltinID, V))
4034 return RValue::get(Result);
4035 return RValue::get(
4036 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcInf),
4037 ConvertType(E->getType())));
4038 }
4039
4040 case Builtin::BIfinite:
4041 case Builtin::BI__finite:
4042 case Builtin::BIfinitef:
4043 case Builtin::BI__finitef:
4044 case Builtin::BIfinitel:
4045 case Builtin::BI__finitel:
4046 case Builtin::BI__builtin_isfinite: {
4047 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
4048 Value *V = EmitScalarExpr(E->getArg(0));
4049 if (Value *Result = tryUseTestFPKind(*this, BuiltinID, V))
4050 return RValue::get(Result);
4051 return RValue::get(
4052 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcFinite),
4053 ConvertType(E->getType())));
4054 }
4055
4056 case Builtin::BI__builtin_isnormal: {
4057 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
4058 Value *V = EmitScalarExpr(E->getArg(0));
4059 return RValue::get(
4060 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcNormal),
4061 ConvertType(E->getType())));
4062 }
4063
4064 case Builtin::BI__builtin_issubnormal: {
4065 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
4066 Value *V = EmitScalarExpr(E->getArg(0));
4067 return RValue::get(
4068 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcSubnormal),
4069 ConvertType(E->getType())));
4070 }
4071
4072 case Builtin::BI__builtin_iszero: {
4073 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
4074 Value *V = EmitScalarExpr(E->getArg(0));
4075 return RValue::get(
4076 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcZero),
4077 ConvertType(E->getType())));
4078 }
4079
4080 case Builtin::BI__builtin_isfpclass: {
4081 Expr::EvalResult Result;
4082 if (!E->getArg(1)->EvaluateAsInt(Result, CGM.getContext()))
4083 break;
4084 uint64_t Test = Result.Val.getInt().getLimitedValue();
4085 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
4086 Value *V = EmitScalarExpr(E->getArg(0));
4087 return RValue::get(Builder.CreateZExt(Builder.createIsFPClass(V, Test),
4088 ConvertType(E->getType())));
4089 }
4090
4091 case Builtin::BI__builtin_nondeterministic_value: {
4092 llvm::Type *Ty = ConvertType(E->getArg(0)->getType());
4093
4094 Value *Result = PoisonValue::get(Ty);
4095 Result = Builder.CreateFreeze(Result);
4096
4097 return RValue::get(Result);
4098 }
4099
4100 case Builtin::BI__builtin_elementwise_abs: {
4101 Value *Result;
4102 QualType QT = E->getArg(0)->getType();
4103
4104 if (auto *VecTy = QT->getAs<VectorType>())
4105 QT = VecTy->getElementType();
4106 if (QT->isIntegerType())
4107 Result = Builder.CreateBinaryIntrinsic(
4108 llvm::Intrinsic::abs, EmitScalarExpr(E->getArg(0)),
4109 Builder.getFalse(), nullptr, "elt.abs");
4110 else
4111 Result = emitBuiltinWithOneOverloadedType<1>(
4112 *this, E, llvm::Intrinsic::fabs, "elt.abs");
4113
4114 return RValue::get(Result);
4115 }
4116 case Builtin::BI__builtin_elementwise_acos:
4117 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4118 *this, E, llvm::Intrinsic::acos, "elt.acos"));
4119 case Builtin::BI__builtin_elementwise_asin:
4120 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4121 *this, E, llvm::Intrinsic::asin, "elt.asin"));
4122 case Builtin::BI__builtin_elementwise_atan:
4123 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4124 *this, E, llvm::Intrinsic::atan, "elt.atan"));
4125 case Builtin::BI__builtin_elementwise_atan2:
4126 return RValue::get(emitBuiltinWithOneOverloadedType<2>(
4127 *this, E, llvm::Intrinsic::atan2, "elt.atan2"));
4128 case Builtin::BI__builtin_elementwise_ceil:
4129 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4130 *this, E, llvm::Intrinsic::ceil, "elt.ceil"));
4131 case Builtin::BI__builtin_elementwise_exp:
4132 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4133 *this, E, llvm::Intrinsic::exp, "elt.exp"));
4134 case Builtin::BI__builtin_elementwise_exp2:
4135 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4136 *this, E, llvm::Intrinsic::exp2, "elt.exp2"));
4137 case Builtin::BI__builtin_elementwise_log:
4138 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4139 *this, E, llvm::Intrinsic::log, "elt.log"));
4140 case Builtin::BI__builtin_elementwise_log2:
4141 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4142 *this, E, llvm::Intrinsic::log2, "elt.log2"));
4143 case Builtin::BI__builtin_elementwise_log10:
4144 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4145 *this, E, llvm::Intrinsic::log10, "elt.log10"));
4146 case Builtin::BI__builtin_elementwise_pow: {
4147 return RValue::get(
4148 emitBuiltinWithOneOverloadedType<2>(*this, E, llvm::Intrinsic::pow));
4149 }
4150 case Builtin::BI__builtin_elementwise_bitreverse:
4151 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4152 *this, E, llvm::Intrinsic::bitreverse, "elt.bitreverse"));
4153 case Builtin::BI__builtin_elementwise_cos:
4154 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4155 *this, E, llvm::Intrinsic::cos, "elt.cos"));
4156 case Builtin::BI__builtin_elementwise_cosh:
4157 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4158 *this, E, llvm::Intrinsic::cosh, "elt.cosh"));
4159 case Builtin::BI__builtin_elementwise_floor:
4160 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4161 *this, E, llvm::Intrinsic::floor, "elt.floor"));
4162 case Builtin::BI__builtin_elementwise_popcount:
4163 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4164 *this, E, llvm::Intrinsic::ctpop, "elt.ctpop"));
4165 case Builtin::BI__builtin_elementwise_roundeven:
4166 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4167 *this, E, llvm::Intrinsic::roundeven, "elt.roundeven"));
4168 case Builtin::BI__builtin_elementwise_round:
4169 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4170 *this, E, llvm::Intrinsic::round, "elt.round"));
4171 case Builtin::BI__builtin_elementwise_rint:
4172 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4173 *this, E, llvm::Intrinsic::rint, "elt.rint"));
4174 case Builtin::BI__builtin_elementwise_nearbyint:
4175 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4176 *this, E, llvm::Intrinsic::nearbyint, "elt.nearbyint"));
4177 case Builtin::BI__builtin_elementwise_sin:
4178 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4179 *this, E, llvm::Intrinsic::sin, "elt.sin"));
4180 case Builtin::BI__builtin_elementwise_sinh:
4181 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4182 *this, E, llvm::Intrinsic::sinh, "elt.sinh"));
4183 case Builtin::BI__builtin_elementwise_tan:
4184 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4185 *this, E, llvm::Intrinsic::tan, "elt.tan"));
4186 case Builtin::BI__builtin_elementwise_tanh:
4187 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4188 *this, E, llvm::Intrinsic::tanh, "elt.tanh"));
4189 case Builtin::BI__builtin_elementwise_trunc:
4190 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4191 *this, E, llvm::Intrinsic::trunc, "elt.trunc"));
4192 case Builtin::BI__builtin_elementwise_canonicalize:
4193 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4194 *this, E, llvm::Intrinsic::canonicalize, "elt.canonicalize"));
4195 case Builtin::BI__builtin_elementwise_copysign:
4196 return RValue::get(emitBuiltinWithOneOverloadedType<2>(
4197 *this, E, llvm::Intrinsic::copysign));
4198 case Builtin::BI__builtin_elementwise_fma:
4199 return RValue::get(
4200 emitBuiltinWithOneOverloadedType<3>(*this, E, llvm::Intrinsic::fma));
4201 case Builtin::BI__builtin_elementwise_add_sat:
4202 case Builtin::BI__builtin_elementwise_sub_sat: {
4203 Value *Op0 = EmitScalarExpr(E->getArg(0));
4204 Value *Op1 = EmitScalarExpr(E->getArg(1));
4205 Value *Result;
4206 assert(Op0->getType()->isIntOrIntVectorTy() && "integer type expected");
4207 QualType Ty = E->getArg(0)->getType();
4208 if (auto *VecTy = Ty->getAs<VectorType>())
4209 Ty = VecTy->getElementType();
4210 bool IsSigned = Ty->isSignedIntegerType();
4211 unsigned Opc;
4212 if (BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_elementwise_add_sat)
4213 Opc = IsSigned ? llvm::Intrinsic::sadd_sat : llvm::Intrinsic::uadd_sat;
4214 else
4215 Opc = IsSigned ? llvm::Intrinsic::ssub_sat : llvm::Intrinsic::usub_sat;
4216 Result = Builder.CreateBinaryIntrinsic(Opc, Op0, Op1, nullptr, "elt.sat");
4217 return RValue::get(Result);
4218 }
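// Illustrative sketch (editorial, not part of the original source): for a
// vector of signed 8-bit elements, __builtin_elementwise_add_sat(a, b) is
// expected to select @llvm.sadd.sat on that vector type, while an unsigned
// element type selects @llvm.uadd.sat, per the signedness check above.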
4219
4220 case Builtin::BI__builtin_elementwise_max: {
4221 Value *Op0 = EmitScalarExpr(E->getArg(0));
4222 Value *Op1 = EmitScalarExpr(E->getArg(1));
4223 Value *Result;
4224 if (Op0->getType()->isIntOrIntVectorTy()) {
4225 QualType Ty = E->getArg(0)->getType();
4226 if (auto *VecTy = Ty->getAs<VectorType>())
4227 Ty = VecTy->getElementType();
4228 Result = Builder.CreateBinaryIntrinsic(Ty->isSignedIntegerType()
4229 ? llvm::Intrinsic::smax
4230 : llvm::Intrinsic::umax,
4231 Op0, Op1, nullptr, "elt.max");
4232 } else
4233 Result = Builder.CreateMaxNum(Op0, Op1, "elt.max");
4234 return RValue::get(Result);
4235 }
4236 case Builtin::BI__builtin_elementwise_min: {
4237 Value *Op0 = EmitScalarExpr(E->getArg(0));
4238 Value *Op1 = EmitScalarExpr(E->getArg(1));
4239 Value *Result;
4240 if (Op0->getType()->isIntOrIntVectorTy()) {
4241 QualType Ty = E->getArg(0)->getType();
4242 if (auto *VecTy = Ty->getAs<VectorType>())
4243 Ty = VecTy->getElementType();
4244 Result = Builder.CreateBinaryIntrinsic(Ty->isSignedIntegerType()
4245 ? llvm::Intrinsic::smin
4246 : llvm::Intrinsic::umin,
4247 Op0, Op1, nullptr, "elt.min");
4248 } else
4249 Result = Builder.CreateMinNum(Op0, Op1, "elt.min");
4250 return RValue::get(Result);
4251 }
4252
4253 case Builtin::BI__builtin_elementwise_maximum: {
4254 Value *Op0 = EmitScalarExpr(E->getArg(0));
4255 Value *Op1 = EmitScalarExpr(E->getArg(1));
4256 Value *Result = Builder.CreateBinaryIntrinsic(llvm::Intrinsic::maximum, Op0,
4257 Op1, nullptr, "elt.maximum");
4258 return RValue::get(Result);
4259 }
4260
4261 case Builtin::BI__builtin_elementwise_minimum: {
4262 Value *Op0 = EmitScalarExpr(E->getArg(0));
4263 Value *Op1 = EmitScalarExpr(E->getArg(1));
4264 Value *Result = Builder.CreateBinaryIntrinsic(llvm::Intrinsic::minimum, Op0,
4265 Op1, nullptr, "elt.minimum");
4266 return RValue::get(Result);
4267 }
4268
4269 case Builtin::BI__builtin_reduce_max: {
4270 auto GetIntrinsicID = [this](QualType QT) {
4271 if (auto *VecTy = QT->getAs<VectorType>())
4272 QT = VecTy->getElementType();
4273 else if (QT->isSizelessVectorType())
4274 QT = QT->getSizelessVectorEltType(CGM.getContext());
4275
4276 if (QT->isSignedIntegerType())
4277 return llvm::Intrinsic::vector_reduce_smax;
4278 if (QT->isUnsignedIntegerType())
4279 return llvm::Intrinsic::vector_reduce_umax;
4280 assert(QT->isFloatingType() && "must have a float here");
4281 return llvm::Intrinsic::vector_reduce_fmax;
4282 };
4283 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4284 *this, E, GetIntrinsicID(E->getArg(0)->getType()), "rdx.min"));
4285 }
4286
4287 case Builtin::BI__builtin_reduce_min: {
4288 auto GetIntrinsicID = [this](QualType QT) {
4289 if (auto *VecTy = QT->getAs<VectorType>())
4290 QT = VecTy->getElementType();
4291 else if (QT->isSizelessVectorType())
4292 QT = QT->getSizelessVectorEltType(CGM.getContext());
4293
4294 if (QT->isSignedIntegerType())
4295 return llvm::Intrinsic::vector_reduce_smin;
4296 if (QT->isUnsignedIntegerType())
4297 return llvm::Intrinsic::vector_reduce_umin;
4298 assert(QT->isFloatingType() && "must have a float here");
4299 return llvm::Intrinsic::vector_reduce_fmin;
4300 };
4301
4302 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4303 *this, E, GetIntrinsicID(E->getArg(0)->getType()), "rdx.min"));
4304 }
4305
4306 case Builtin::BI__builtin_reduce_add:
4307 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4308 *this, E, llvm::Intrinsic::vector_reduce_add, "rdx.add"));
4309 case Builtin::BI__builtin_reduce_mul:
4310 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4311 *this, E, llvm::Intrinsic::vector_reduce_mul, "rdx.mul"));
4312 case Builtin::BI__builtin_reduce_xor:
4313 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4314 *this, E, llvm::Intrinsic::vector_reduce_xor, "rdx.xor"));
4315 case Builtin::BI__builtin_reduce_or:
4316 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4317 *this, E, llvm::Intrinsic::vector_reduce_or, "rdx.or"));
4318 case Builtin::BI__builtin_reduce_and:
4319 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4320 *this, E, llvm::Intrinsic::vector_reduce_and, "rdx.and"));
4321 case Builtin::BI__builtin_reduce_maximum:
4322 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4323 *this, E, llvm::Intrinsic::vector_reduce_fmaximum, "rdx.maximum"));
4324 case Builtin::BI__builtin_reduce_minimum:
4325 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4326 *this, E, llvm::Intrinsic::vector_reduce_fminimum, "rdx.minimum"));
4327
4328 case Builtin::BI__builtin_matrix_transpose: {
4329 auto *MatrixTy = E->getArg(0)->getType()->castAs<ConstantMatrixType>();
4330 Value *MatValue = EmitScalarExpr(E->getArg(0));
4331 MatrixBuilder MB(Builder);
4332 Value *Result = MB.CreateMatrixTranspose(MatValue, MatrixTy->getNumRows(),
4333 MatrixTy->getNumColumns());
4334 return RValue::get(Result);
4335 }
4336
4337 case Builtin::BI__builtin_matrix_column_major_load: {
4338 MatrixBuilder MB(Builder);
4339 // Emit everything that isn't dependent on the first parameter type
4340 Value *Stride = EmitScalarExpr(E->getArg(3));
4341 const auto *ResultTy = E->getType()->getAs<ConstantMatrixType>();
4342 auto *PtrTy = E->getArg(0)->getType()->getAs<PointerType>();
4343 assert(PtrTy && "arg0 must be of pointer type");
4344 bool IsVolatile = PtrTy->getPointeeType().isVolatileQualified();
4345
4346 Address Src = EmitPointerWithAlignment(E->getArg(0));
4347 EmitNonNullArgCheck(RValue::get(Src.emitRawPointer(*this)),
4348 E->getArg(0)->getType(), E->getArg(0)->getExprLoc(), FD,
4349 0);
4350 Value *Result = MB.CreateColumnMajorLoad(
4351 Src.getElementType(), Src.emitRawPointer(*this),
4352 Align(Src.getAlignment().getQuantity()), Stride, IsVolatile,
4353 ResultTy->getNumRows(), ResultTy->getNumColumns(), "matrix");
4354 return RValue::get(Result);
4355 }
4356
4357 case Builtin::BI__builtin_matrix_column_major_store: {
4358 MatrixBuilder MB(Builder);
4359 Value *Matrix = EmitScalarExpr(E->getArg(0));
4360 Address Dst = EmitPointerWithAlignment(E->getArg(1));
4361 Value *Stride = EmitScalarExpr(E->getArg(2));
4362
4363 const auto *MatrixTy = E->getArg(0)->getType()->getAs<ConstantMatrixType>();
4364 auto *PtrTy = E->getArg(1)->getType()->getAs<PointerType>();
4365 assert(PtrTy && "arg1 must be of pointer type");
4366 bool IsVolatile = PtrTy->getPointeeType().isVolatileQualified();
4367
4368 EmitNonNullArgCheck(RValue::get(Dst.emitRawPointer(*this)),
4369 E->getArg(1)->getType(), E->getArg(1)->getExprLoc(), FD,
4370 0);
4371 Value *Result = MB.CreateColumnMajorStore(
4372 Matrix, Dst.emitRawPointer(*this),
4373 Align(Dst.getAlignment().getQuantity()), Stride, IsVolatile,
4374 MatrixTy->getNumRows(), MatrixTy->getNumColumns());
4375 return RValue::get(Result);
4376 }
4377
4378 case Builtin::BI__builtin_isinf_sign: {
4379 // isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? -1 : 1) : 0
4380 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
4381 // FIXME: for strictfp/IEEE-754 we need to not trap on SNaN here.
4382 Value *Arg = EmitScalarExpr(E->getArg(0));
4383 Value *AbsArg = EmitFAbs(*this, Arg);
4384 Value *IsInf = Builder.CreateFCmpOEQ(
4385 AbsArg, ConstantFP::getInfinity(Arg->getType()), "isinf");
4386 Value *IsNeg = EmitSignBit(*this, Arg);
4387
4388 llvm::Type *IntTy = ConvertType(E->getType());
4389 Value *Zero = Constant::getNullValue(IntTy);
4390 Value *One = ConstantInt::get(IntTy, 1);
4391 Value *NegativeOne = ConstantInt::get(IntTy, -1);
4392 Value *SignResult = Builder.CreateSelect(IsNeg, NegativeOne, One);
4393 Value *Result = Builder.CreateSelect(IsInf, SignResult, Zero);
4394 return RValue::get(Result);
4395 }
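// Illustrative sketch (editorial, not part of the original source):
// __builtin_isinf_sign(-INFINITY) == -1, __builtin_isinf_sign(INFINITY) == 1,
// and any finite or NaN argument yields 0, matching the select chain above.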
4396
4397 case Builtin::BI__builtin_flt_rounds: {
4398 Function *F = CGM.getIntrinsic(Intrinsic::get_rounding);
4399
4400 llvm::Type *ResultType = ConvertType(E->getType());
4401 Value *Result = Builder.CreateCall(F);
4402 if (Result->getType() != ResultType)
4403 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
4404 "cast");
4405 return RValue::get(Result);
4406 }
4407
4408 case Builtin::BI__builtin_set_flt_rounds: {
4409 Function *F = CGM.getIntrinsic(Intrinsic::set_rounding);
4410
4411 Value *V = EmitScalarExpr(E->getArg(0));
4412 Builder.CreateCall(F, V);
4413 return RValue::get(nullptr);
4414 }
4415
4416 case Builtin::BI__builtin_fpclassify: {
4417 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
4418 // FIXME: for strictfp/IEEE-754 we need to not trap on SNaN here.
4419 Value *V = EmitScalarExpr(E->getArg(5));
4420 llvm::Type *Ty = ConvertType(E->getArg(5)->getType());
4421
4422 // Create Result
4423 BasicBlock *Begin = Builder.GetInsertBlock();
4424 BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn);
4425 Builder.SetInsertPoint(End);
4426 PHINode *Result =
4427 Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4,
4428 "fpclassify_result");
4429
4430 // if (V==0) return FP_ZERO
4431 Builder.SetInsertPoint(Begin);
4432 Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty),
4433 "iszero");
4434 Value *ZeroLiteral = EmitScalarExpr(E->getArg(4));
4435 BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn);
4436 Builder.CreateCondBr(IsZero, End, NotZero);
4437 Result->addIncoming(ZeroLiteral, Begin);
4438
4439 // if (V != V) return FP_NAN
4440 Builder.SetInsertPoint(NotZero);
4441 Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp");
4442 Value *NanLiteral = EmitScalarExpr(E->getArg(0));
4443 BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn);
4444 Builder.CreateCondBr(IsNan, End, NotNan);
4445 Result->addIncoming(NanLiteral, NotZero);
4446
4447 // if (fabs(V) == infinity) return FP_INFINITY
4448 Builder.SetInsertPoint(NotNan);
4449 Value *VAbs = EmitFAbs(*this, V);
4450 Value *IsInf =
4451 Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()),
4452 "isinf");
4453 Value *InfLiteral = EmitScalarExpr(E->getArg(1));
4454 BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn);
4455 Builder.CreateCondBr(IsInf, End, NotInf);
4456 Result->addIncoming(InfLiteral, NotNan);
4457
4458 // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL
4459 Builder.SetInsertPoint(NotInf);
4460 APFloat Smallest = APFloat::getSmallestNormalized(
4461 getContext().getFloatTypeSemantics(E->getArg(5)->getType()));
4462 Value *IsNormal =
4463 Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest),
4464 "isnormal");
4465 Value *NormalResult =
4466 Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)),
4467 EmitScalarExpr(E->getArg(3)));
4468 Builder.CreateBr(End);
4469 Result->addIncoming(NormalResult, NotInf);
4470
4471 // return Result
4472 Builder.SetInsertPoint(End);
4473 return RValue::get(Result);
4474 }
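// For reference, the source-level form this case lowers (with the argument
// order used above) is roughly:
//   __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,
//                        FP_ZERO, x)
// Args 0-4 are the values returned for each class; arg 5 is the operand.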
4475
4476 // An alloca will always return a pointer to the alloca (stack) address
4477 // space. This address space need not be the same as the AST / Language
4478 // default (e.g. in C / C++ auto vars are in the generic address space). At
4479 // the AST level this is handled within CreateTempAlloca et al., but for the
4480 // builtin / dynamic alloca we have to handle it here. We use an explicit cast
4481 // instead of passing an AS to CreateAlloca so as to not inhibit optimisation.
4482 case Builtin::BIalloca:
4483 case Builtin::BI_alloca:
4484 case Builtin::BI__builtin_alloca_uninitialized:
4485 case Builtin::BI__builtin_alloca: {
4486 Value *Size = EmitScalarExpr(E->getArg(0));
4487 const TargetInfo &TI = getContext().getTargetInfo();
4488 // The alignment of the alloca should correspond to __BIGGEST_ALIGNMENT__.
4489 const Align SuitableAlignmentInBytes =
4490 CGM.getContext()
4491 .toCharUnitsFromBits(TI.getSuitableAlign())
4492 .getAsAlign();
4493 AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
4494 AI->setAlignment(SuitableAlignmentInBytes);
4495 if (BuiltinID != Builtin::BI__builtin_alloca_uninitialized)
4496 initializeAlloca(*this, AI, Size, SuitableAlignmentInBytes);
4497 LangAS AAS = getASTAllocaAddressSpace();
4498 LangAS EAS = E->getType()->getPointeeType().getAddressSpace();
4499 if (AAS != EAS) {
4500 llvm::Type *Ty = CGM.getTypes().ConvertType(E->getType());
4501 return RValue::get(getTargetHooks().performAddrSpaceCast(*this, AI, AAS,
4502 EAS, Ty));
4503 }
4504 return RValue::get(AI);
4505 }
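// Rough example of the address-space handling above on a target whose stack
// allocations live in a non-default address space (e.g. AMDGPU private,
// addrspace(5)); the cast is only emitted when the AST address space differs:
//   %a = alloca i8, i64 %size, align <BIGGEST_ALIGNMENT>, addrspace(5)
//   %p = addrspacecast ptr addrspace(5) %a to ptr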
4506
4507 case Builtin::BI__builtin_alloca_with_align_uninitialized:
4508 case Builtin::BI__builtin_alloca_with_align: {
4509 Value *Size = EmitScalarExpr(E->getArg(0));
4510 Value *AlignmentInBitsValue = EmitScalarExpr(E->getArg(1));
4511 auto *AlignmentInBitsCI = cast<ConstantInt>(AlignmentInBitsValue);
4512 unsigned AlignmentInBits = AlignmentInBitsCI->getZExtValue();
4513 const Align AlignmentInBytes =
4514 CGM.getContext().toCharUnitsFromBits(AlignmentInBits).getAsAlign();
4515 AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
4516 AI->setAlignment(AlignmentInBytes);
4517 if (BuiltinID != Builtin::BI__builtin_alloca_with_align_uninitialized)
4518 initializeAlloca(*this, AI, Size, AlignmentInBytes);
4519 LangAS AAS = getASTAllocaAddressSpace();
4520 LangAS EAS = E->getType()->getPointeeType().getAddressSpace();
4521 if (AAS != EAS) {
4522 llvm::Type *Ty = CGM.getTypes().ConvertType(E->getType());
4523 return RValue::get(getTargetHooks().performAddrSpaceCast(*this, AI, AAS,
4524 EAS, Ty));
4525 }
4526 return RValue::get(AI);
4527 }
4528
4529 case Builtin::BIbzero:
4530 case Builtin::BI__builtin_bzero: {
4531 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4532 Value *SizeVal = EmitScalarExpr(E->getArg(1));
4533 EmitNonNullArgCheck(Dest, E->getArg(0)->getType(),
4534 E->getArg(0)->getExprLoc(), FD, 0);
4535 Builder.CreateMemSet(Dest, Builder.getInt8(0), SizeVal, false);
4536 return RValue::get(nullptr);
4537 }
4538
4539 case Builtin::BIbcopy:
4540 case Builtin::BI__builtin_bcopy: {
4541 Address Src = EmitPointerWithAlignment(E->getArg(0));
4542 Address Dest = EmitPointerWithAlignment(E->getArg(1));
4543 Value *SizeVal = EmitScalarExpr(E->getArg(2));
4544 EmitNonNullArgCheck(RValue::get(Src.emitRawPointer(*this)),
4545 E->getArg(0)->getType(), E->getArg(0)->getExprLoc(), FD,
4546 0);
4547 EmitNonNullArgCheck(RValue::get(Dest.emitRawPointer(*this)),
4548 E->getArg(1)->getType(), E->getArg(1)->getExprLoc(), FD,
4549 0);
4550 Builder.CreateMemMove(Dest, Src, SizeVal, false);
4551 return RValue::get(nullptr);
4552 }
4553
4554 case Builtin::BImemcpy:
4555 case Builtin::BI__builtin_memcpy:
4556 case Builtin::BImempcpy:
4557 case Builtin::BI__builtin_mempcpy: {
4558 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4559 Address Src = EmitPointerWithAlignment(E->getArg(1));
4560 Value *SizeVal = EmitScalarExpr(E->getArg(2));
4561 EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0);
4562 EmitArgCheck(TCK_Load, Src, E->getArg(1), 1);
4563 Builder.CreateMemCpy(Dest, Src, SizeVal, false);
4564 if (BuiltinID == Builtin::BImempcpy ||
4565 BuiltinID == Builtin::BI__builtin_mempcpy)
4566 return RValue::get(Builder.CreateInBoundsGEP(
4567 Dest.getElementType(), Dest.emitRawPointer(*this), SizeVal));
4568 else
4569 return RValue::get(Dest, *this);
4570 }
4571
4572 case Builtin::BI__builtin_memcpy_inline: {
4573 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4574 Address Src = EmitPointerWithAlignment(E->getArg(1));
4575 uint64_t Size =
4576 E->getArg(2)->EvaluateKnownConstInt(getContext()).getZExtValue();
4577 EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0);
4578 EmitArgCheck(TCK_Load, Src, E->getArg(1), 1);
4579 Builder.CreateMemCpyInline(Dest, Src, Size);
4580 return RValue::get(nullptr);
4581 }
4582
4583 case Builtin::BI__builtin_char_memchr:
4584 BuiltinID = Builtin::BI__builtin_memchr;
4585 break;
4586
4587 case Builtin::BI__builtin___memcpy_chk: {
4588 // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2.
4589 Expr::EvalResult SizeResult, DstSizeResult;
4590 if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
4591 !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
4592 break;
4593 llvm::APSInt Size = SizeResult.Val.getInt();
4594 llvm::APSInt DstSize = DstSizeResult.Val.getInt();
4595 if (Size.ugt(DstSize))
4596 break;
4597 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4598 Address Src = EmitPointerWithAlignment(E->getArg(1));
4599 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
4600 Builder.CreateMemCpy(Dest, Src, SizeVal, false);
4601 return RValue::get(Dest, *this);
4602 }
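// Worked example of the fold above: __builtin___memcpy_chk(d, s, 16, 32) has
// both size operands constant and 16 <= 32, so it is emitted as a plain
// llvm.memcpy of 16 bytes; otherwise the case breaks and the ordinary
// library-call path is used instead.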
4603
4604 case Builtin::BI__builtin_objc_memmove_collectable: {
4605 Address DestAddr = EmitPointerWithAlignment(E->getArg(0));
4606 Address SrcAddr = EmitPointerWithAlignment(E->getArg(1));
4607 Value *SizeVal = EmitScalarExpr(E->getArg(2));
4608 CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this,
4609 DestAddr, SrcAddr, SizeVal);
4610 return RValue::get(DestAddr, *this);
4611 }
4612
4613 case Builtin::BI__builtin___memmove_chk: {
4614 // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2.
4615 Expr::EvalResult SizeResult, DstSizeResult;
4616 if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
4617 !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
4618 break;
4619 llvm::APSInt Size = SizeResult.Val.getInt();
4620 llvm::APSInt DstSize = DstSizeResult.Val.getInt();
4621 if (Size.ugt(DstSize))
4622 break;
4623 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4624 Address Src = EmitPointerWithAlignment(E->getArg(1));
4625 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
4626 Builder.CreateMemMove(Dest, Src, SizeVal, false);
4627 return RValue::get(Dest, *this);
4628 }
4629
4630 case Builtin::BImemmove:
4631 case Builtin::BI__builtin_memmove: {
4632 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4633 Address Src = EmitPointerWithAlignment(E->getArg(1));
4634 Value *SizeVal = EmitScalarExpr(E->getArg(2));
4635 EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0);
4636 EmitArgCheck(TCK_Load, Src, E->getArg(1), 1);
4637 Builder.CreateMemMove(Dest, Src, SizeVal, false);
4638 return RValue::get(Dest, *this);
4639 }
4640 case Builtin::BImemset:
4641 case Builtin::BI__builtin_memset: {
4642 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4643 Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
4644 Builder.getInt8Ty());
4645 Value *SizeVal = EmitScalarExpr(E->getArg(2));
4646 EmitNonNullArgCheck(Dest, E->getArg(0)->getType(),
4647 E->getArg(0)->getExprLoc(), FD, 0);
4648 Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
4649 return RValue::get(Dest, *this);
4650 }
4651 case Builtin::BI__builtin_memset_inline: {
4652 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4653 Value *ByteVal =
4654 Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)), Builder.getInt8Ty());
4655 uint64_t Size =
4656 E->getArg(2)->EvaluateKnownConstInt(getContext()).getZExtValue();
4657 EmitNonNullArgCheck(RValue::get(Dest.emitRawPointer(*this)),
4658 E->getArg(0)->getType(), E->getArg(0)->getExprLoc(), FD,
4659 0);
4660 Builder.CreateMemSetInline(Dest, ByteVal, Size);
4661 return RValue::get(nullptr);
4662 }
4663 case Builtin::BI__builtin___memset_chk: {
4664 // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2.
4665 Expr::EvalResult SizeResult, DstSizeResult;
4666 if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
4667 !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
4668 break;
4669 llvm::APSInt Size = SizeResult.Val.getInt();
4670 llvm::APSInt DstSize = DstSizeResult.Val.getInt();
4671 if (Size.ugt(DstSize))
4672 break;
4673 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4674 Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
4675 Builder.getInt8Ty());
4676 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
4677 Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
4678 return RValue::get(Dest, *this);
4679 }
4680 case Builtin::BI__builtin_wmemchr: {
4681 // The MSVC runtime library does not provide a definition of wmemchr, so we
4682 // need an inline implementation.
4683 if (!getTarget().getTriple().isOSMSVCRT())
4684 break;
4685
4686 llvm::Type *WCharTy = ConvertType(getContext().WCharTy);
4687 Value *Str = EmitScalarExpr(E->getArg(0));
4688 Value *Chr = EmitScalarExpr(E->getArg(1));
4689 Value *Size = EmitScalarExpr(E->getArg(2));
4690
4691 BasicBlock *Entry = Builder.GetInsertBlock();
4692 BasicBlock *CmpEq = createBasicBlock("wmemchr.eq");
4693 BasicBlock *Next = createBasicBlock("wmemchr.next");
4694 BasicBlock *Exit = createBasicBlock("wmemchr.exit");
4695 Value *SizeEq0 = Builder.CreateICmpEQ(Size, ConstantInt::get(SizeTy, 0));
4696 Builder.CreateCondBr(SizeEq0, Exit, CmpEq);
4697
4698 EmitBlock(CmpEq);
4699 PHINode *StrPhi = Builder.CreatePHI(Str->getType(), 2);
4700 StrPhi->addIncoming(Str, Entry);
4701 PHINode *SizePhi = Builder.CreatePHI(SizeTy, 2);
4702 SizePhi->addIncoming(Size, Entry);
4703 CharUnits WCharAlign =
4704 getContext().getTypeAlignInChars(getContext().WCharTy);
4705 Value *StrCh = Builder.CreateAlignedLoad(WCharTy, StrPhi, WCharAlign);
4706 Value *FoundChr = Builder.CreateConstInBoundsGEP1_32(WCharTy, StrPhi, 0);
4707 Value *StrEqChr = Builder.CreateICmpEQ(StrCh, Chr);
4708 Builder.CreateCondBr(StrEqChr, Exit, Next);
4709
4710 EmitBlock(Next);
4711 Value *NextStr = Builder.CreateConstInBoundsGEP1_32(WCharTy, StrPhi, 1);
4712 Value *NextSize = Builder.CreateSub(SizePhi, ConstantInt::get(SizeTy, 1));
4713 Value *NextSizeEq0 =
4714 Builder.CreateICmpEQ(NextSize, ConstantInt::get(SizeTy, 0));
4715 Builder.CreateCondBr(NextSizeEq0, Exit, CmpEq);
4716 StrPhi->addIncoming(NextStr, Next);
4717 SizePhi->addIncoming(NextSize, Next);
4718
4719 EmitBlock(Exit);
4720 PHINode *Ret = Builder.CreatePHI(Str->getType(), 3);
4721 Ret->addIncoming(llvm::Constant::getNullValue(Str->getType()), Entry);
4722 Ret->addIncoming(llvm::Constant::getNullValue(Str->getType()), Next);
4723 Ret->addIncoming(FoundChr, CmpEq);
4724 return RValue::get(Ret);
4725 }
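// The block structure above is roughly equivalent to this C loop (sketch):
//   while (n) { if (*s == c) return (wchar_t *)s; ++s; --n; }
//   return 0;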
4726 case Builtin::BI__builtin_wmemcmp: {
4727 // The MSVC runtime library does not provide a definition of wmemcmp, so we
4728 // need an inline implementation.
4729 if (!getTarget().getTriple().isOSMSVCRT())
4730 break;
4731
4732 llvm::Type *WCharTy = ConvertType(getContext().WCharTy);
4733
4734 Value *Dst = EmitScalarExpr(E->getArg(0));
4735 Value *Src = EmitScalarExpr(E->getArg(1));
4736 Value *Size = EmitScalarExpr(E->getArg(2));
4737
4738 BasicBlock *Entry = Builder.GetInsertBlock();
4739 BasicBlock *CmpGT = createBasicBlock("wmemcmp.gt");
4740 BasicBlock *CmpLT = createBasicBlock("wmemcmp.lt");
4741 BasicBlock *Next = createBasicBlock("wmemcmp.next");
4742 BasicBlock *Exit = createBasicBlock("wmemcmp.exit");
4743 Value *SizeEq0 = Builder.CreateICmpEQ(Size, ConstantInt::get(SizeTy, 0));
4744 Builder.CreateCondBr(SizeEq0, Exit, CmpGT);
4745
4746 EmitBlock(CmpGT);
4747 PHINode *DstPhi = Builder.CreatePHI(Dst->getType(), 2);
4748 DstPhi->addIncoming(Dst, Entry);
4749 PHINode *SrcPhi = Builder.CreatePHI(Src->getType(), 2);
4750 SrcPhi->addIncoming(Src, Entry);
4751 PHINode *SizePhi = Builder.CreatePHI(SizeTy, 2);
4752 SizePhi->addIncoming(Size, Entry);
4753 CharUnits WCharAlign =
4754 getContext().getTypeAlignInChars(getContext().WCharTy);
4755 Value *DstCh = Builder.CreateAlignedLoad(WCharTy, DstPhi, WCharAlign);
4756 Value *SrcCh = Builder.CreateAlignedLoad(WCharTy, SrcPhi, WCharAlign);
4757 Value *DstGtSrc = Builder.CreateICmpUGT(DstCh, SrcCh);
4758 Builder.CreateCondBr(DstGtSrc, Exit, CmpLT);
4759
4760 EmitBlock(CmpLT);
4761 Value *DstLtSrc = Builder.CreateICmpULT(DstCh, SrcCh);
4762 Builder.CreateCondBr(DstLtSrc, Exit, Next);
4763
4764 EmitBlock(Next);
4765 Value *NextDst = Builder.CreateConstInBoundsGEP1_32(WCharTy, DstPhi, 1);
4766 Value *NextSrc = Builder.CreateConstInBoundsGEP1_32(WCharTy, SrcPhi, 1);
4767 Value *NextSize = Builder.CreateSub(SizePhi, ConstantInt::get(SizeTy, 1));
4768 Value *NextSizeEq0 =
4769 Builder.CreateICmpEQ(NextSize, ConstantInt::get(SizeTy, 0));
4770 Builder.CreateCondBr(NextSizeEq0, Exit, CmpGT);
4771 DstPhi->addIncoming(NextDst, Next);
4772 SrcPhi->addIncoming(NextSrc, Next);
4773 SizePhi->addIncoming(NextSize, Next);
4774
4775 EmitBlock(Exit);
4776 PHINode *Ret = Builder.CreatePHI(IntTy, 4);
4777 Ret->addIncoming(ConstantInt::get(IntTy, 0), Entry);
4778 Ret->addIncoming(ConstantInt::get(IntTy, 1), CmpGT);
4779 Ret->addIncoming(ConstantInt::get(IntTy, -1), CmpLT);
4780 Ret->addIncoming(ConstantInt::get(IntTy, 0), Next);
4781 return RValue::get(Ret);
4782 }
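// Roughly equivalent C for the blocks above (elements are compared as
// unsigned wchar_t units, matching the ICmpUGT/ICmpULT used):
//   while (n) { if (*d > *s) return 1; if (*d < *s) return -1; ++d; ++s; --n; }
//   return 0;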
4783 case Builtin::BI__builtin_dwarf_cfa: {
4784 // The offset in bytes from the first argument to the CFA.
4785 //
4786 // Why on earth is this in the frontend? Is there any reason at
4787 // all that the backend can't reasonably determine this while
4788 // lowering llvm.eh.dwarf.cfa()?
4789 //
4790 // TODO: If there's a satisfactory reason, add a target hook for
4791 // this instead of hard-coding 0, which is correct for most targets.
4792 int32_t Offset = 0;
4793
4794 Function *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa);
4795 return RValue::get(Builder.CreateCall(F,
4796 llvm::ConstantInt::get(Int32Ty, Offset)));
4797 }
4798 case Builtin::BI__builtin_return_address: {
4799 Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0),
4800 getContext().UnsignedIntTy);
4801 Function *F = CGM.getIntrinsic(Intrinsic::returnaddress);
4802 return RValue::get(Builder.CreateCall(F, Depth));
4803 }
4804 case Builtin::BI_ReturnAddress: {
4805 Function *F = CGM.getIntrinsic(Intrinsic::returnaddress);
4806 return RValue::get(Builder.CreateCall(F, Builder.getInt32(0)));
4807 }
4808 case Builtin::BI__builtin_frame_address: {
4809 Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0),
4810 getContext().UnsignedIntTy);
4811 Function *F = CGM.getIntrinsic(Intrinsic::frameaddress, AllocaInt8PtrTy);
4812 return RValue::get(Builder.CreateCall(F, Depth));
4813 }
4814 case Builtin::BI__builtin_extract_return_addr: {
4815 Value *Address = EmitScalarExpr(E->getArg(0));
4816 Value *Result = getTargetHooks().decodeReturnAddress(*this, Address);
4817 return RValue::get(Result);
4818 }
4819 case Builtin::BI__builtin_frob_return_addr: {
4820 Value *Address = EmitScalarExpr(E->getArg(0));
4821 Value *Result = getTargetHooks().encodeReturnAddress(*this, Address);
4822 return RValue::get(Result);
4823 }
4824 case Builtin::BI__builtin_dwarf_sp_column: {
4825 llvm::IntegerType *Ty
4826 = cast<llvm::IntegerType>(ConvertType(E->getType()));
4827 int Column = getTargetHooks().getDwarfEHStackPointer(CGM);
4828 if (Column == -1) {
4829 CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column");
4830 return RValue::get(llvm::UndefValue::get(Ty));
4831 }
4832 return RValue::get(llvm::ConstantInt::get(Ty, Column, true));
4833 }
4834 case Builtin::BI__builtin_init_dwarf_reg_size_table: {
4835 Value *Address = EmitScalarExpr(E->getArg(0));
4836 if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address))
4837 CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table");
4838 return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
4839 }
4840 case Builtin::BI__builtin_eh_return: {
4841 Value *Int = EmitScalarExpr(E->getArg(0));
4842 Value *Ptr = EmitScalarExpr(E->getArg(1));
4843
4844 llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType());
4845 assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) &&
4846 "LLVM's __builtin_eh_return only supports 32- and 64-bit variants");
4847 Function *F =
4848 CGM.getIntrinsic(IntTy->getBitWidth() == 32 ? Intrinsic::eh_return_i32
4849 : Intrinsic::eh_return_i64);
4850 Builder.CreateCall(F, {Int, Ptr});
4851 Builder.CreateUnreachable();
4852
4853 // We do need to preserve an insertion point.
4854 EmitBlock(createBasicBlock("builtin_eh_return.cont"));
4855
4856 return RValue::get(nullptr);
4857 }
4858 case Builtin::BI__builtin_unwind_init: {
4859 Function *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init);
4860 Builder.CreateCall(F);
4861 return RValue::get(nullptr);
4862 }
4863 case Builtin::BI__builtin_extend_pointer: {
4864 // Extends a pointer to the size of an _Unwind_Word, which is
4865 // uint64_t on all platforms. Generally this gets poked into a
4866 // register and eventually used as an address, so if the
4867 // addressing registers are wider than pointers and the platform
4868 // doesn't implicitly ignore high-order bits when doing
4869 // addressing, we need to make sure we zext / sext based on
4870 // the platform's expectations.
4871 //
4872 // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html
4873
4874 // Cast the pointer to intptr_t.
4875 Value *Ptr = EmitScalarExpr(E->getArg(0));
4876 Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast");
4877
4878 // If that's 64 bits, we're done.
4879 if (IntPtrTy->getBitWidth() == 64)
4880 return RValue::get(Result);
4881
4882 // Otherwise, ask the codegen data what to do.
4883 if (getTargetHooks().extendPointerWithSExt())
4884 return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext"));
4885 else
4886 return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext"));
4887 }
4888 case Builtin::BI__builtin_setjmp: {
4889 // Buffer is a void**.
4890 Address Buf = EmitPointerWithAlignment(E->getArg(0));
4891
4892 if (getTarget().getTriple().getArch() == llvm::Triple::systemz) {
4893 // On this target, the back end fills in the context buffer completely.
4894 // It doesn't really matter if the frontend stores to the buffer before
4895 // calling setjmp; the back-end is going to overwrite it anyway.
4896 Function *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
4897 return RValue::get(Builder.CreateCall(F, Buf.emitRawPointer(*this)));
4898 }
4899
4900 // Store the frame pointer to the setjmp buffer.
4901 Value *FrameAddr = Builder.CreateCall(
4902 CGM.getIntrinsic(Intrinsic::frameaddress, AllocaInt8PtrTy),
4903 ConstantInt::get(Int32Ty, 0));
4904 Builder.CreateStore(FrameAddr, Buf);
4905
4906 // Store the stack pointer to the setjmp buffer.
4907 Value *StackAddr = Builder.CreateStackSave();
4908 assert(Buf.emitRawPointer(*this)->getType() == StackAddr->getType());
4909
4910 Address StackSaveSlot = Builder.CreateConstInBoundsGEP(Buf, 2);
4911 Builder.CreateStore(StackAddr, StackSaveSlot);
4912
4913 // Call LLVM's EH setjmp, which is lightweight.
4914 Function *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
4915 return RValue::get(Builder.CreateCall(F, Buf.emitRawPointer(*this)));
4916 }
4917 case Builtin::BI__builtin_longjmp: {
4918 Value *Buf = EmitScalarExpr(E->getArg(0));
4919
4920 // Call LLVM's EH longjmp, which is lightweight.
4921 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf);
4922
4923 // longjmp doesn't return; mark this as unreachable.
4924 Builder.CreateUnreachable();
4925
4926 // We do need to preserve an insertion point.
4927 EmitBlock(createBasicBlock("longjmp.cont"));
4928
4929 return RValue::get(nullptr);
4930 }
4931 case Builtin::BI__builtin_launder: {
4932 const Expr *Arg = E->getArg(0);
4933 QualType ArgTy = Arg->getType()->getPointeeType();
4934 Value *Ptr = EmitScalarExpr(Arg);
4935 if (TypeRequiresBuiltinLaunder(CGM, ArgTy))
4936 Ptr = Builder.CreateLaunderInvariantGroup(Ptr);
4937
4938 return RValue::get(Ptr);
4939 }
4940 case Builtin::BI__sync_fetch_and_add:
4941 case Builtin::BI__sync_fetch_and_sub:
4942 case Builtin::BI__sync_fetch_and_or:
4943 case Builtin::BI__sync_fetch_and_and:
4944 case Builtin::BI__sync_fetch_and_xor:
4945 case Builtin::BI__sync_fetch_and_nand:
4946 case Builtin::BI__sync_add_and_fetch:
4947 case Builtin::BI__sync_sub_and_fetch:
4948 case Builtin::BI__sync_and_and_fetch:
4949 case Builtin::BI__sync_or_and_fetch:
4950 case Builtin::BI__sync_xor_and_fetch:
4951 case Builtin::BI__sync_nand_and_fetch:
4952 case Builtin::BI__sync_val_compare_and_swap:
4953 case Builtin::BI__sync_bool_compare_and_swap:
4954 case Builtin::BI__sync_lock_test_and_set:
4955 case Builtin::BI__sync_lock_release:
4956 case Builtin::BI__sync_swap:
4957 llvm_unreachable("Shouldn't make it through sema");
4958 case Builtin::BI__sync_fetch_and_add_1:
4959 case Builtin::BI__sync_fetch_and_add_2:
4960 case Builtin::BI__sync_fetch_and_add_4:
4961 case Builtin::BI__sync_fetch_and_add_8:
4962 case Builtin::BI__sync_fetch_and_add_16:
4963 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E);
4964 case Builtin::BI__sync_fetch_and_sub_1:
4965 case Builtin::BI__sync_fetch_and_sub_2:
4966 case Builtin::BI__sync_fetch_and_sub_4:
4967 case Builtin::BI__sync_fetch_and_sub_8:
4968 case Builtin::BI__sync_fetch_and_sub_16:
4969 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E);
4970 case Builtin::BI__sync_fetch_and_or_1:
4971 case Builtin::BI__sync_fetch_and_or_2:
4972 case Builtin::BI__sync_fetch_and_or_4:
4973 case Builtin::BI__sync_fetch_and_or_8:
4974 case Builtin::BI__sync_fetch_and_or_16:
4975 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E);
4976 case Builtin::BI__sync_fetch_and_and_1:
4977 case Builtin::BI__sync_fetch_and_and_2:
4978 case Builtin::BI__sync_fetch_and_and_4:
4979 case Builtin::BI__sync_fetch_and_and_8:
4980 case Builtin::BI__sync_fetch_and_and_16:
4981 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E);
4982 case Builtin::BI__sync_fetch_and_xor_1:
4983 case Builtin::BI__sync_fetch_and_xor_2:
4984 case Builtin::BI__sync_fetch_and_xor_4:
4985 case Builtin::BI__sync_fetch_and_xor_8:
4986 case Builtin::BI__sync_fetch_and_xor_16:
4987 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E);
4988 case Builtin::BI__sync_fetch_and_nand_1:
4989 case Builtin::BI__sync_fetch_and_nand_2:
4990 case Builtin::BI__sync_fetch_and_nand_4:
4991 case Builtin::BI__sync_fetch_and_nand_8:
4992 case Builtin::BI__sync_fetch_and_nand_16:
4993 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Nand, E);
4994
4995 // Clang extensions: not overloaded yet.
4996 case Builtin::BI__sync_fetch_and_min:
4997 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E);
4998 case Builtin::BI__sync_fetch_and_max:
4999 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E);
5000 case Builtin::BI__sync_fetch_and_umin:
5001 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E);
5002 case Builtin::BI__sync_fetch_and_umax:
5003 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E);
5004
5005 case Builtin::BI__sync_add_and_fetch_1:
5006 case Builtin::BI__sync_add_and_fetch_2:
5007 case Builtin::BI__sync_add_and_fetch_4:
5008 case Builtin::BI__sync_add_and_fetch_8:
5009 case Builtin::BI__sync_add_and_fetch_16:
5010 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E,
5011 llvm::Instruction::Add);
5012 case Builtin::BI__sync_sub_and_fetch_1:
5013 case Builtin::BI__sync_sub_and_fetch_2:
5014 case Builtin::BI__sync_sub_and_fetch_4:
5015 case Builtin::BI__sync_sub_and_fetch_8:
5016 case Builtin::BI__sync_sub_and_fetch_16:
5017 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E,
5018 llvm::Instruction::Sub);
5019 case Builtin::BI__sync_and_and_fetch_1:
5020 case Builtin::BI__sync_and_and_fetch_2:
5021 case Builtin::BI__sync_and_and_fetch_4:
5022 case Builtin::BI__sync_and_and_fetch_8:
5023 case Builtin::BI__sync_and_and_fetch_16:
5024 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::And, E,
5025 llvm::Instruction::And);
5026 case Builtin::BI__sync_or_and_fetch_1:
5027 case Builtin::BI__sync_or_and_fetch_2:
5028 case Builtin::BI__sync_or_and_fetch_4:
5029 case Builtin::BI__sync_or_and_fetch_8:
5030 case Builtin::BI__sync_or_and_fetch_16:
5031 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E,
5032 llvm::Instruction::Or);
5033 case Builtin::BI__sync_xor_and_fetch_1:
5034 case Builtin::BI__sync_xor_and_fetch_2:
5035 case Builtin::BI__sync_xor_and_fetch_4:
5036 case Builtin::BI__sync_xor_and_fetch_8:
5037 case Builtin::BI__sync_xor_and_fetch_16:
5038 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E,
5039 llvm::Instruction::Xor);
5040 case Builtin::BI__sync_nand_and_fetch_1:
5041 case Builtin::BI__sync_nand_and_fetch_2:
5042 case Builtin::BI__sync_nand_and_fetch_4:
5043 case Builtin::BI__sync_nand_and_fetch_8:
5044 case Builtin::BI__sync_nand_and_fetch_16:
5045 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Nand, E,
5046 llvm::Instruction::And, true);
5047
5048 case Builtin::BI__sync_val_compare_and_swap_1:
5049 case Builtin::BI__sync_val_compare_and_swap_2:
5050 case Builtin::BI__sync_val_compare_and_swap_4:
5051 case Builtin::BI__sync_val_compare_and_swap_8:
5052 case Builtin::BI__sync_val_compare_and_swap_16:
5053 return RValue::get(MakeAtomicCmpXchgValue(*this, E, false));
5054
5055 case Builtin::BI__sync_bool_compare_and_swap_1:
5056 case Builtin::BI__sync_bool_compare_and_swap_2:
5057 case Builtin::BI__sync_bool_compare_and_swap_4:
5058 case Builtin::BI__sync_bool_compare_and_swap_8:
5059 case Builtin::BI__sync_bool_compare_and_swap_16:
5060 return RValue::get(MakeAtomicCmpXchgValue(*this, E, true));
5061
5062 case Builtin::BI__sync_swap_1:
5063 case Builtin::BI__sync_swap_2:
5064 case Builtin::BI__sync_swap_4:
5065 case Builtin::BI__sync_swap_8:
5066 case Builtin::BI__sync_swap_16:
5067 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
5068
5069 case Builtin::BI__sync_lock_test_and_set_1:
5070 case Builtin::BI__sync_lock_test_and_set_2:
5071 case Builtin::BI__sync_lock_test_and_set_4:
5072 case Builtin::BI__sync_lock_test_and_set_8:
5073 case Builtin::BI__sync_lock_test_and_set_16:
5074 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
5075
5076 case Builtin::BI__sync_lock_release_1:
5077 case Builtin::BI__sync_lock_release_2:
5078 case Builtin::BI__sync_lock_release_4:
5079 case Builtin::BI__sync_lock_release_8:
5080 case Builtin::BI__sync_lock_release_16: {
5081 Address Ptr = CheckAtomicAlignment(*this, E);
5082 QualType ElTy = E->getArg(0)->getType()->getPointeeType();
5083
5084 llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
5085 getContext().getTypeSize(ElTy));
5086 llvm::StoreInst *Store =
5087 Builder.CreateStore(llvm::Constant::getNullValue(ITy), Ptr);
5088 Store->setAtomic(llvm::AtomicOrdering::Release);
5089 return RValue::get(nullptr);
5090 }
5091
5092 case Builtin::BI__sync_synchronize: {
5093 // We assume this is supposed to correspond to a C++0x-style
5094 // sequentially-consistent fence (i.e. this is only usable for
5095 // synchronization, not device I/O or anything like that). This intrinsic
5096 // is really badly designed in the sense that in theory, there isn't
5097 // any way to safely use it... but in practice, it mostly works
5098 // to use it with non-atomic loads and stores to get acquire/release
5099 // semantics.
5100 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent);
5101 return RValue::get(nullptr);
5102 }
5103
5104 case Builtin::BI__builtin_nontemporal_load:
5105 return RValue::get(EmitNontemporalLoad(*this, E));
5106 case Builtin::BI__builtin_nontemporal_store:
5107 return RValue::get(EmitNontemporalStore(*this, E));
5108 case Builtin::BI__c11_atomic_is_lock_free:
5109 case Builtin::BI__atomic_is_lock_free: {
5110 // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the
5111 // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since
5112 // _Atomic(T) is always properly-aligned.
5113 const char *LibCallName = "__atomic_is_lock_free";
5114 CallArgList Args;
5115 Args.add(RValue::get(EmitScalarExpr(E->getArg(0))),
5116 getContext().getSizeType());
5117 if (BuiltinID == Builtin::BI__atomic_is_lock_free)
5118 Args.add(RValue::get(EmitScalarExpr(E->getArg(1))),
5119 getContext().VoidPtrTy);
5120 else
5121 Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)),
5122 getContext().VoidPtrTy);
5123 const CGFunctionInfo &FuncInfo =
5124 CGM.getTypes().arrangeBuiltinFunctionCall(E->getType(), Args);
5125 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo);
5126 llvm::FunctionCallee Func = CGM.CreateRuntimeFunction(FTy, LibCallName);
5127 return EmitCall(FuncInfo, CGCallee::forDirect(Func),
5128 ReturnValueSlot(), Args);
5129 }
5130
5131 case Builtin::BI__atomic_test_and_set: {
5132 // Look at the argument type to determine whether this is a volatile
5133 // operation. The parameter type is always volatile.
5134 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
5135 bool Volatile =
5136 PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
5137
5138 Address Ptr =
5139 EmitPointerWithAlignment(E->getArg(0)).withElementType(Int8Ty);
5140
5141 Value *NewVal = Builder.getInt8(1);
5142 Value *Order = EmitScalarExpr(E->getArg(1));
5143 if (isa<llvm::ConstantInt>(Order)) {
5144 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
5145 AtomicRMWInst *Result = nullptr;
5146 switch (ord) {
5147 case 0: // memory_order_relaxed
5148 default: // invalid order
5149 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
5150 llvm::AtomicOrdering::Monotonic);
5151 break;
5152 case 1: // memory_order_consume
5153 case 2: // memory_order_acquire
5154 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
5155 llvm::AtomicOrdering::Acquire);
5156 break;
5157 case 3: // memory_order_release
5158 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
5159 llvm::AtomicOrdering::Release);
5160 break;
5161 case 4: // memory_order_acq_rel
5162
5163 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
5164 llvm::AtomicOrdering::AcquireRelease);
5165 break;
5166 case 5: // memory_order_seq_cst
5167 Result = Builder.CreateAtomicRMW(
5168 llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
5169 llvm::AtomicOrdering::SequentiallyConsistent);
5170 break;
5171 }
5172 Result->setVolatile(Volatile);
5173 return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
5174 }
5175
5176 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
5177
5178 llvm::BasicBlock *BBs[5] = {
5179 createBasicBlock("monotonic", CurFn),
5180 createBasicBlock("acquire", CurFn),
5181 createBasicBlock("release", CurFn),
5182 createBasicBlock("acqrel", CurFn),
5183 createBasicBlock("seqcst", CurFn)
5184 };
5185 llvm::AtomicOrdering Orders[5] = {
5186 llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Acquire,
5187 llvm::AtomicOrdering::Release, llvm::AtomicOrdering::AcquireRelease,
5188 llvm::AtomicOrdering::SequentiallyConsistent};
5189
5190 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
5191 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
5192
5193 Builder.SetInsertPoint(ContBB);
5194 PHINode *Result = Builder.CreatePHI(Int8Ty, 5, "was_set");
5195
5196 for (unsigned i = 0; i < 5; ++i) {
5197 Builder.SetInsertPoint(BBs[i]);
5198 AtomicRMWInst *RMW = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
5199 Ptr, NewVal, Orders[i]);
5200 RMW->setVolatile(Volatile);
5201 Result->addIncoming(RMW, BBs[i]);
5202 Builder.CreateBr(ContBB);
5203 }
5204
5205 SI->addCase(Builder.getInt32(0), BBs[0]);
5206 SI->addCase(Builder.getInt32(1), BBs[1]);
5207 SI->addCase(Builder.getInt32(2), BBs[1]);
5208 SI->addCase(Builder.getInt32(3), BBs[2]);
5209 SI->addCase(Builder.getInt32(4), BBs[3]);
5210 SI->addCase(Builder.getInt32(5), BBs[4]);
5211
5212 Builder.SetInsertPoint(ContBB);
5213 return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
5214 }
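// Sketch of the constant-order seq_cst path above:
//   %old = atomicrmw xchg ptr %p, i8 1 seq_cst
//   %tobool = icmp ne i8 %old, 0   ; the previous "set" state is returned
// Non-constant orders go through the switch over the five basic blocks.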
5215
5216 case Builtin::BI__atomic_clear: {
5217 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
5218 bool Volatile =
5219 PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
5220
5221 Address Ptr = EmitPointerWithAlignment(E->getArg(0));
5222 Ptr = Ptr.withElementType(Int8Ty);
5223 Value *NewVal = Builder.getInt8(0);
5224 Value *Order = EmitScalarExpr(E->getArg(1));
5225 if (isa<llvm::ConstantInt>(Order)) {
5226 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
5227 StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
5228 switch (ord) {
5229 case 0: // memory_order_relaxed
5230 default: // invalid order
5231 Store->setOrdering(llvm::AtomicOrdering::Monotonic);
5232 break;
5233 case 3: // memory_order_release
5234 Store->setOrdering(llvm::AtomicOrdering::Release);
5235 break;
5236 case 5: // memory_order_seq_cst
5237 Store->setOrdering(llvm::AtomicOrdering::SequentiallyConsistent);
5238 break;
5239 }
5240 return RValue::get(nullptr);
5241 }
5242
5243 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
5244
5245 llvm::BasicBlock *BBs[3] = {
5246 createBasicBlock("monotonic", CurFn),
5247 createBasicBlock("release", CurFn),
5248 createBasicBlock("seqcst", CurFn)
5249 };
5250 llvm::AtomicOrdering Orders[3] = {
5251 llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Release,
5252 llvm::AtomicOrdering::SequentiallyConsistent};
5253
5254 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
5255 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
5256
5257 for (unsigned i = 0; i < 3; ++i) {
5258 Builder.SetInsertPoint(BBs[i]);
5259 StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
5260 Store->setOrdering(Orders[i]);
5261 Builder.CreateBr(ContBB);
5262 }
5263
5264 SI->addCase(Builder.getInt32(0), BBs[0]);
5265 SI->addCase(Builder.getInt32(3), BBs[1]);
5266 SI->addCase(Builder.getInt32(5), BBs[2]);
5267
5268 Builder.SetInsertPoint(ContBB);
5269 return RValue::get(nullptr);
5270 }
5271
5272 case Builtin::BI__atomic_thread_fence:
5273 case Builtin::BI__atomic_signal_fence:
5274 case Builtin::BI__c11_atomic_thread_fence:
5275 case Builtin::BI__c11_atomic_signal_fence: {
5276 llvm::SyncScope::ID SSID;
5277 if (BuiltinID == Builtin::BI__atomic_signal_fence ||
5278 BuiltinID == Builtin::BI__c11_atomic_signal_fence)
5279 SSID = llvm::SyncScope::SingleThread;
5280 else
5281 SSID = llvm::SyncScope::System;
5282 Value *Order = EmitScalarExpr(E->getArg(0));
5283 if (isa<llvm::ConstantInt>(Order)) {
5284 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
5285 switch (ord) {
5286 case 0: // memory_order_relaxed
5287 default: // invalid order
5288 break;
5289 case 1: // memory_order_consume
5290 case 2: // memory_order_acquire
5291 Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
5292 break;
5293 case 3: // memory_order_release
5294 Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
5295 break;
5296 case 4: // memory_order_acq_rel
5297 Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
5298 break;
5299 case 5: // memory_order_seq_cst
5300 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
5301 break;
5302 }
5303 return RValue::get(nullptr);
5304 }
5305
5306 llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB;
5307 AcquireBB = createBasicBlock("acquire", CurFn);
5308 ReleaseBB = createBasicBlock("release", CurFn);
5309 AcqRelBB = createBasicBlock("acqrel", CurFn);
5310 SeqCstBB = createBasicBlock("seqcst", CurFn);
5311 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
5312
5313 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
5314 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);
5315
5316 Builder.SetInsertPoint(AcquireBB);
5317 Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
5318 Builder.CreateBr(ContBB);
5319 SI->addCase(Builder.getInt32(1), AcquireBB);
5320 SI->addCase(Builder.getInt32(2), AcquireBB);
5321
5322 Builder.SetInsertPoint(ReleaseBB);
5323 Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
5324 Builder.CreateBr(ContBB);
5325 SI->addCase(Builder.getInt32(3), ReleaseBB);
5326
5327 Builder.SetInsertPoint(AcqRelBB);
5328 Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
5329 Builder.CreateBr(ContBB);
5330 SI->addCase(Builder.getInt32(4), AcqRelBB);
5331
5332 Builder.SetInsertPoint(SeqCstBB);
5333 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
5334 Builder.CreateBr(ContBB);
5335 SI->addCase(Builder.getInt32(5), SeqCstBB);
5336
5337 Builder.SetInsertPoint(ContBB);
5338 return RValue::get(nullptr);
5339 }
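// Examples of what the constant-order path above emits (illustrative):
//   __atomic_thread_fence(__ATOMIC_ACQUIRE) -> fence acquire
//   __atomic_signal_fence(__ATOMIC_SEQ_CST) -> fence syncscope("singlethread") seq_cst
// Relaxed (or invalid) orders emit no fence at all.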
5340 case Builtin::BI__scoped_atomic_thread_fence: {
5341 auto ScopeModel = AtomicScopeModel::create(AtomicScopeModelKind::Generic);
5342
5343 Value *Order = EmitScalarExpr(E->getArg(0));
5344 Value *Scope = EmitScalarExpr(E->getArg(1));
5345 auto Ord = dyn_cast<llvm::ConstantInt>(Order);
5346 auto Scp = dyn_cast<llvm::ConstantInt>(Scope);
5347 if (Ord && Scp) {
5348 SyncScope SS = ScopeModel->isValid(Scp->getZExtValue())
5349 ? ScopeModel->map(Scp->getZExtValue())
5350 : ScopeModel->map(ScopeModel->getFallBackValue());
5351 switch (Ord->getZExtValue()) {
5352 case 0: // memory_order_relaxed
5353 default: // invalid order
5354 break;
5355 case 1: // memory_order_consume
5356 case 2: // memory_order_acquire
5357 Builder.CreateFence(
5358 llvm::AtomicOrdering::Acquire,
5359 getTargetHooks().getLLVMSyncScopeID(getLangOpts(), SS,
5360 llvm::AtomicOrdering::Acquire,
5361 getLLVMContext()));
5362 break;
5363 case 3: // memory_order_release
5364 Builder.CreateFence(
5365 llvm::AtomicOrdering::Release,
5366 getTargetHooks().getLLVMSyncScopeID(getLangOpts(), SS,
5367 llvm::AtomicOrdering::Release,
5368 getLLVMContext()));
5369 break;
5370 case 4: // memory_order_acq_rel
5371 Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease,
5372 getTargetHooks().getLLVMSyncScopeID(
5373 getLangOpts(), SS,
5374 llvm::AtomicOrdering::AcquireRelease,
5375 getLLVMContext()));
5376 break;
5377 case 5: // memory_order_seq_cst
5378 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
5379 getTargetHooks().getLLVMSyncScopeID(
5380 getLangOpts(), SS,
5381 llvm::AtomicOrdering::SequentiallyConsistent,
5382 getLLVMContext()));
5383 break;
5384 }
5385 return RValue::get(nullptr);
5386 }
5387
5388 llvm::BasicBlock *ContBB = createBasicBlock("atomic.scope.continue", CurFn);
5389
5390 llvm::SmallVector<std::pair<llvm::BasicBlock *, llvm::AtomicOrdering>>
5391 OrderBBs;
5392 if (Ord) {
5393 switch (Ord->getZExtValue()) {
5394 case 0: // memory_order_relaxed
5395 default: // invalid order
5396 ContBB->eraseFromParent();
5397 return RValue::get(nullptr);
5398 case 1: // memory_order_consume
5399 case 2: // memory_order_acquire
5400 OrderBBs.emplace_back(Builder.GetInsertBlock(),
5401 llvm::AtomicOrdering::Acquire);
5402 break;
5403 case 3: // memory_order_release
5404 OrderBBs.emplace_back(Builder.GetInsertBlock(),
5405 llvm::AtomicOrdering::Release);
5406 break;
5407 case 4: // memory_order_acq_rel
5408 OrderBBs.emplace_back(Builder.GetInsertBlock(),
5409 llvm::AtomicOrdering::AcquireRelease);
5410 break;
5411 case 5: // memory_order_seq_cst
5412 OrderBBs.emplace_back(Builder.GetInsertBlock(),
5413 llvm::AtomicOrdering::SequentiallyConsistent);
5414 break;
5415 }
5416 } else {
5417 llvm::BasicBlock *AcquireBB = createBasicBlock("acquire", CurFn);
5418 llvm::BasicBlock *ReleaseBB = createBasicBlock("release", CurFn);
5419 llvm::BasicBlock *AcqRelBB = createBasicBlock("acqrel", CurFn);
5420 llvm::BasicBlock *SeqCstBB = createBasicBlock("seqcst", CurFn);
5421
5422 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
5423 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);
5424 SI->addCase(Builder.getInt32(1), AcquireBB);
5425 SI->addCase(Builder.getInt32(2), AcquireBB);
5426 SI->addCase(Builder.getInt32(3), ReleaseBB);
5427 SI->addCase(Builder.getInt32(4), AcqRelBB);
5428 SI->addCase(Builder.getInt32(5), SeqCstBB);
5429
5430 OrderBBs.emplace_back(AcquireBB, llvm::AtomicOrdering::Acquire);
5431 OrderBBs.emplace_back(ReleaseBB, llvm::AtomicOrdering::Release);
5432 OrderBBs.emplace_back(AcqRelBB, llvm::AtomicOrdering::AcquireRelease);
5433 OrderBBs.emplace_back(SeqCstBB,
5434 llvm::AtomicOrdering::SequentiallyConsistent);
5435 }
5436
5437 for (auto &[OrderBB, Ordering] : OrderBBs) {
5438 Builder.SetInsertPoint(OrderBB);
5439 if (Scp) {
5440 SyncScope SS = ScopeModel->isValid(Scp->getZExtValue())
5441 ? ScopeModel->map(Scp->getZExtValue())
5442 : ScopeModel->map(ScopeModel->getFallBackValue());
5443 Builder.CreateFence(Ordering,
5444 getTargetHooks().getLLVMSyncScopeID(
5445 getLangOpts(), SS, Ordering, getLLVMContext()));
5446 Builder.CreateBr(ContBB);
5447 } else {
5448 llvm::DenseMap<unsigned, llvm::BasicBlock *> BBs;
5449 for (unsigned Scp : ScopeModel->getRuntimeValues())
5450 BBs[Scp] = createBasicBlock(getAsString(ScopeModel->map(Scp)), CurFn);
5451
5452 auto *SC = Builder.CreateIntCast(Scope, Builder.getInt32Ty(), false);
5453 llvm::SwitchInst *SI = Builder.CreateSwitch(SC, ContBB);
5454 for (unsigned Scp : ScopeModel->getRuntimeValues()) {
5455 auto *B = BBs[Scp];
5456 SI->addCase(Builder.getInt32(Scp), B);
5457
5458 Builder.SetInsertPoint(B);
5459 Builder.CreateFence(Ordering, getTargetHooks().getLLVMSyncScopeID(
5460 getLangOpts(), ScopeModel->map(Scp),
5461 Ordering, getLLVMContext()));
5462 Builder.CreateBr(ContBB);
5463 }
5464 }
5465 }
5466
5467 Builder.SetInsertPoint(ContBB);
5468 return RValue::get(nullptr);
5469 }
5470
5471 case Builtin::BI__builtin_signbit:
5472 case Builtin::BI__builtin_signbitf:
5473 case Builtin::BI__builtin_signbitl: {
5474 return RValue::get(
5475 Builder.CreateZExt(EmitSignBit(*this, EmitScalarExpr(E->getArg(0))),
5476 ConvertType(E->getType())));
5477 }
5478 case Builtin::BI__warn_memset_zero_len:
5479 return RValue::getIgnored();
5480 case Builtin::BI__annotation: {
5481 // Re-encode each wide string to UTF8 and make an MDString.
5482 SmallVector<Metadata *, 1> Strings;
5483 for (const Expr *Arg : E->arguments()) {
5484 const auto *Str = cast<StringLiteral>(Arg->IgnoreParenCasts());
5485 assert(Str->getCharByteWidth() == 2);
5486 StringRef WideBytes = Str->getBytes();
5487 std::string StrUtf8;
5488 if (!convertUTF16ToUTF8String(
5489 ArrayRef(WideBytes.data(), WideBytes.size()), StrUtf8)) {
5490 CGM.ErrorUnsupported(E, "non-UTF16 __annotation argument");
5491 continue;
5492 }
5493 Strings.push_back(llvm::MDString::get(getLLVMContext(), StrUtf8));
5494 }
5495
5496 // Build an MDTuple of MDStrings and emit the intrinsic call.
5497 llvm::Function *F =
5498 CGM.getIntrinsic(llvm::Intrinsic::codeview_annotation, {});
5499 MDTuple *StrTuple = MDTuple::get(getLLVMContext(), Strings);
5500 Builder.CreateCall(F, MetadataAsValue::get(getLLVMContext(), StrTuple));
5501 return RValue::getIgnored();
5502 }
5503 case Builtin::BI__builtin_annotation: {
5504 llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0));
5505 llvm::Function *F =
5506 CGM.getIntrinsic(llvm::Intrinsic::annotation,
5507 {AnnVal->getType(), CGM.ConstGlobalsPtrTy});
5508
5509 // Get the annotation string, go through casts. Sema requires this to be a
5510 // non-wide string literal, potentially casted, so the cast<> is safe.
5511 const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts();
5512 StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString();
5513 return RValue::get(
5514 EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc(), nullptr));
5515 }
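// Illustrative use of the lowering above:
//   int y = __builtin_annotation(x, "my_note");
// becomes a call to the llvm.annotation intrinsic (overloaded on the value
// type) whose result is used as the value of y, with "my_note" attached.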
5516 case Builtin::BI__builtin_addcb:
5517 case Builtin::BI__builtin_addcs:
5518 case Builtin::BI__builtin_addc:
5519 case Builtin::BI__builtin_addcl:
5520 case Builtin::BI__builtin_addcll:
5521 case Builtin::BI__builtin_subcb:
5522 case Builtin::BI__builtin_subcs:
5523 case Builtin::BI__builtin_subc:
5524 case Builtin::BI__builtin_subcl:
5525 case Builtin::BI__builtin_subcll: {
5526
5527 // We translate all of these builtins from expressions of the form:
5528 // int x = ..., y = ..., carryin = ..., carryout, result;
5529 // result = __builtin_addc(x, y, carryin, &carryout);
5530 //
5531 // to LLVM IR of the form:
5532 //
5533 // %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
5534 // %tmpsum1 = extractvalue {i32, i1} %tmp1, 0
5535 // %carry1 = extractvalue {i32, i1} %tmp1, 1
5536 // %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1,
5537 // i32 %carryin)
5538 // %result = extractvalue {i32, i1} %tmp2, 0
5539 // %carry2 = extractvalue {i32, i1} %tmp2, 1
5540 // %tmp3 = or i1 %carry1, %carry2
5541 // %tmp4 = zext i1 %tmp3 to i32
5542 // store i32 %tmp4, i32* %carryout
5543
5544 // Scalarize our inputs.
5545 llvm::Value *X = EmitScalarExpr(E->getArg(0));
5546 llvm::Value *Y = EmitScalarExpr(E->getArg(1));
5547 llvm::Value *Carryin = EmitScalarExpr(E->getArg(2));
5548 Address CarryOutPtr = EmitPointerWithAlignment(E->getArg(3));
5549
5550 // Decide if we are lowering to a uadd.with.overflow or usub.with.overflow.
5551 llvm::Intrinsic::ID IntrinsicId;
5552 switch (BuiltinID) {
5553 default: llvm_unreachable("Unknown multiprecision builtin id.");
5554 case Builtin::BI__builtin_addcb:
5555 case Builtin::BI__builtin_addcs:
5556 case Builtin::BI__builtin_addc:
5557 case Builtin::BI__builtin_addcl:
5558 case Builtin::BI__builtin_addcll:
5559 IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
5560 break;
5561 case Builtin::BI__builtin_subcb:
5562 case Builtin::BI__builtin_subcs:
5563 case Builtin::BI__builtin_subc:
5564 case Builtin::BI__builtin_subcl:
5565 case Builtin::BI__builtin_subcll:
5566 IntrinsicId = llvm::Intrinsic::usub_with_overflow;
5567 break;
5568 }
5569
5570 // Construct our resulting LLVM IR expression.
5571 llvm::Value *Carry1;
5572 llvm::Value *Sum1 = EmitOverflowIntrinsic(*this, IntrinsicId,
5573 X, Y, Carry1);
5574 llvm::Value *Carry2;
5575 llvm::Value *Sum2 = EmitOverflowIntrinsic(*this, IntrinsicId,
5576 Sum1, Carryin, Carry2);
5577 llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2),
5578 X->getType());
5579 Builder.CreateStore(CarryOut, CarryOutPtr);
5580 return RValue::get(Sum2);
5581 }
5582
5583 case Builtin::BI__builtin_add_overflow:
5584 case Builtin::BI__builtin_sub_overflow:
5585 case Builtin::BI__builtin_mul_overflow: {
5586 const clang::Expr *LeftArg = E->getArg(0);
5587 const clang::Expr *RightArg = E->getArg(1);
5588 const clang::Expr *ResultArg = E->getArg(2);
5589
5590 clang::QualType ResultQTy =
5591 ResultArg->getType()->castAs<PointerType>()->getPointeeType();
5592
5593 WidthAndSignedness LeftInfo =
5594 getIntegerWidthAndSignedness(CGM.getContext(), LeftArg->getType());
5595 WidthAndSignedness RightInfo =
5596 getIntegerWidthAndSignedness(CGM.getContext(), RightArg->getType());
5597 WidthAndSignedness ResultInfo =
5598 getIntegerWidthAndSignedness(CGM.getContext(), ResultQTy);
5600 // Handle mixed-sign multiplication as a special case, because adding
5601 // runtime or backend support for our generic irgen would be too expensive.
5602 if (isSpecialMixedSignMultiply(BuiltinID, LeftInfo, RightInfo, ResultInfo))
5603 return EmitCheckedMixedSignMultiply(*this, LeftArg, LeftInfo, RightArg,
5604 RightInfo, ResultArg, ResultQTy,
5605 ResultInfo);
5606
5607 if (isSpecialUnsignedMultiplySignedResult(BuiltinID, LeftInfo, RightInfo,
5608 ResultInfo))
5609 return EmitCheckedUnsignedMultiplySignedResult(
5610 *this, LeftArg, LeftInfo, RightArg, RightInfo, ResultArg, ResultQTy,
5611 ResultInfo);
5612
5613 WidthAndSignedness EncompassingInfo =
5614 EncompassingIntegerType({LeftInfo, RightInfo, ResultInfo});
5615
5616 llvm::Type *EncompassingLLVMTy =
5617 llvm::IntegerType::get(CGM.getLLVMContext(), EncompassingInfo.Width);
5618
5619 llvm::Type *ResultLLVMTy = CGM.getTypes().ConvertType(ResultQTy);
5620
5621 llvm::Intrinsic::ID IntrinsicId;
5622 switch (BuiltinID) {
5623 default:
5624 llvm_unreachable("Unknown overflow builtin id.");
5625 case Builtin::BI__builtin_add_overflow:
5626 IntrinsicId = EncompassingInfo.Signed
5627 ? llvm::Intrinsic::sadd_with_overflow
5628 : llvm::Intrinsic::uadd_with_overflow;
5629 break;
5630 case Builtin::BI__builtin_sub_overflow:
5631 IntrinsicId = EncompassingInfo.Signed
5632 ? llvm::Intrinsic::ssub_with_overflow
5633 : llvm::Intrinsic::usub_with_overflow;
5634 break;
5635 case Builtin::BI__builtin_mul_overflow:
5636 IntrinsicId = EncompassingInfo.Signed
5637 ? llvm::Intrinsic::smul_with_overflow
5638 : llvm::Intrinsic::umul_with_overflow;
5639 break;
5640 }
5641
5642 llvm::Value *Left = EmitScalarExpr(LeftArg);
5643 llvm::Value *Right = EmitScalarExpr(RightArg);
5644 Address ResultPtr = EmitPointerWithAlignment(ResultArg);
5645
5646 // Extend each operand to the encompassing type.
5647 Left = Builder.CreateIntCast(Left, EncompassingLLVMTy, LeftInfo.Signed);
5648 Right = Builder.CreateIntCast(Right, EncompassingLLVMTy, RightInfo.Signed);
5649
5650 // Perform the operation on the extended values.
5651 llvm::Value *Overflow, *Result;
5652 Result = EmitOverflowIntrinsic(*this, IntrinsicId, Left, Right, Overflow);
5653
5654 if (EncompassingInfo.Width > ResultInfo.Width) {
5655 // The encompassing type is wider than the result type, so we need to
5656 // truncate it.
5657 llvm::Value *ResultTrunc = Builder.CreateTrunc(Result, ResultLLVMTy);
5658
5659 // To see if the truncation caused an overflow, we will extend
5660 // the result and then compare it to the original result.
5661 llvm::Value *ResultTruncExt = Builder.CreateIntCast(
5662 ResultTrunc, EncompassingLLVMTy, ResultInfo.Signed);
5663 llvm::Value *TruncationOverflow =
5664 Builder.CreateICmpNE(Result, ResultTruncExt);
5665
5666 Overflow = Builder.CreateOr(Overflow, TruncationOverflow);
5667 Result = ResultTrunc;
5668 }
5669
5670 // Finally, store the result using the pointer.
5671 bool isVolatile =
5672 ResultArg->getType()->getPointeeType().isVolatileQualified();
5673 Builder.CreateStore(EmitToMemory(Result, ResultQTy), ResultPtr, isVolatile);
5674
5675 return RValue::get(Overflow);
5676 }
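// Worked example for the generic path above (a sketch): for
//   int a, b; short r;  __builtin_add_overflow(a, b, &r);
// the encompassing type is signed 32-bit, so this emits
// llvm.sadd.with.overflow.i32, truncates the sum to i16, re-extends and
// compares to detect truncation overflow, ORs that into the overflow bit,
// stores the i16 result, and returns the combined overflow flag.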
5677
5678 case Builtin::BI__builtin_uadd_overflow:
5679 case Builtin::BI__builtin_uaddl_overflow:
5680 case Builtin::BI__builtin_uaddll_overflow:
5681 case Builtin::BI__builtin_usub_overflow:
5682 case Builtin::BI__builtin_usubl_overflow:
5683 case Builtin::BI__builtin_usubll_overflow:
5684 case Builtin::BI__builtin_umul_overflow:
5685 case Builtin::BI__builtin_umull_overflow:
5686 case Builtin::BI__builtin_umulll_overflow:
5687 case Builtin::BI__builtin_sadd_overflow:
5688 case Builtin::BI__builtin_saddl_overflow:
5689 case Builtin::BI__builtin_saddll_overflow:
5690 case Builtin::BI__builtin_ssub_overflow:
5691 case Builtin::BI__builtin_ssubl_overflow:
5692 case Builtin::BI__builtin_ssubll_overflow:
5693 case Builtin::BI__builtin_smul_overflow:
5694 case Builtin::BI__builtin_smull_overflow:
5695 case Builtin::BI__builtin_smulll_overflow: {
5696
5697 // We translate all of these builtins directly to the relevant llvm IR node.
5698
5699 // Scalarize our inputs.
5700 llvm::Value *X = EmitScalarExpr(E->getArg(0));
5701 llvm::Value *Y = EmitScalarExpr(E->getArg(1));
5702 Address SumOutPtr = EmitPointerWithAlignment(E->getArg(2));
5703
5704 // Decide which of the overflow intrinsics we are lowering to:
5705 llvm::Intrinsic::ID IntrinsicId;
5706 switch (BuiltinID) {
5707 default: llvm_unreachable("Unknown overflow builtin id.");
5708 case Builtin::BI__builtin_uadd_overflow:
5709 case Builtin::BI__builtin_uaddl_overflow:
5710 case Builtin::BI__builtin_uaddll_overflow:
5711 IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
5712 break;
5713 case Builtin::BI__builtin_usub_overflow:
5714 case Builtin::BI__builtin_usubl_overflow:
5715 case Builtin::BI__builtin_usubll_overflow:
5716 IntrinsicId = llvm::Intrinsic::usub_with_overflow;
5717 break;
5718 case Builtin::BI__builtin_umul_overflow:
5719 case Builtin::BI__builtin_umull_overflow:
5720 case Builtin::BI__builtin_umulll_overflow:
5721 IntrinsicId = llvm::Intrinsic::umul_with_overflow;
5722 break;
5723 case Builtin::BI__builtin_sadd_overflow:
5724 case Builtin::BI__builtin_saddl_overflow:
5725 case Builtin::BI__builtin_saddll_overflow:
5726 IntrinsicId = llvm::Intrinsic::sadd_with_overflow;
5727 break;
5728 case Builtin::BI__builtin_ssub_overflow:
5729 case Builtin::BI__builtin_ssubl_overflow:
5730 case Builtin::BI__builtin_ssubll_overflow:
5731 IntrinsicId = llvm::Intrinsic::ssub_with_overflow;
5732 break;
5733 case Builtin::BI__builtin_smul_overflow:
5734 case Builtin::BI__builtin_smull_overflow:
5735 case Builtin::BI__builtin_smulll_overflow:
5736 IntrinsicId = llvm::Intrinsic::smul_with_overflow;
5737 break;
5738 }
5739
5740
5741 llvm::Value *Carry;
5742 llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry);
5743 Builder.CreateStore(Sum, SumOutPtr);
5744
5745 return RValue::get(Carry);
5746 }
5747 case Builtin::BIaddressof:
5748 case Builtin::BI__addressof:
5749 case Builtin::BI__builtin_addressof:
5750 return RValue::get(EmitLValue(E->getArg(0)).getPointer(*this));
5751 case Builtin::BI__builtin_function_start:
5752 return RValue::get(CGM.GetFunctionStart(
5753 E->getArg(0)->getAsBuiltinConstantDeclRef(CGM.getContext())));
5754 case Builtin::BI__builtin_operator_new:
5755 return EmitBuiltinNewDeleteCall(
5756 E->getCallee()->getType()->castAs<FunctionProtoType>(), E, false);
5757 case Builtin::BI__builtin_operator_delete:
5758 EmitBuiltinNewDeleteCall(
5759 E->getCallee()->getType()->castAs<FunctionProtoType>(), E, true);
5760 return RValue::get(nullptr);
5761
5762 case Builtin::BI__builtin_is_aligned:
5763 return EmitBuiltinIsAligned(E);
5764 case Builtin::BI__builtin_align_up:
5765 return EmitBuiltinAlignTo(E, true);
5766 case Builtin::BI__builtin_align_down:
5767 return EmitBuiltinAlignTo(E, false);
5768
5769 case Builtin::BI__noop:
5770 // __noop always evaluates to an integer literal zero.
5771 return RValue::get(ConstantInt::get(IntTy, 0));
5772 case Builtin::BI__builtin_call_with_static_chain: {
5773 const CallExpr *Call = cast<CallExpr>(E->getArg(0));
5774 const Expr *Chain = E->getArg(1);
5775 return EmitCall(Call->getCallee()->getType(),
5776 EmitCallee(Call->getCallee()), Call, ReturnValue,
5777 EmitScalarExpr(Chain));
5778 }
5779 case Builtin::BI_InterlockedExchange8:
5780 case Builtin::BI_InterlockedExchange16:
5781 case Builtin::BI_InterlockedExchange:
5782 case Builtin::BI_InterlockedExchangePointer:
5783 return RValue::get(
5784 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E));
5785 case Builtin::BI_InterlockedCompareExchangePointer:
5786 return RValue::get(
5787 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange, E));
5788 case Builtin::BI_InterlockedCompareExchangePointer_nf:
5789 return RValue::get(
5790 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_nf, E));
5791 case Builtin::BI_InterlockedCompareExchange8:
5792 case Builtin::BI_InterlockedCompareExchange16:
5793 case Builtin::BI_InterlockedCompareExchange:
5794 case Builtin::BI_InterlockedCompareExchange64:
5795 return RValue::get(EmitAtomicCmpXchgForMSIntrin(*this, E));
5796 case Builtin::BI_InterlockedIncrement16:
5797 case Builtin::BI_InterlockedIncrement:
5798 return RValue::get(
5799 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E));
5800 case Builtin::BI_InterlockedDecrement16:
5801 case Builtin::BI_InterlockedDecrement:
5802 return RValue::get(
5803 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E));
5804 case Builtin::BI_InterlockedAnd8:
5805 case Builtin::BI_InterlockedAnd16:
5806 case Builtin::BI_InterlockedAnd:
5807 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E));
5808 case Builtin::BI_InterlockedExchangeAdd8:
5809 case Builtin::BI_InterlockedExchangeAdd16:
5810 case Builtin::BI_InterlockedExchangeAdd:
5811 return RValue::get(
5812 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E));
5813 case Builtin::BI_InterlockedExchangeSub8:
5814 case Builtin::BI_InterlockedExchangeSub16:
5815 case Builtin::BI_InterlockedExchangeSub:
5816 return RValue::get(
5817 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E));
5818 case Builtin::BI_InterlockedOr8:
5819 case Builtin::BI_InterlockedOr16:
5820 case Builtin::BI_InterlockedOr:
5821 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E));
5822 case Builtin::BI_InterlockedXor8:
5823 case Builtin::BI_InterlockedXor16:
5824 case Builtin::BI_InterlockedXor:
5825 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E));
5826
5827 case Builtin::BI_bittest64:
5828 case Builtin::BI_bittest:
5829 case Builtin::BI_bittestandcomplement64:
5830 case Builtin::BI_bittestandcomplement:
5831 case Builtin::BI_bittestandreset64:
5832 case Builtin::BI_bittestandreset:
5833 case Builtin::BI_bittestandset64:
5834 case Builtin::BI_bittestandset:
5835 case Builtin::BI_interlockedbittestandreset:
5836 case Builtin::BI_interlockedbittestandreset64:
5837 case Builtin::BI_interlockedbittestandset64:
5838 case Builtin::BI_interlockedbittestandset:
5839 case Builtin::BI_interlockedbittestandset_acq:
5840 case Builtin::BI_interlockedbittestandset_rel:
5841 case Builtin::BI_interlockedbittestandset_nf:
5842 case Builtin::BI_interlockedbittestandreset_acq:
5843 case Builtin::BI_interlockedbittestandreset_rel:
5844 case Builtin::BI_interlockedbittestandreset_nf:
5845 return RValue::get(EmitBitTestIntrinsic(*this, BuiltinID, E));
5846
5847 // These builtins exist to emit regular volatile loads and stores not
5848 // affected by the -fms-volatile setting.
5849 case Builtin::BI__iso_volatile_load8:
5850 case Builtin::BI__iso_volatile_load16:
5851 case Builtin::BI__iso_volatile_load32:
5852 case Builtin::BI__iso_volatile_load64:
5853 return RValue::get(EmitISOVolatileLoad(*this, E));
5854 case Builtin::BI__iso_volatile_store8:
5855 case Builtin::BI__iso_volatile_store16:
5856 case Builtin::BI__iso_volatile_store32:
5857 case Builtin::BI__iso_volatile_store64:
5858 return RValue::get(EmitISOVolatileStore(*this, E));
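// Illustrative sketch (not part of the original source): under /volatile:ms an
// ordinary volatile access may gain extra atomic ordering, but the
// __iso_volatile_* builtins above always lower to a plain volatile memory
// operation of the stated width. For example, a call such as
//   int v = __iso_volatile_load32(p);
// is emitted as an ordinary volatile 32-bit load of *p, with no added
// ordering semantics.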
5859
5860 case Builtin::BI__builtin_ptrauth_sign_constant:
5861 return RValue::get(ConstantEmitter(*this).emitAbstract(E, E->getType()));
5862
5863 case Builtin::BI__builtin_ptrauth_auth:
5864 case Builtin::BI__builtin_ptrauth_auth_and_resign:
5865 case Builtin::BI__builtin_ptrauth_blend_discriminator:
5866 case Builtin::BI__builtin_ptrauth_sign_generic_data:
5867 case Builtin::BI__builtin_ptrauth_sign_unauthenticated:
5868 case Builtin::BI__builtin_ptrauth_strip: {
5869 // Emit the arguments.
5870 SmallVector<llvm::Value *, 5> Args;
5871 for (auto argExpr : E->arguments())
5872 Args.push_back(EmitScalarExpr(argExpr));
5873
5874 // Cast the value to intptr_t, saving its original type.
5875 llvm::Type *OrigValueType = Args[0]->getType();
5876 if (OrigValueType->isPointerTy())
5877 Args[0] = Builder.CreatePtrToInt(Args[0], IntPtrTy);
5878
5879 switch (BuiltinID) {
5880 case Builtin::BI__builtin_ptrauth_auth_and_resign:
5881 if (Args[4]->getType()->isPointerTy())
5882 Args[4] = Builder.CreatePtrToInt(Args[4], IntPtrTy);
5883 [[fallthrough]];
5884
5885 case Builtin::BI__builtin_ptrauth_auth:
5886 case Builtin::BI__builtin_ptrauth_sign_unauthenticated:
5887 if (Args[2]->getType()->isPointerTy())
5888 Args[2] = Builder.CreatePtrToInt(Args[2], IntPtrTy);
5889 break;
5890
5891 case Builtin::BI__builtin_ptrauth_sign_generic_data:
5892 if (Args[1]->getType()->isPointerTy())
5893 Args[1] = Builder.CreatePtrToInt(Args[1], IntPtrTy);
5894 break;
5895
5896 case Builtin::BI__builtin_ptrauth_blend_discriminator:
5897 case Builtin::BI__builtin_ptrauth_strip:
5898 break;
5899 }
5900
5901 // Call the intrinsic.
5902 auto IntrinsicID = [&]() -> unsigned {
5903 switch (BuiltinID) {
5904 case Builtin::BI__builtin_ptrauth_auth:
5905 return llvm::Intrinsic::ptrauth_auth;
5906 case Builtin::BI__builtin_ptrauth_auth_and_resign:
5907 return llvm::Intrinsic::ptrauth_resign;
5908 case Builtin::BI__builtin_ptrauth_blend_discriminator:
5909 return llvm::Intrinsic::ptrauth_blend;
5910 case Builtin::BI__builtin_ptrauth_sign_generic_data:
5911 return llvm::Intrinsic::ptrauth_sign_generic;
5912 case Builtin::BI__builtin_ptrauth_sign_unauthenticated:
5913 return llvm::Intrinsic::ptrauth_sign;
5914 case Builtin::BI__builtin_ptrauth_strip:
5915 return llvm::Intrinsic::ptrauth_strip;
5916 }
5917 llvm_unreachable("bad ptrauth intrinsic");
5918 }();
5919 auto Intrinsic = CGM.getIntrinsic(IntrinsicID);
5920 llvm::Value *Result = EmitRuntimeCall(Intrinsic, Args);
5921
5922 if (BuiltinID != Builtin::BI__builtin_ptrauth_sign_generic_data &&
5923 BuiltinID != Builtin::BI__builtin_ptrauth_blend_discriminator &&
5924 OrigValueType->isPointerTy()) {
5925 Result = Builder.CreateIntToPtr(Result, OrigValueType);
5926 }
5927 return RValue::get(Result);
5928 }
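// Rough sketch of the lowering above (illustrative, not from the source):
// pointer operands are first converted to the target's intptr_t, the matching
// llvm.ptrauth.* intrinsic is called, and pointer-typed results are converted
// back (except for blend_discriminator and sign_generic_data, whose results
// stay integers). On an LP64 target, __builtin_ptrauth_strip(fnptr, key)
// becomes roughly:
//   %0 = ptrtoint ptr %fnptr to i64
//   %1 = call i64 @llvm.ptrauth.strip(i64 %0, i32 <key>)
//   %2 = inttoptr i64 %1 to ptr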
5929
5930 case Builtin::BI__exception_code:
5931 case Builtin::BI_exception_code:
5932 return RValue::get(EmitSEHExceptionCode());
5933 case Builtin::BI__exception_info:
5934 case Builtin::BI_exception_info:
5935 return RValue::get(EmitSEHExceptionInfo());
5936 case Builtin::BI__abnormal_termination:
5937 case Builtin::BI_abnormal_termination:
5938 return RValue::get(EmitSEHAbnormalTermination());
5939 case Builtin::BI_setjmpex:
5940 if (getTarget().getTriple().isOSMSVCRT() && E->getNumArgs() == 1 &&
5941 E->getArg(0)->getType()->isPointerType())
5942 return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmpex, E);
5943 break;
5944 case Builtin::BI_setjmp:
5945 if (getTarget().getTriple().isOSMSVCRT() && E->getNumArgs() == 1 &&
5946 E->getArg(0)->getType()->isPointerType()) {
5947 if (getTarget().getTriple().getArch() == llvm::Triple::x86)
5948 return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmp3, E);
5949 else if (getTarget().getTriple().getArch() == llvm::Triple::aarch64)
5950 return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmpex, E);
5951 return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmp, E);
5952 }
5953 break;
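// Illustrative note (not in the original source): on MSVC environments the
// setjmp family is lowered to the CRT entry point the target actually
// provides, e.g. a 32-bit x86 _setjmp(buf) call is emitted as a call to
// _setjmp3, AArch64 uses _setjmpex, and other targets keep _setjmp.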
5954
5955 // C++ std:: builtins.
5956 case Builtin::BImove:
5957 case Builtin::BImove_if_noexcept:
5958 case Builtin::BIforward:
5959 case Builtin::BIforward_like:
5960 case Builtin::BIas_const:
5961 return RValue::get(EmitLValue(E->getArg(0)).getPointer(*this));
5962 case Builtin::BI__GetExceptionInfo: {
5963 if (llvm::GlobalVariable *GV =
5964 CGM.getCXXABI().getThrowInfo(FD->getParamDecl(0)->getType()))
5965 return RValue::get(GV);
5966 break;
5967 }
5968
5969 case Builtin::BI__fastfail:
5970 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::__fastfail, E));
5971
5972 case Builtin::BI__builtin_coro_id:
5973 return EmitCoroutineIntrinsic(E, Intrinsic::coro_id);
5974 case Builtin::BI__builtin_coro_promise:
5975 return EmitCoroutineIntrinsic(E, Intrinsic::coro_promise);
5976 case Builtin::BI__builtin_coro_resume:
5977 EmitCoroutineIntrinsic(E, Intrinsic::coro_resume);
5978 return RValue::get(nullptr);
5979 case Builtin::BI__builtin_coro_frame:
5980 return EmitCoroutineIntrinsic(E, Intrinsic::coro_frame);
5981 case Builtin::BI__builtin_coro_noop:
5982 return EmitCoroutineIntrinsic(E, Intrinsic::coro_noop);
5983 case Builtin::BI__builtin_coro_free:
5984 return EmitCoroutineIntrinsic(E, Intrinsic::coro_free);
5985 case Builtin::BI__builtin_coro_destroy:
5986 EmitCoroutineIntrinsic(E, Intrinsic::coro_destroy);
5987 return RValue::get(nullptr);
5988 case Builtin::BI__builtin_coro_done:
5989 return EmitCoroutineIntrinsic(E, Intrinsic::coro_done);
5990 case Builtin::BI__builtin_coro_alloc:
5991 return EmitCoroutineIntrinsic(E, Intrinsic::coro_alloc);
5992 case Builtin::BI__builtin_coro_begin:
5993 return EmitCoroutineIntrinsic(E, Intrinsic::coro_begin);
5994 case Builtin::BI__builtin_coro_end:
5995 return EmitCoroutineIntrinsic(E, Intrinsic::coro_end);
5996 case Builtin::BI__builtin_coro_suspend:
5997 return EmitCoroutineIntrinsic(E, Intrinsic::coro_suspend);
5998 case Builtin::BI__builtin_coro_size:
5999 return EmitCoroutineIntrinsic(E, Intrinsic::coro_size);
6000 case Builtin::BI__builtin_coro_align:
6001 return EmitCoroutineIntrinsic(E, Intrinsic::coro_align);
6002
6003 // OpenCL v2.0 s6.13.16.2, Built-in pipe read and write functions
6004 case Builtin::BIread_pipe:
6005 case Builtin::BIwrite_pipe: {
6006 Value *Arg0 = EmitScalarExpr(E->getArg(0)),
6007 *Arg1 = EmitScalarExpr(E->getArg(1));
6008 CGOpenCLRuntime OpenCLRT(CGM);
6009 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
6010 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
6011
6012 // Type of the generic packet parameter.
6013 unsigned GenericAS =
6014 getContext().getTargetAddressSpace(LangAS::opencl_generic);
6015 llvm::Type *I8PTy = llvm::PointerType::get(getLLVMContext(), GenericAS);
6016
6017 // Testing which overloaded version we should generate the call for.
6018 if (2U == E->getNumArgs()) {
6019 const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_2"
6020 : "__write_pipe_2";
6021 // Creating a generic function type to be able to call with any builtin or
6022 // user defined type.
6023 llvm::Type *ArgTys[] = {Arg0->getType(), I8PTy, Int32Ty, Int32Ty};
6024 llvm::FunctionType *FTy = llvm::FunctionType::get(
6025 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
6026 Value *ACast = Builder.CreateAddrSpaceCast(Arg1, I8PTy);
6027 return RValue::get(
6028 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
6029 {Arg0, ACast, PacketSize, PacketAlign}));
6030 } else {
6031 assert(4 == E->getNumArgs() &&
6032 "Illegal number of parameters to pipe function");
6033 const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_4"
6034 : "__write_pipe_4";
6035
6036 llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, I8PTy,
6037 Int32Ty, Int32Ty};
6038 Value *Arg2 = EmitScalarExpr(E->getArg(2)),
6039 *Arg3 = EmitScalarExpr(E->getArg(3));
6040 llvm::FunctionType *FTy = llvm::FunctionType::get(
6041 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
6042 Value *ACast = Builder.CreateAddrSpaceCast(Arg3, I8PTy);
6043 // We know the third argument is an integer type, but we may need to cast
6044 // it to i32.
6045 if (Arg2->getType() != Int32Ty)
6046 Arg2 = Builder.CreateZExtOrTrunc(Arg2, Int32Ty);
6047 return RValue::get(
6048 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
6049 {Arg0, Arg1, Arg2, ACast, PacketSize, PacketAlign}));
6050 }
6051 }
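// Sketch of the expansion above (illustrative only): for the two-argument
// OpenCL form
//   read_pipe(p, &val);
// the packet pointer is addrspacecast to the generic address space and the
// call becomes roughly
//   __read_pipe_2(p, (generic void *)&val, packet_size, packet_align);
// the four-argument reserved form lowers to __read_pipe_4 with the
// reservation id and index passed before the packet pointer.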
6052 // OpenCL v2.0 s6.13.16 ,s9.17.3.5 - Built-in pipe reserve read and write
6053 // functions
6054 case Builtin::BIreserve_read_pipe:
6055 case Builtin::BIreserve_write_pipe:
6056 case Builtin::BIwork_group_reserve_read_pipe:
6057 case Builtin::BIwork_group_reserve_write_pipe:
6058 case Builtin::BIsub_group_reserve_read_pipe:
6059 case Builtin::BIsub_group_reserve_write_pipe: {
6060 // Composing the mangled name for the function.
6061 const char *Name;
6062 if (BuiltinID == Builtin::BIreserve_read_pipe)
6063 Name = "__reserve_read_pipe";
6064 else if (BuiltinID == Builtin::BIreserve_write_pipe)
6065 Name = "__reserve_write_pipe";
6066 else if (BuiltinID == Builtin::BIwork_group_reserve_read_pipe)
6067 Name = "__work_group_reserve_read_pipe";
6068 else if (BuiltinID == Builtin::BIwork_group_reserve_write_pipe)
6069 Name = "__work_group_reserve_write_pipe";
6070 else if (BuiltinID == Builtin::BIsub_group_reserve_read_pipe)
6071 Name = "__sub_group_reserve_read_pipe";
6072 else
6073 Name = "__sub_group_reserve_write_pipe";
6074
6075 Value *Arg0 = EmitScalarExpr(E->getArg(0)),
6076 *Arg1 = EmitScalarExpr(E->getArg(1));
6077 llvm::Type *ReservedIDTy = ConvertType(getContext().OCLReserveIDTy);
6078 CGOpenCLRuntime OpenCLRT(CGM);
6079 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
6080 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
6081
6082 // Building the generic function prototype.
6083 llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty, Int32Ty};
6084 llvm::FunctionType *FTy = llvm::FunctionType::get(
6085 ReservedIDTy, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
6086 // We know the second argument is an integer type, but we may need to cast
6087 // it to i32.
6088 if (Arg1->getType() != Int32Ty)
6089 Arg1 = Builder.CreateZExtOrTrunc(Arg1, Int32Ty);
6090 return RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
6091 {Arg0, Arg1, PacketSize, PacketAlign}));
6092 }
6093 // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe commit read and write
6094 // functions
6095 case Builtin::BIcommit_read_pipe:
6096 case Builtin::BIcommit_write_pipe:
6097 case Builtin::BIwork_group_commit_read_pipe:
6098 case Builtin::BIwork_group_commit_write_pipe:
6099 case Builtin::BIsub_group_commit_read_pipe:
6100 case Builtin::BIsub_group_commit_write_pipe: {
6101 const char *Name;
6102 if (BuiltinID == Builtin::BIcommit_read_pipe)
6103 Name = "__commit_read_pipe";
6104 else if (BuiltinID == Builtin::BIcommit_write_pipe)
6105 Name = "__commit_write_pipe";
6106 else if (BuiltinID == Builtin::BIwork_group_commit_read_pipe)
6107 Name = "__work_group_commit_read_pipe";
6108 else if (BuiltinID == Builtin::BIwork_group_commit_write_pipe)
6109 Name = "__work_group_commit_write_pipe";
6110 else if (BuiltinID == Builtin::BIsub_group_commit_read_pipe)
6111 Name = "__sub_group_commit_read_pipe";
6112 else
6113 Name = "__sub_group_commit_write_pipe";
6114
6115 Value *Arg0 = EmitScalarExpr(E->getArg(0)),
6116 *Arg1 = EmitScalarExpr(E->getArg(1));
6117 CGOpenCLRuntime OpenCLRT(CGM);
6118 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
6119 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
6120
6121 // Building the generic function prototype.
6122 llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, Int32Ty};
6123 llvm::FunctionType *FTy =
6124 llvm::FunctionType::get(llvm::Type::getVoidTy(getLLVMContext()),
6125 llvm::ArrayRef<llvm::Type *>(ArgTys), false);
6126
6127 return RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
6128 {Arg0, Arg1, PacketSize, PacketAlign}));
6129 }
6130 // OpenCL v2.0 s6.13.16.4 Built-in pipe query functions
6131 case Builtin::BIget_pipe_num_packets:
6132 case Builtin::BIget_pipe_max_packets: {
6133 const char *BaseName;
6134 const auto *PipeTy = E->getArg(0)->getType()->castAs<PipeType>();
6135 if (BuiltinID == Builtin::BIget_pipe_num_packets)
6136 BaseName = "__get_pipe_num_packets";
6137 else
6138 BaseName = "__get_pipe_max_packets";
6139 std::string Name = std::string(BaseName) +
6140 std::string(PipeTy->isReadOnly() ? "_ro" : "_wo");
6141
6142 // Building the generic function prototype.
6143 Value *Arg0 = EmitScalarExpr(E->getArg(0));
6144 CGOpenCLRuntime OpenCLRT(CGM);
6145 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
6146 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
6147 llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty};
6148 llvm::FunctionType *FTy = llvm::FunctionType::get(
6149 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
6150
6151 return RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
6152 {Arg0, PacketSize, PacketAlign}));
6153 }
6154
6155 // OpenCL v2.0 s6.13.9 - Address space qualifier functions.
6156 case Builtin::BIto_global:
6157 case Builtin::BIto_local:
6158 case Builtin::BIto_private: {
6159 auto Arg0 = EmitScalarExpr(E->getArg(0));
6160 auto NewArgT = llvm::PointerType::get(
6161 getLLVMContext(),
6162 CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
6163 auto NewRetT = llvm::PointerType::get(
6164 getLLVMContext(),
6165 CGM.getContext().getTargetAddressSpace(
6166 E->getType()->getPointeeType().getAddressSpace()));
6167 auto FTy = llvm::FunctionType::get(NewRetT, {NewArgT}, false);
6168 llvm::Value *NewArg;
6169 if (Arg0->getType()->getPointerAddressSpace() !=
6170 NewArgT->getPointerAddressSpace())
6171 NewArg = Builder.CreateAddrSpaceCast(Arg0, NewArgT);
6172 else
6173 NewArg = Builder.CreateBitOrPointerCast(Arg0, NewArgT);
6174 auto NewName = std::string("__") + E->getDirectCallee()->getName().str();
6175 auto NewCall =
6176 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, NewName), {NewArg});
6177 return RValue::get(Builder.CreateBitOrPointerCast(NewCall,
6178 ConvertType(E->getType())));
6179 }
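// Illustrative sketch (not from the source): an OpenCL call such as
//   global int *g = to_global(p);
// is emitted as a call to the runtime helper __to_global on the argument
// (address-space-casting it to the generic pointer type if needed), and the
// returned pointer is then cast to the builtin's declared result type.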
6180
6181 // OpenCL v2.0, s6.13.17 - Enqueue kernel function.
6182 // Table 6.13.17.1 specifies four overload forms of enqueue_kernel.
6183 // The code below expands the builtin call to a call to one of the following
6184 // functions that an OpenCL runtime library will have to provide:
6185 // __enqueue_kernel_basic
6186 // __enqueue_kernel_varargs
6187 // __enqueue_kernel_basic_events
6188 // __enqueue_kernel_events_varargs
6189 case Builtin::BIenqueue_kernel: {
6190 StringRef Name; // Generated function call name
6191 unsigned NumArgs = E->getNumArgs();
6192
6193 llvm::Type *QueueTy = ConvertType(getContext().OCLQueueTy);
6194 llvm::Type *GenericVoidPtrTy = Builder.getPtrTy(
6195 getContext().getTargetAddressSpace(LangAS::opencl_generic));
6196
6197 llvm::Value *Queue = EmitScalarExpr(E->getArg(0));
6198 llvm::Value *Flags = EmitScalarExpr(E->getArg(1));
6199 LValue NDRangeL = EmitAggExprToLValue(E->getArg(2));
6200 llvm::Value *Range = NDRangeL.getAddress().emitRawPointer(*this);
6201 llvm::Type *RangeTy = NDRangeL.getAddress().getType();
6202
6203 if (NumArgs == 4) {
6204 // The most basic form of the call with parameters:
6205 // queue_t, kernel_enqueue_flags_t, ndrange_t, block(void)
6206 Name = "__enqueue_kernel_basic";
6207 llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, GenericVoidPtrTy,
6208 GenericVoidPtrTy};
6209 llvm::FunctionType *FTy = llvm::FunctionType::get(
6210 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
6211
6212 auto Info =
6213 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3));
6214 llvm::Value *Kernel =
6215 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
6216 llvm::Value *Block =
6217 Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
6218
6219 auto RTCall = EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
6220 {Queue, Flags, Range, Kernel, Block});
6221 return RValue::get(RTCall);
6222 }
6223 assert(NumArgs >= 5 && "Invalid enqueue_kernel signature");
6224
6225 // Create a temporary array to hold the sizes of local pointer arguments
6226 // for the block. \p First is the position of the first size argument.
6227 auto CreateArrayForSizeVar = [=](unsigned First)
6228 -> std::tuple<llvm::Value *, llvm::Value *, llvm::Value *> {
6229 llvm::APInt ArraySize(32, NumArgs - First);
6230 QualType SizeArrayTy = getContext().getConstantArrayType(
6231 getContext().getSizeType(), ArraySize, nullptr,
6232 ArraySizeModifier::Normal,
6233 /*IndexTypeQuals=*/0);
6234 auto Tmp = CreateMemTemp(SizeArrayTy, "block_sizes");
6235 llvm::Value *TmpPtr = Tmp.getPointer();
6236 // The EmitLifetime* pair expects a naked Alloca as its last argument;
6237 // however, for cases where the default AS is not the Alloca AS, Tmp is
6238 // actually the Alloca addrspacecasted to the default AS, hence the
6239 // stripPointerCasts().
6240 llvm::Value *Alloca = TmpPtr->stripPointerCasts();
6241 llvm::Value *TmpSize = EmitLifetimeStart(
6242 CGM.getDataLayout().getTypeAllocSize(Tmp.getElementType()), Alloca);
6243 llvm::Value *ElemPtr;
6244 // Each of the following arguments specifies the size of the corresponding
6245 // argument passed to the enqueued block.
6246 auto *Zero = llvm::ConstantInt::get(IntTy, 0);
6247 for (unsigned I = First; I < NumArgs; ++I) {
6248 auto *Index = llvm::ConstantInt::get(IntTy, I - First);
6249 auto *GEP = Builder.CreateGEP(Tmp.getElementType(), TmpPtr,
6250 {Zero, Index});
6251 if (I == First)
6252 ElemPtr = GEP;
6253 auto *V =
6254 Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy);
6255 Builder.CreateAlignedStore(
6256 V, GEP, CGM.getDataLayout().getPrefTypeAlign(SizeTy));
6257 }
6258 // Return the Alloca itself rather than a potential ascast as this is only
6259 // used by the paired EmitLifetimeEnd.
6260 return std::tie(ElemPtr, TmpSize, Alloca);
6261 };
6262
6263 // Could have events and/or varargs.
6264 if (E->getArg(3)->getType()->isBlockPointerType()) {
6265 // No events passed, but has variadic arguments.
6266 Name = "__enqueue_kernel_varargs";
6267 auto Info =
6268 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3));
6269 llvm::Value *Kernel =
6270 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
6271 auto *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
6272 llvm::Value *ElemPtr, *TmpSize, *TmpPtr;
6273 std::tie(ElemPtr, TmpSize, TmpPtr) = CreateArrayForSizeVar(4);
6274
6275 // Create a vector of the arguments, as well as a constant value to
6276 // express to the runtime the number of variadic arguments.
6277 llvm::Value *const Args[] = {Queue, Flags,
6278 Range, Kernel,
6279 Block, ConstantInt::get(IntTy, NumArgs - 4),
6280 ElemPtr};
6281 llvm::Type *const ArgTys[] = {
6282 QueueTy, IntTy, RangeTy, GenericVoidPtrTy,
6283 GenericVoidPtrTy, IntTy, ElemPtr->getType()};
6284
6285 llvm::FunctionType *FTy = llvm::FunctionType::get(Int32Ty, ArgTys, false);
6286 auto Call = RValue::get(
6287 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Args));
6288 if (TmpSize)
6289 EmitLifetimeEnd(TmpSize, TmpPtr);
6290 return Call;
6291 }
6292 // Any calls now have event arguments passed.
6293 if (NumArgs >= 7) {
6294 llvm::PointerType *PtrTy = llvm::PointerType::get(
6295 getLLVMContext(),
6296 CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
6297
6298 llvm::Value *NumEvents =
6299 Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(3)), Int32Ty);
6300
6301 // Since SemaOpenCLBuiltinEnqueueKernel allows fifth and sixth arguments
6302 // to be a null pointer constant (including `0` literal), we can take it
6303 // into account and emit null pointer directly.
6304 llvm::Value *EventWaitList = nullptr;
6305 if (E->getArg(4)->isNullPointerConstant(
6306 getContext(), Expr::NPC_ValueDependentIsNotNull)) {
6307 EventWaitList = llvm::ConstantPointerNull::get(PtrTy);
6308 } else {
6309 EventWaitList =
6310 E->getArg(4)->getType()->isArrayType()
6311 ? EmitArrayToPointerDecay(E->getArg(4)).emitRawPointer(*this)
6312 : EmitScalarExpr(E->getArg(4));
6313 // Convert to generic address space.
6314 EventWaitList = Builder.CreatePointerCast(EventWaitList, PtrTy);
6315 }
6316 llvm::Value *EventRet = nullptr;
6317 if (E->getArg(5)->isNullPointerConstant(
6318 getContext(), Expr::NPC_ValueDependentIsNotNull)) {
6319 EventRet = llvm::ConstantPointerNull::get(PtrTy);
6320 } else {
6321 EventRet =
6322 Builder.CreatePointerCast(EmitScalarExpr(E->getArg(5)), PtrTy);
6323 }
6324
6325 auto Info =
6326 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(6));
6327 llvm::Value *Kernel =
6328 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
6329 llvm::Value *Block =
6330 Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
6331
6332 std::vector<llvm::Type *> ArgTys = {
6333 QueueTy, Int32Ty, RangeTy, Int32Ty,
6334 PtrTy, PtrTy, GenericVoidPtrTy, GenericVoidPtrTy};
6335
6336 std::vector<llvm::Value *> Args = {Queue, Flags, Range,
6337 NumEvents, EventWaitList, EventRet,
6338 Kernel, Block};
6339
6340 if (NumArgs == 7) {
6341 // Has events but no variadics.
6342 Name = "__enqueue_kernel_basic_events";
6343 llvm::FunctionType *FTy = llvm::FunctionType::get(
6344 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
6345 return RValue::get(
6346 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
6347 llvm::ArrayRef<llvm::Value *>(Args)));
6348 }
6349 // Has event info and variadics
6350 // Pass the number of variadics to the runtime function too.
6351 Args.push_back(ConstantInt::get(Int32Ty, NumArgs - 7));
6352 ArgTys.push_back(Int32Ty);
6353 Name = "__enqueue_kernel_events_varargs";
6354
6355 llvm::Value *ElemPtr, *TmpSize, *TmpPtr;
6356 std::tie(ElemPtr, TmpSize, TmpPtr) = CreateArrayForSizeVar(7);
6357 Args.push_back(ElemPtr);
6358 ArgTys.push_back(ElemPtr->getType());
6359
6360 llvm::FunctionType *FTy = llvm::FunctionType::get(
6361 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
6362 auto Call =
6363 RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
6364 llvm::ArrayRef<llvm::Value *>(Args)));
6365 if (TmpSize)
6366 EmitLifetimeEnd(TmpSize, TmpPtr);
6367 return Call;
6368 }
6369 llvm_unreachable("Unexpected enqueue_kernel signature");
6370 }
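// Rough shape of the expansion above (illustrative, not part of the source):
// the simplest form
//   enqueue_kernel(q, flags, ndrange, ^{ ... });
// becomes a call to the runtime entry point
//   __enqueue_kernel_basic(q, flags, &ndrange, kernel_handle, block_literal);
// where the kernel handle and block argument come from
// emitOpenCLEnqueuedBlock. The variants with events and/or local-size
// varargs select one of the other three __enqueue_kernel_* entry points and
// append the extra arguments (event counts/pointers, the number of variadic
// sizes, and a pointer to the temporary size array built above).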
6371 // OpenCL v2.0 s6.13.17.6 - Kernel query functions need bitcast of block
6372 // parameter.
6373 case Builtin::BIget_kernel_work_group_size: {
6374 llvm::Type *GenericVoidPtrTy = Builder.getPtrTy(
6375 getContext().getTargetAddressSpace(LangAS::opencl_generic));
6376 auto Info =
6377 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0));
6378 Value *Kernel =
6379 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
6380 Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
6381 return RValue::get(EmitRuntimeCall(
6382 CGM.CreateRuntimeFunction(
6383 llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy},
6384 false),
6385 "__get_kernel_work_group_size_impl"),
6386 {Kernel, Arg}));
6387 }
6388 case Builtin::BIget_kernel_preferred_work_group_size_multiple: {
6389 llvm::Type *GenericVoidPtrTy = Builder.getPtrTy(
6390 getContext().getTargetAddressSpace(LangAS::opencl_generic));
6391 auto Info =
6392 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0));
6393 Value *Kernel =
6394 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
6395 Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
6396 return RValue::get(EmitRuntimeCall(
6397 CGM.CreateRuntimeFunction(
6398 llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy},
6399 false),
6400 "__get_kernel_preferred_work_group_size_multiple_impl"),
6401 {Kernel, Arg}));
6402 }
6403 case Builtin::BIget_kernel_max_sub_group_size_for_ndrange:
6404 case Builtin::BIget_kernel_sub_group_count_for_ndrange: {
6405 llvm::Type *GenericVoidPtrTy = Builder.getPtrTy(
6406 getContext().getTargetAddressSpace(LangAS::opencl_generic));
6407 LValue NDRangeL = EmitAggExprToLValue(E->getArg(0));
6408 llvm::Value *NDRange = NDRangeL.getAddress().emitRawPointer(*this);
6409 auto Info =
6410 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(1));
6411 Value *Kernel =
6412 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
6413 Value *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
6414 const char *Name =
6415 BuiltinID == Builtin::BIget_kernel_max_sub_group_size_for_ndrange
6416 ? "__get_kernel_max_sub_group_size_for_ndrange_impl"
6417 : "__get_kernel_sub_group_count_for_ndrange_impl";
6420 llvm::FunctionType::get(
6421 IntTy, {NDRange->getType(), GenericVoidPtrTy, GenericVoidPtrTy},
6422 false),
6423 Name),
6424 {NDRange, Kernel, Block}));
6425 }
6426 case Builtin::BI__builtin_store_half:
6427 case Builtin::BI__builtin_store_halff: {
6428 Value *Val = EmitScalarExpr(E->getArg(0));
6429 Address Address = EmitPointerWithAlignment(E->getArg(1));
6430 Value *HalfVal = Builder.CreateFPTrunc(Val, Builder.getHalfTy());
6431 Builder.CreateStore(HalfVal, Address);
6432 return RValue::get(nullptr);
6433 }
6434 case Builtin::BI__builtin_load_half: {
6435 Address Address = EmitPointerWithAlignment(E->getArg(0));
6436 Value *HalfVal = Builder.CreateLoad(Address);
6437 return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getDoubleTy()));
6438 }
6439 case Builtin::BI__builtin_load_halff: {
6440 Address Address = EmitPointerWithAlignment(E->getArg(0));
6441 Value *HalfVal = Builder.CreateLoad(Address);
6442 return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getFloatTy()));
6443 }
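// Illustrative example (not from the source): __builtin_store_halff(f, p)
// truncates its float argument to half and stores it, i.e. roughly
//   %h = fptrunc float %f to half
//   store half %h, ptr %p
// while __builtin_load_half(p) loads a half and extends it to double, and
// __builtin_load_halff(p) extends it to float.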
6444 case Builtin::BI__builtin_printf:
6445 case Builtin::BIprintf:
6446 if (getTarget().getTriple().isNVPTX() ||
6447 getTarget().getTriple().isAMDGCN() ||
6448 (getTarget().getTriple().isSPIRV() &&
6449 getTarget().getTriple().getVendor() == Triple::VendorType::AMD)) {
6450 if (getTarget().getTriple().isNVPTX())
6451 return EmitNVPTXDevicePrintfCallExpr(E);
6452 if ((getTarget().getTriple().isAMDGCN() ||
6453 getTarget().getTriple().isSPIRV()) &&
6454 getLangOpts().HIP)
6455 return EmitAMDGPUDevicePrintfCallExpr(E);
6456 }
6457
6458 break;
6459 case Builtin::BI__builtin_canonicalize:
6460 case Builtin::BI__builtin_canonicalizef:
6461 case Builtin::BI__builtin_canonicalizef16:
6462 case Builtin::BI__builtin_canonicalizel:
6463 return RValue::get(
6464 emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::canonicalize));
6465
6466 case Builtin::BI__builtin_thread_pointer: {
6467 if (!getContext().getTargetInfo().isTLSSupported())
6468 CGM.ErrorUnsupported(E, "__builtin_thread_pointer");
6469 // Fall through - it's already mapped to the intrinsic by ClangBuiltin.
6470 break;
6471 }
6472 case Builtin::BI__builtin_os_log_format:
6473 return emitBuiltinOSLogFormat(*E);
6474
6475 case Builtin::BI__xray_customevent: {
6476 if (!ShouldXRayInstrumentFunction())
6477 return RValue::getIgnored();
6478
6479 if (!CGM.getCodeGenOpts().XRayInstrumentationBundle.has(
6480 XRayInstrKind::Custom))
6481 return RValue::getIgnored();
6482
6483 if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>())
6484 if (XRayAttr->neverXRayInstrument() && !AlwaysEmitXRayCustomEvents())
6485 return RValue::getIgnored();
6486
6487 Function *F = CGM.getIntrinsic(Intrinsic::xray_customevent);
6488 auto FTy = F->getFunctionType();
6489 auto Arg0 = E->getArg(0);
6490 auto Arg0Val = EmitScalarExpr(Arg0);
6491 auto Arg0Ty = Arg0->getType();
6492 auto PTy0 = FTy->getParamType(0);
6493 if (PTy0 != Arg0Val->getType()) {
6494 if (Arg0Ty->isArrayType())
6495 Arg0Val = EmitArrayToPointerDecay(Arg0).emitRawPointer(*this);
6496 else
6497 Arg0Val = Builder.CreatePointerCast(Arg0Val, PTy0);
6498 }
6499 auto Arg1 = EmitScalarExpr(E->getArg(1));
6500 auto PTy1 = FTy->getParamType(1);
6501 if (PTy1 != Arg1->getType())
6502 Arg1 = Builder.CreateTruncOrBitCast(Arg1, PTy1);
6503 return RValue::get(Builder.CreateCall(F, {Arg0Val, Arg1}));
6504 }
6505
6506 case Builtin::BI__xray_typedevent: {
6507 // TODO: There should be a way to always emit events even if the current
6508 // function is not instrumented. Losing events in a stream can cripple
6509 // a trace.
6510 if (!ShouldXRayInstrumentFunction())
6511 return RValue::getIgnored();
6512
6513 if (!CGM.getCodeGenOpts().XRayInstrumentationBundle.has(
6514 XRayInstrKind::Typed))
6515 return RValue::getIgnored();
6516
6517 if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>())
6518 if (XRayAttr->neverXRayInstrument() && !AlwaysEmitXRayTypedEvents())
6519 return RValue::getIgnored();
6520
6521 Function *F = CGM.getIntrinsic(Intrinsic::xray_typedevent);
6522 auto FTy = F->getFunctionType();
6523 auto Arg0 = EmitScalarExpr(E->getArg(0));
6524 auto PTy0 = FTy->getParamType(0);
6525 if (PTy0 != Arg0->getType())
6526 Arg0 = Builder.CreateTruncOrBitCast(Arg0, PTy0);
6527 auto Arg1 = E->getArg(1);
6528 auto Arg1Val = EmitScalarExpr(Arg1);
6529 auto Arg1Ty = Arg1->getType();
6530 auto PTy1 = FTy->getParamType(1);
6531 if (PTy1 != Arg1Val->getType()) {
6532 if (Arg1Ty->isArrayType())
6533 Arg1Val = EmitArrayToPointerDecay(Arg1).emitRawPointer(*this);
6534 else
6535 Arg1Val = Builder.CreatePointerCast(Arg1Val, PTy1);
6536 }
6537 auto Arg2 = EmitScalarExpr(E->getArg(2));
6538 auto PTy2 = FTy->getParamType(2);
6539 if (PTy2 != Arg2->getType())
6540 Arg2 = Builder.CreateTruncOrBitCast(Arg2, PTy2);
6541 return RValue::get(Builder.CreateCall(F, {Arg0, Arg1Val, Arg2}));
6542 }
6543
6544 case Builtin::BI__builtin_ms_va_start:
6545 case Builtin::BI__builtin_ms_va_end:
6546 return RValue::get(
6547 EmitVAStartEnd(EmitMSVAListRef(E->getArg(0)).emitRawPointer(*this),
6548 BuiltinID == Builtin::BI__builtin_ms_va_start));
6549
6550 case Builtin::BI__builtin_ms_va_copy: {
6551 // Lower this manually. We can't reliably determine whether or not any
6552 // given va_copy() is for a Win64 va_list from the calling convention
6553 // alone, because it's legal to do this from a System V ABI function.
6554 // With opaque pointer types, we won't have enough information in LLVM
6555 // IR to determine this from the argument types, either. Best to do it
6556 // now, while we have enough information.
6557 Address DestAddr = EmitMSVAListRef(E->getArg(0));
6558 Address SrcAddr = EmitMSVAListRef(E->getArg(1));
6559
6560 DestAddr = DestAddr.withElementType(Int8PtrTy);
6561 SrcAddr = SrcAddr.withElementType(Int8PtrTy);
6562
6563 Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val");
6564 return RValue::get(Builder.CreateStore(ArgPtr, DestAddr));
6565 }
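// Minimal sketch of the lowering above (illustrative): because a Win64
// va_list is just a pointer into the argument area, __builtin_ms_va_copy(dst,
// src) is emitted as a load of the source pointer followed by a store into
// the destination slot, roughly:
//   %ap.val = load ptr, ptr %src
//   store ptr %ap.val, ptr %dst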
6566
6567 case Builtin::BI__builtin_get_device_side_mangled_name: {
6568 auto Name = CGM.getCUDARuntime().getDeviceSideName(
6569 cast<DeclRefExpr>(E->getArg(0)->IgnoreImpCasts())->getDecl());
6570 auto Str = CGM.GetAddrOfConstantCString(Name, "");
6571 return RValue::get(Str.getPointer());
6572 }
6573 }
6574
6575 // If this is an alias for a lib function (e.g. __builtin_sin), emit
6576 // the call using the normal call path, but using the unmangled
6577 // version of the function name.
6578 if (getContext().BuiltinInfo.isLibFunction(BuiltinID))
6579 return emitLibraryCall(*this, FD, E,
6580 CGM.getBuiltinLibFunction(FD, BuiltinID));
6581
6582 // If this is a predefined lib function (e.g. malloc), emit the call
6583 // using exactly the normal call path.
6584 if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID))
6585 return emitLibraryCall(*this, FD, E, CGM.getRawFunctionPointer(FD));
6586
6587 // Check that a call to a target specific builtin has the correct target
6588 // features.
6589 // This is down here to avoid non-target specific builtins, however, if
6590 // generic builtins start to require generic target features then we
6591 // can move this up to the beginning of the function.
6592 checkTargetFeatures(E, FD);
6593
6594 if (unsigned VectorWidth = getContext().BuiltinInfo.getRequiredVectorWidth(BuiltinID))
6595 LargestVectorWidth = std::max(LargestVectorWidth, VectorWidth);
6596
6597 // See if we have a target specific intrinsic.
6598 StringRef Name = getContext().BuiltinInfo.getName(BuiltinID);
6599 Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic;
6600 StringRef Prefix =
6601 llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch());
6602 if (!Prefix.empty()) {
6603 IntrinsicID = Intrinsic::getIntrinsicForClangBuiltin(Prefix.data(), Name);
6604 if (IntrinsicID == Intrinsic::not_intrinsic && Prefix == "spv" &&
6605 getTarget().getTriple().getOS() == llvm::Triple::OSType::AMDHSA)
6606 IntrinsicID = Intrinsic::getIntrinsicForClangBuiltin("amdgcn", Name);
6607 // NOTE: we don't need to perform a compatibility flag check here since the
6608 // intrinsics are declared in Builtins*.def via LANGBUILTIN, which filters the
6609 // MS builtins via ALL_MS_LANGUAGES, so they are filtered out earlier.
6610 if (IntrinsicID == Intrinsic::not_intrinsic)
6611 IntrinsicID = Intrinsic::getIntrinsicForMSBuiltin(Prefix.data(), Name);
6612 }
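// Illustrative note (not part of the source): Prefix is the architecture
// prefix of the target triple (for example "x86", "aarch64", "arm" or "spv"),
// and getIntrinsicForClangBuiltin looks the builtin name up in that target's
// intrinsic tables, i.e. the intrinsics that name a Clang builtin via
// ClangBuiltin<> in the corresponding Intrinsics*.td file. The extra
// "amdgcn" probe lets SPIR-V compiles targeting AMDHSA reuse the AMDGPU
// mappings.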
6613
6614 if (IntrinsicID != Intrinsic::not_intrinsic) {
6615 SmallVector<Value *, 16> Args;
6616
6617 // Find out if any arguments are required to be integer constant
6618 // expressions.
6619 unsigned ICEArguments = 0;
6620 ASTContext::GetBuiltinTypeError Error;
6621 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
6622 assert(Error == ASTContext::GE_None && "Should not codegen an error");
6623
6624 Function *F = CGM.getIntrinsic(IntrinsicID);
6625 llvm::FunctionType *FTy = F->getFunctionType();
6626
6627 for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
6628 Value *ArgValue = EmitScalarOrConstFoldImmArg(ICEArguments, i, E);
6629 // If the intrinsic arg type is different from the builtin arg type
6630 // we need to do a bit cast.
6631 llvm::Type *PTy = FTy->getParamType(i);
6632 if (PTy != ArgValue->getType()) {
6633 // XXX - vector of pointers?
6634 if (auto *PtrTy = dyn_cast<llvm::PointerType>(PTy)) {
6635 if (PtrTy->getAddressSpace() !=
6636 ArgValue->getType()->getPointerAddressSpace()) {
6637 ArgValue = Builder.CreateAddrSpaceCast(
6638 ArgValue, llvm::PointerType::get(getLLVMContext(),
6639 PtrTy->getAddressSpace()));
6640 }
6641 }
6642
6643 // Cast vector type (e.g., v256i32) to x86_amx; this only happens
6644 // in AMX intrinsics.
6645 if (PTy->isX86_AMXTy())
6646 ArgValue = Builder.CreateIntrinsic(Intrinsic::x86_cast_vector_to_tile,
6647 {ArgValue->getType()}, {ArgValue});
6648 else
6649 ArgValue = Builder.CreateBitCast(ArgValue, PTy);
6650 }
6651
6652 Args.push_back(ArgValue);
6653 }
6654
6655 Value *V = Builder.CreateCall(F, Args);
6656 QualType BuiltinRetType = E->getType();
6657
6658 llvm::Type *RetTy = VoidTy;
6659 if (!BuiltinRetType->isVoidType())
6660 RetTy = ConvertType(BuiltinRetType);
6661
6662 if (RetTy != V->getType()) {
6663 // XXX - vector of pointers?
6664 if (auto *PtrTy = dyn_cast<llvm::PointerType>(RetTy)) {
6665 if (PtrTy->getAddressSpace() != V->getType()->getPointerAddressSpace()) {
6666 V = Builder.CreateAddrSpaceCast(
6667 V, llvm::PointerType::get(getLLVMContext(),
6668 PtrTy->getAddressSpace()));
6669 }
6670 }
6671
6672 // Cast x86_amx to vector type (e.g., v256i32); this only happens
6673 // in AMX intrinsics.
6674 if (V->getType()->isX86_AMXTy())
6675 V = Builder.CreateIntrinsic(Intrinsic::x86_cast_tile_to_vector, {RetTy},
6676 {V});
6677 else
6678 V = Builder.CreateBitCast(V, RetTy);
6679 }
6680
6681 if (RetTy->isVoidTy())
6682 return RValue::get(nullptr);
6683
6684 return RValue::get(V);
6685 }
6686
6687 // Some target-specific builtins can have aggregate return values, e.g.
6688 // __builtin_arm_mve_vld2q_u32. So if the result is an aggregate, force
6689 // ReturnValue to be non-null, so that the target-specific emission code can
6690 // always just emit into it.
6691 TypeEvaluationKind EvalKind = getEvaluationKind(E->getType());
6692 if (EvalKind == TEK_Aggregate && ReturnValue.isNull()) {
6693 Address DestPtr = CreateMemTemp(E->getType(), "agg.tmp");
6694 ReturnValue = ReturnValueSlot(DestPtr, false);
6695 }
6696
6697 // Now see if we can emit a target-specific builtin.
6698 if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E, ReturnValue)) {
6699 switch (EvalKind) {
6700 case TEK_Scalar:
6701 if (V->getType()->isVoidTy())
6702 return RValue::get(nullptr);
6703 return RValue::get(V);
6704 case TEK_Aggregate:
6705 return RValue::getAggregate(ReturnValue.getAddress(),
6706 ReturnValue.isVolatile());
6707 case TEK_Complex:
6708 llvm_unreachable("No current target builtin returns complex");
6709 }
6710 llvm_unreachable("Bad evaluation kind in EmitBuiltinExpr");
6711 }
6712
6713 // EmitHLSLBuiltinExpr will check getLangOpts().HLSL
6714 if (Value *V = EmitHLSLBuiltinExpr(BuiltinID, E, ReturnValue)) {
6715 switch (EvalKind) {
6716 case TEK_Scalar:
6717 if (V->getType()->isVoidTy())
6718 return RValue::get(nullptr);
6719 return RValue::get(V);
6720 case TEK_Aggregate:
6721 return RValue::getAggregate(ReturnValue.getAddress(),
6722 ReturnValue.isVolatile());
6723 case TEK_Complex:
6724 llvm_unreachable("No current hlsl builtin returns complex");
6725 }
6726 llvm_unreachable("Bad evaluation kind in EmitBuiltinExpr");
6727 }
6728
6729 if (getLangOpts().HIPStdPar && getLangOpts().CUDAIsDevice)
6730 return EmitHipStdParUnsupportedBuiltin(this, FD);
6731
6732 ErrorUnsupported(E, "builtin function");
6733
6734 // Unknown builtin, for now just dump it out and return undef.
6735 return GetUndefRValue(E->getType());
6736}
6737
6738 static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF,
6739 unsigned BuiltinID, const CallExpr *E,
6740 ReturnValueSlot ReturnValue,
6741 llvm::Triple::ArchType Arch) {
6742 // When compiling in HipStdPar mode we have to be conservative in rejecting
6743 // target specific features in the FE, and defer the possible error to the
6744 // AcceleratorCodeSelection pass, wherein iff an unsupported target builtin is
6745 // referenced by an accelerator executable function, we emit an error.
6746 // Returning nullptr here leads to the builtin being handled in
6747 // EmitStdParUnsupportedBuiltin.
6748 if (CGF->getLangOpts().HIPStdPar && CGF->getLangOpts().CUDAIsDevice &&
6749 Arch != CGF->getTarget().getTriple().getArch())
6750 return nullptr;
6751
6752 switch (Arch) {
6753 case llvm::Triple::arm:
6754 case llvm::Triple::armeb:
6755 case llvm::Triple::thumb:
6756 case llvm::Triple::thumbeb:
6757 return CGF->EmitARMBuiltinExpr(BuiltinID, E, ReturnValue, Arch);
6758 case llvm::Triple::aarch64:
6759 case llvm::Triple::aarch64_32:
6760 case llvm::Triple::aarch64_be:
6761 return CGF->EmitAArch64BuiltinExpr(BuiltinID, E, Arch);
6762 case llvm::Triple::bpfeb:
6763 case llvm::Triple::bpfel:
6764 return CGF->EmitBPFBuiltinExpr(BuiltinID, E);
6765 case llvm::Triple::x86:
6766 case llvm::Triple::x86_64:
6767 return CGF->EmitX86BuiltinExpr(BuiltinID, E);
6768 case llvm::Triple::ppc:
6769 case llvm::Triple::ppcle:
6770 case llvm::Triple::ppc64:
6771 case llvm::Triple::ppc64le:
6772 return CGF->EmitPPCBuiltinExpr(BuiltinID, E);
6773 case llvm::Triple::r600:
6774 case llvm::Triple::amdgcn:
6775 return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E);
6776 case llvm::Triple::systemz:
6777 return CGF->EmitSystemZBuiltinExpr(BuiltinID, E);
6778 case llvm::Triple::nvptx:
6779 case llvm::Triple::nvptx64:
6780 return CGF->EmitNVPTXBuiltinExpr(BuiltinID, E);
6781 case llvm::Triple::wasm32:
6782 case llvm::Triple::wasm64:
6783 return CGF->EmitWebAssemblyBuiltinExpr(BuiltinID, E);
6784 case llvm::Triple::hexagon:
6785 return CGF->EmitHexagonBuiltinExpr(BuiltinID, E);
6786 case llvm::Triple::riscv32:
6787 case llvm::Triple::riscv64:
6788 return CGF->EmitRISCVBuiltinExpr(BuiltinID, E, ReturnValue);
6789 case llvm::Triple::spirv:
6790 return CGF->EmitSPIRVBuiltinExpr(BuiltinID, E);
6791 case llvm::Triple::spirv64:
6792 if (CGF->getTarget().getTriple().getOS() != llvm::Triple::OSType::AMDHSA)
6793 return nullptr;
6794 return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E);
6795 default:
6796 return nullptr;
6797 }
6798}
6799
6800 Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
6801 const CallExpr *E,
6802 ReturnValueSlot ReturnValue) {
6803 if (getContext().BuiltinInfo.isAuxBuiltinID(BuiltinID)) {
6804 assert(getContext().getAuxTargetInfo() && "Missing aux target info");
6805 return EmitTargetArchBuiltinExpr(
6806 this, getContext().BuiltinInfo.getAuxBuiltinID(BuiltinID), E,
6807 ReturnValue, getContext().getAuxTargetInfo()->getTriple().getArch());
6808 }
6809
6810 return EmitTargetArchBuiltinExpr(this, BuiltinID, E, ReturnValue,
6811 getTarget().getTriple().getArch());
6812}
6813
6814static llvm::FixedVectorType *GetNeonType(CodeGenFunction *CGF,
6815 NeonTypeFlags TypeFlags,
6816 bool HasLegalHalfType = true,
6817 bool V1Ty = false,
6818 bool AllowBFloatArgsAndRet = true) {
6819 int IsQuad = TypeFlags.isQuad();
6820 switch (TypeFlags.getEltType()) {
6821 case NeonTypeFlags::Int8:
6822 case NeonTypeFlags::Poly8:
6823 return llvm::FixedVectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
6824 case NeonTypeFlags::Int16:
6825 case NeonTypeFlags::Poly16:
6826 return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
6827 case NeonTypeFlags::BFloat16:
6828 if (AllowBFloatArgsAndRet)
6829 return llvm::FixedVectorType::get(CGF->BFloatTy, V1Ty ? 1 : (4 << IsQuad));
6830 else
6831 return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
6832 case NeonTypeFlags::Float16:
6833 if (HasLegalHalfType)
6834 return llvm::FixedVectorType::get(CGF->HalfTy, V1Ty ? 1 : (4 << IsQuad));
6835 else
6836 return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
6837 case NeonTypeFlags::Int32:
6838 return llvm::FixedVectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
6839 case NeonTypeFlags::Int64:
6840 case NeonTypeFlags::Poly64:
6841 return llvm::FixedVectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
6842 case NeonTypeFlags::Poly128:
6843 // FIXME: i128 and f128 are not fully supported in Clang and LLVM yet;
6844 // a lot of the i128 and f128 API is missing, so we use v16i8 to
6845 // represent poly128 and rely on it getting pattern matched.
6846 return llvm::FixedVectorType::get(CGF->Int8Ty, 16);
6847 case NeonTypeFlags::Float32:
6848 return llvm::FixedVectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
6849 case NeonTypeFlags::Float64:
6850 return llvm::FixedVectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad));
6851 }
6852 llvm_unreachable("Unknown vector element type!");
6853}
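// Worked example (illustrative): for a quad (128-bit) vector of 8-bit
// elements, IsQuad is 1, so the Int8 case yields 8 << 1 == 16 lanes, i.e.
// <16 x i8>; the 64-bit ("d" register) form gets 8 << 0 == 8 lanes. V1Ty
// forces a single-element vector, which is used for the scalar forms of some
// NEON intrinsics.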
6854
6855static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF,
6856 NeonTypeFlags IntTypeFlags) {
6857 int IsQuad = IntTypeFlags.isQuad();
6858 switch (IntTypeFlags.getEltType()) {
6859 case NeonTypeFlags::Int16:
6860 return llvm::FixedVectorType::get(CGF->HalfTy, (4 << IsQuad));
6861 case NeonTypeFlags::Int32:
6862 return llvm::FixedVectorType::get(CGF->FloatTy, (2 << IsQuad));
6863 case NeonTypeFlags::Int64:
6864 return llvm::FixedVectorType::get(CGF->DoubleTy, (1 << IsQuad));
6865 default:
6866 llvm_unreachable("Type can't be converted to floating-point!");
6867 }
6868}
6869
6870 Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C,
6871 const ElementCount &Count) {
6872 Value *SV = llvm::ConstantVector::getSplat(Count, C);
6873 return Builder.CreateShuffleVector(V, V, SV, "lane");
6874}
6875
6876 Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
6877 ElementCount EC = cast<llvm::VectorType>(V->getType())->getElementCount();
6878 return EmitNeonSplat(V, C, EC);
6879}
6880
6881 Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
6882 const char *name,
6883 unsigned shift, bool rightshift) {
6884 unsigned j = 0;
6885 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
6886 ai != ae; ++ai, ++j) {
6887 if (F->isConstrainedFPIntrinsic())
6888 if (ai->getType()->isMetadataTy())
6889 continue;
6890 if (shift > 0 && shift == j)
6891 Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
6892 else
6893 Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
6894 }
6895
6896 if (F->isConstrainedFPIntrinsic())
6897 return Builder.CreateConstrainedFPCall(F, Ops, name);
6898 else
6899 return Builder.CreateCall(F, Ops, name);
6900}
6901
6902 Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
6903 bool neg) {
6904 int SV = cast<ConstantInt>(V)->getSExtValue();
6905 return ConstantInt::get(Ty, neg ? -SV : SV);
6906}
6907
6908// Right-shift a vector by a constant.
6909 Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,
6910 llvm::Type *Ty, bool usgn,
6911 const char *name) {
6912 llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
6913
6914 int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue();
6915 int EltSize = VTy->getScalarSizeInBits();
6916
6917 Vec = Builder.CreateBitCast(Vec, Ty);
6918
6919 // lshr/ashr are undefined when the shift amount is equal to the vector
6920 // element size.
6921 if (ShiftAmt == EltSize) {
6922 if (usgn) {
6923 // Right-shifting an unsigned value by its size yields 0.
6924 return llvm::ConstantAggregateZero::get(VTy);
6925 } else {
6926 // Right-shifting a signed value by its size is equivalent
6927 // to a shift of size-1.
6928 --ShiftAmt;
6929 Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt);
6930 }
6931 }
6932
6933 Shift = EmitNeonShiftVector(Shift, Ty, false);
6934 if (usgn)
6935 return Builder.CreateLShr(Vec, Shift, name);
6936 else
6937 return Builder.CreateAShr(Vec, Shift, name);
6938}
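// Illustrative example (not from the source): a shift amount equal to the
// element width is undefined for lshr/ashr, so vshrq_n_s32(v, 32) is emitted
// as an arithmetic shift by 31 (splatted across the <4 x i32> vector), which
// preserves the sign-fill semantics, while the unsigned form
// vshrq_n_u32(v, 32) simply folds to the zero vector.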
6939
6940enum {
6941 AddRetType = (1 << 0),
6942 Add1ArgType = (1 << 1),
6943 Add2ArgTypes = (1 << 2),
6944
6945 VectorizeRetType = (1 << 3),
6946 VectorizeArgTypes = (1 << 4),
6947
6948 InventFloatType = (1 << 5),
6949 UnsignedAlts = (1 << 6),
6950
6951 Use64BitVectors = (1 << 7),
6952 Use128BitVectors = (1 << 8),
6953
6954 Vectorize1ArgType = Add1ArgType | VectorizeArgTypes,
6955 VectorRet = AddRetType | VectorizeRetType,
6956 VectorRetGetArgs01 =
6957 AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes,
6958 FpCmpzModifiers =
6959 AddRetType | VectorizeRetType | Add1ArgType | InventFloatType
6960 };
6961
6962namespace {
6963struct ARMVectorIntrinsicInfo {
6964 const char *NameHint;
6965 unsigned BuiltinID;
6966 unsigned LLVMIntrinsic;
6967 unsigned AltLLVMIntrinsic;
6968 unsigned TypeModifier;
6969
6970 bool operator<(unsigned RHSBuiltinID) const {
6971 return BuiltinID < RHSBuiltinID;
6972 }
6973 bool operator<(const ARMVectorIntrinsicInfo &TE) const {
6974 return BuiltinID < TE.BuiltinID;
6975 }
6976};
6977} // end anonymous namespace
6978
6979#define NEONMAP0(NameBase) \
6980 { #NameBase, NEON::BI__builtin_neon_ ## NameBase, 0, 0, 0 }
6981
6982#define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
6983 { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
6984 Intrinsic::LLVMIntrinsic, 0, TypeModifier }
6985
6986#define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \
6987 { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
6988 Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \
6989 TypeModifier }
6990
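// Illustrative expansion (not part of the source): an entry such as
//   NEONMAP1(vabs_v, arm_neon_vabs, 0)
// expands to
//   { "vabs_v", NEON::BI__builtin_neon_vabs_v, Intrinsic::arm_neon_vabs, 0, 0 }
// i.e. a NameHint, the Clang builtin ID, the LLVM intrinsic, no alternate
// intrinsic, and no type-modifier flags; NEONMAP2 additionally supplies the
// alternate (typically unsigned) intrinsic, and NEONMAP0 maps a builtin that
// is handled by custom code rather than a single intrinsic.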
6991static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap [] = {
6992 NEONMAP1(__a32_vcvt_bf16_f32, arm_neon_vcvtfp2bf, 0),
6993 NEONMAP0(splat_lane_v),
6994 NEONMAP0(splat_laneq_v),
6995 NEONMAP0(splatq_lane_v),
6996 NEONMAP0(splatq_laneq_v),
6997 NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
6998 NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
6999 NEONMAP1(vabs_v, arm_neon_vabs, 0),
7000 NEONMAP1(vabsq_v, arm_neon_vabs, 0),
7001 NEONMAP0(vadd_v),
7002 NEONMAP0(vaddhn_v),
7003 NEONMAP0(vaddq_v),
7004 NEONMAP1(vaesdq_u8, arm_neon_aesd, 0),
7005 NEONMAP1(vaeseq_u8, arm_neon_aese, 0),
7006 NEONMAP1(vaesimcq_u8, arm_neon_aesimc, 0),
7007 NEONMAP1(vaesmcq_u8, arm_neon_aesmc, 0),
7008 NEONMAP1(vbfdot_f32, arm_neon_bfdot, 0),
7009 NEONMAP1(vbfdotq_f32, arm_neon_bfdot, 0),
7010 NEONMAP1(vbfmlalbq_f32, arm_neon_bfmlalb, 0),
7011 NEONMAP1(vbfmlaltq_f32, arm_neon_bfmlalt, 0),
7012 NEONMAP1(vbfmmlaq_f32, arm_neon_bfmmla, 0),
7013 NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
7014 NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
7015 NEONMAP1(vcadd_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),
7016 NEONMAP1(vcadd_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),
7017 NEONMAP1(vcadd_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),
7018 NEONMAP1(vcadd_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),
7019 NEONMAP1(vcaddq_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),
7020 NEONMAP1(vcaddq_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),
7021 NEONMAP1(vcaddq_rot270_f64, arm_neon_vcadd_rot270, Add1ArgType),
7022 NEONMAP1(vcaddq_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),
7023 NEONMAP1(vcaddq_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),
7024 NEONMAP1(vcaddq_rot90_f64, arm_neon_vcadd_rot90, Add1ArgType),
7025 NEONMAP1(vcage_v, arm_neon_vacge, 0),
7026 NEONMAP1(vcageq_v, arm_neon_vacge, 0),
7027 NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
7028 NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
7029 NEONMAP1(vcale_v, arm_neon_vacge, 0),
7030 NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
7031 NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
7032 NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
7033 NEONMAP0(vceqz_v),
7034 NEONMAP0(vceqzq_v),
7035 NEONMAP0(vcgez_v),
7036 NEONMAP0(vcgezq_v),
7037 NEONMAP0(vcgtz_v),
7038 NEONMAP0(vcgtzq_v),
7039 NEONMAP0(vclez_v),
7040 NEONMAP0(vclezq_v),
7041 NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType),
7042 NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType),
7043 NEONMAP0(vcltz_v),
7044 NEONMAP0(vcltzq_v),
7045 NEONMAP1(vclz_v, ctlz, Add1ArgType),
7046 NEONMAP1(vclzq_v, ctlz, Add1ArgType),
7047 NEONMAP1(vcnt_v, ctpop, Add1ArgType),
7048 NEONMAP1(vcntq_v, ctpop, Add1ArgType),
7049 NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
7050 NEONMAP0(vcvt_f16_s16),
7051 NEONMAP0(vcvt_f16_u16),
7052 NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
7053 NEONMAP0(vcvt_f32_v),
7054 NEONMAP1(vcvt_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
7055 NEONMAP1(vcvt_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
7056 NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
7057 NEONMAP1(vcvt_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
7058 NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
7059 NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
7060 NEONMAP1(vcvt_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
7061 NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
7062 NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
7063 NEONMAP0(vcvt_s16_f16),
7064 NEONMAP0(vcvt_s32_v),
7065 NEONMAP0(vcvt_s64_v),
7066 NEONMAP0(vcvt_u16_f16),
7067 NEONMAP0(vcvt_u32_v),
7068 NEONMAP0(vcvt_u64_v),
7069 NEONMAP1(vcvta_s16_f16, arm_neon_vcvtas, 0),
7070 NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
7071 NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
7072 NEONMAP1(vcvta_u16_f16, arm_neon_vcvtau, 0),
7073 NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
7074 NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
7075 NEONMAP1(vcvtaq_s16_f16, arm_neon_vcvtas, 0),
7076 NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
7077 NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
7078 NEONMAP1(vcvtaq_u16_f16, arm_neon_vcvtau, 0),
7079 NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
7080 NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
7081 NEONMAP1(vcvth_bf16_f32, arm_neon_vcvtbfp2bf, 0),
7082 NEONMAP1(vcvtm_s16_f16, arm_neon_vcvtms, 0),
7083 NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
7084 NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
7085 NEONMAP1(vcvtm_u16_f16, arm_neon_vcvtmu, 0),
7086 NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
7087 NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
7088 NEONMAP1(vcvtmq_s16_f16, arm_neon_vcvtms, 0),
7089 NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
7090 NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
7091 NEONMAP1(vcvtmq_u16_f16, arm_neon_vcvtmu, 0),
7092 NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
7093 NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
7094 NEONMAP1(vcvtn_s16_f16, arm_neon_vcvtns, 0),
7095 NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
7096 NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
7097 NEONMAP1(vcvtn_u16_f16, arm_neon_vcvtnu, 0),
7098 NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
7099 NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
7100 NEONMAP1(vcvtnq_s16_f16, arm_neon_vcvtns, 0),
7101 NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
7102 NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
7103 NEONMAP1(vcvtnq_u16_f16, arm_neon_vcvtnu, 0),
7104 NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
7105 NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
7106 NEONMAP1(vcvtp_s16_f16, arm_neon_vcvtps, 0),
7107 NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
7108 NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
7109 NEONMAP1(vcvtp_u16_f16, arm_neon_vcvtpu, 0),
7110 NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
7111 NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
7112 NEONMAP1(vcvtpq_s16_f16, arm_neon_vcvtps, 0),
7113 NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
7114 NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
7115 NEONMAP1(vcvtpq_u16_f16, arm_neon_vcvtpu, 0),
7116 NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
7117 NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
7118 NEONMAP0(vcvtq_f16_s16),
7119 NEONMAP0(vcvtq_f16_u16),
7120 NEONMAP0(vcvtq_f32_v),
7121 NEONMAP1(vcvtq_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
7122 NEONMAP1(vcvtq_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
7123 NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
7124 NEONMAP1(vcvtq_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
7125 NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
7126 NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
7127 NEONMAP1(vcvtq_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
7128 NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
7129 NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
7130 NEONMAP0(vcvtq_s16_f16),
7131 NEONMAP0(vcvtq_s32_v),
7132 NEONMAP0(vcvtq_s64_v),
7133 NEONMAP0(vcvtq_u16_f16),
7134 NEONMAP0(vcvtq_u32_v),
7135 NEONMAP0(vcvtq_u64_v),
7136 NEONMAP1(vdot_s32, arm_neon_sdot, 0),
7137 NEONMAP1(vdot_u32, arm_neon_udot, 0),
7138 NEONMAP1(vdotq_s32, arm_neon_sdot, 0),
7139 NEONMAP1(vdotq_u32, arm_neon_udot, 0),
7140 NEONMAP0(vext_v),
7141 NEONMAP0(vextq_v),
7142 NEONMAP0(vfma_v),
7143 NEONMAP0(vfmaq_v),
7144 NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
7145 NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
7146 NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
7147 NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
7148 NEONMAP0(vld1_dup_v),
7149 NEONMAP1(vld1_v, arm_neon_vld1, 0),
7150 NEONMAP1(vld1_x2_v, arm_neon_vld1x2, 0),
7151 NEONMAP1(vld1_x3_v, arm_neon_vld1x3, 0),
7152 NEONMAP1(vld1_x4_v, arm_neon_vld1x4, 0),
7153 NEONMAP0(vld1q_dup_v),
7154 NEONMAP1(vld1q_v, arm_neon_vld1, 0),
7155 NEONMAP1(vld1q_x2_v, arm_neon_vld1x2, 0),
7156 NEONMAP1(vld1q_x3_v, arm_neon_vld1x3, 0),
7157 NEONMAP1(vld1q_x4_v, arm_neon_vld1x4, 0),
7158 NEONMAP1(vld2_dup_v, arm_neon_vld2dup, 0),
7159 NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
7160 NEONMAP1(vld2_v, arm_neon_vld2, 0),
7161 NEONMAP1(vld2q_dup_v, arm_neon_vld2dup, 0),
7162 NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
7163 NEONMAP1(vld2q_v, arm_neon_vld2, 0),
7164 NEONMAP1(vld3_dup_v, arm_neon_vld3dup, 0),
7165 NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
7166 NEONMAP1(vld3_v, arm_neon_vld3, 0),
7167 NEONMAP1(vld3q_dup_v, arm_neon_vld3dup, 0),
7168 NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
7169 NEONMAP1(vld3q_v, arm_neon_vld3, 0),
7170 NEONMAP1(vld4_dup_v, arm_neon_vld4dup, 0),
7171 NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
7172 NEONMAP1(vld4_v, arm_neon_vld4, 0),
7173 NEONMAP1(vld4q_dup_v, arm_neon_vld4dup, 0),
7174 NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
7175 NEONMAP1(vld4q_v, arm_neon_vld4, 0),
7176 NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
7177 NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType),
7178 NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType),
7179 NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
7180 NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
7181 NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType),
7182 NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType),
7183 NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
7184 NEONMAP1(vmmlaq_s32, arm_neon_smmla, 0),
7185 NEONMAP1(vmmlaq_u32, arm_neon_ummla, 0),
7186 NEONMAP0(vmovl_v),
7187 NEONMAP0(vmovn_v),
7188 NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType),
7189 NEONMAP0(vmull_v),
7190 NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType),
7191 NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
7192 NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
7193 NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType),
7194 NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
7195 NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
7196 NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType),
7197 NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts),
7198 NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts),
7199 NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType),
7200 NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType),
7201 NEONMAP2(vqadd_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),
7202 NEONMAP2(vqaddq_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),
7203 NEONMAP2(vqdmlal_v, arm_neon_vqdmull, sadd_sat, 0),
7204 NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, ssub_sat, 0),
7205 NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType),
7206 NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType),
7207 NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType),
7208 NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts),
7209 NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType),
7210 NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType),
7211 NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType),
7212 NEONMAP1(vqrdmlah_s16, arm_neon_vqrdmlah, Add1ArgType),
7213 NEONMAP1(vqrdmlah_s32, arm_neon_vqrdmlah, Add1ArgType),
7214 NEONMAP1(vqrdmlahq_s16, arm_neon_vqrdmlah, Add1ArgType),
7215 NEONMAP1(vqrdmlahq_s32, arm_neon_vqrdmlah, Add1ArgType),
7216 NEONMAP1(vqrdmlsh_s16, arm_neon_vqrdmlsh, Add1ArgType),
7217 NEONMAP1(vqrdmlsh_s32, arm_neon_vqrdmlsh, Add1ArgType),
7218 NEONMAP1(vqrdmlshq_s16, arm_neon_vqrdmlsh, Add1ArgType),
7219 NEONMAP1(vqrdmlshq_s32, arm_neon_vqrdmlsh, Add1ArgType),
7220 NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType),
7221 NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType),
7222 NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
7223 NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
7224 NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
7225 NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
7226 NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
7227 NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
7228 NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
7229 NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
7230 NEONMAP2(vqsub_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),
7231 NEONMAP2(vqsubq_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),
7232 NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType),
7233 NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
7234 NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
7235 NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType),
7236 NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType),
7237 NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
7238 NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
7239 NEONMAP1(vrnd_v, arm_neon_vrintz, Add1ArgType),
7240 NEONMAP1(vrnda_v, arm_neon_vrinta, Add1ArgType),
7241 NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType),
7242 NEONMAP0(vrndi_v),
7243 NEONMAP0(vrndiq_v),
7244 NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType),
7245 NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType),
7246 NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType),
7247 NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType),
7248 NEONMAP1(vrndp_v, arm_neon_vrintp, Add1ArgType),
7249 NEONMAP1(vrndpq_v, arm_neon_vrintp, Add1ArgType),
7250 NEONMAP1(vrndq_v, arm_neon_vrintz, Add1ArgType),
7251 NEONMAP1(vrndx_v, arm_neon_vrintx, Add1ArgType),
7252 NEONMAP1(vrndxq_v, arm_neon_vrintx, Add1ArgType),
7253 NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
7254 NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
7255 NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
7256 NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
7257 NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
7258 NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
7259 NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),
7260 NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),
7261 NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),
7262 NEONMAP1(vsha1su0q_u32, arm_neon_sha1su0, 0),
7263 NEONMAP1(vsha1su1q_u32, arm_neon_sha1su1, 0),
7264 NEONMAP1(vsha256h2q_u32, arm_neon_sha256h2, 0),
7265 NEONMAP1(vsha256hq_u32, arm_neon_sha256h, 0),
7266 NEONMAP1(vsha256su0q_u32, arm_neon_sha256su0, 0),
7267 NEONMAP1(vsha256su1q_u32, arm_neon_sha256su1, 0),
7268 NEONMAP0(vshl_n_v),
7269 NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
7270 NEONMAP0(vshll_n_v),
7271 NEONMAP0(vshlq_n_v),
7272 NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
7273 NEONMAP0(vshr_n_v),
7274 NEONMAP0(vshrn_n_v),
7275 NEONMAP0(vshrq_n_v),
7276 NEONMAP1(vst1_v, arm_neon_vst1, 0),
7277 NEONMAP1(vst1_x2_v, arm_neon_vst1x2, 0),
7278 NEONMAP1(vst1_x3_v, arm_neon_vst1x3, 0),
7279 NEONMAP1(vst1_x4_v, arm_neon_vst1x4, 0),
7280 NEONMAP1(vst1q_v, arm_neon_vst1, 0),
7281 NEONMAP1(vst1q_x2_v, arm_neon_vst1x2, 0),
7282 NEONMAP1(vst1q_x3_v, arm_neon_vst1x3, 0),
7283 NEONMAP1(vst1q_x4_v, arm_neon_vst1x4, 0),
7284 NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
7285 NEONMAP1(vst2_v, arm_neon_vst2, 0),
7286 NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
7287 NEONMAP1(vst2q_v, arm_neon_vst2, 0),
7288 NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
7289 NEONMAP1(vst3_v, arm_neon_vst3, 0),
7290 NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
7291 NEONMAP1(vst3q_v, arm_neon_vst3, 0),
7292 NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
7293 NEONMAP1(vst4_v, arm_neon_vst4, 0),
7294 NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
7295 NEONMAP1(vst4q_v, arm_neon_vst4, 0),
7296 NEONMAP0(vsubhn_v),
7297 NEONMAP0(vtrn_v),
7298 NEONMAP0(vtrnq_v),
7299 NEONMAP0(vtst_v),
7300 NEONMAP0(vtstq_v),
7301 NEONMAP1(vusdot_s32, arm_neon_usdot, 0),
7302 NEONMAP1(vusdotq_s32, arm_neon_usdot, 0),
7303 NEONMAP1(vusmmlaq_s32, arm_neon_usmmla, 0),
7304 NEONMAP0(vuzp_v),
7305 NEONMAP0(vuzpq_v),
7306 NEONMAP0(vzip_v),
7307 NEONMAP0(vzipq_v)
7308};
7309
7310static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
7311 NEONMAP0(splat_lane_v),
7312 NEONMAP0(splat_laneq_v),
7313 NEONMAP0(splatq_lane_v),
7314 NEONMAP0(splatq_laneq_v),
7315 NEONMAP1(vabs_v, aarch64_neon_abs, 0),
7316 NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
7317 NEONMAP0(vadd_v),
7318 NEONMAP0(vaddhn_v),
7319 NEONMAP0(vaddq_p128),
7320 NEONMAP0(vaddq_v),
7321 NEONMAP1(vaesdq_u8, aarch64_crypto_aesd, 0),
7322 NEONMAP1(vaeseq_u8, aarch64_crypto_aese, 0),
7323 NEONMAP1(vaesimcq_u8, aarch64_crypto_aesimc, 0),
7324 NEONMAP1(vaesmcq_u8, aarch64_crypto_aesmc, 0),
7325 NEONMAP2(vbcaxq_s16, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
7326 NEONMAP2(vbcaxq_s32, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
7327 NEONMAP2(vbcaxq_s64, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
7328 NEONMAP2(vbcaxq_s8, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
7329 NEONMAP2(vbcaxq_u16, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
7330 NEONMAP2(vbcaxq_u32, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
7331 NEONMAP2(vbcaxq_u64, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
7332 NEONMAP2(vbcaxq_u8, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
7333 NEONMAP1(vbfdot_f32, aarch64_neon_bfdot, 0),
7334 NEONMAP1(vbfdotq_f32, aarch64_neon_bfdot, 0),
7335 NEONMAP1(vbfmlalbq_f32, aarch64_neon_bfmlalb, 0),
7336 NEONMAP1(vbfmlaltq_f32, aarch64_neon_bfmlalt, 0),
7337 NEONMAP1(vbfmmlaq_f32, aarch64_neon_bfmmla, 0),
7338 NEONMAP1(vcadd_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType),
7339 NEONMAP1(vcadd_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType),
7340 NEONMAP1(vcadd_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType),
7341 NEONMAP1(vcadd_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType),
7342 NEONMAP1(vcaddq_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType),
7343 NEONMAP1(vcaddq_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType),
7344 NEONMAP1(vcaddq_rot270_f64, aarch64_neon_vcadd_rot270, Add1ArgType),
7345 NEONMAP1(vcaddq_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType),
7346 NEONMAP1(vcaddq_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType),
7347 NEONMAP1(vcaddq_rot90_f64, aarch64_neon_vcadd_rot90, Add1ArgType),
7348 NEONMAP1(vcage_v, aarch64_neon_facge, 0),
7349 NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
7350 NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
7351 NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0),
7352 NEONMAP1(vcale_v, aarch64_neon_facge, 0),
7353 NEONMAP1(vcaleq_v, aarch64_neon_facge, 0),
7354 NEONMAP1(vcalt_v, aarch64_neon_facgt, 0),
7355 NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0),
7356 NEONMAP0(vceqz_v),
7357 NEONMAP0(vceqzq_v),
7358 NEONMAP0(vcgez_v),
7359 NEONMAP0(vcgezq_v),
7360 NEONMAP0(vcgtz_v),
7361 NEONMAP0(vcgtzq_v),
7362 NEONMAP0(vclez_v),
7363 NEONMAP0(vclezq_v),
7364 NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType),
7365 NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType),
7366 NEONMAP0(vcltz_v),
7367 NEONMAP0(vcltzq_v),
7368 NEONMAP1(vclz_v, ctlz, Add1ArgType),
7369 NEONMAP1(vclzq_v, ctlz, Add1ArgType),
7370 NEONMAP1(vcmla_f16, aarch64_neon_vcmla_rot0, Add1ArgType),
7371 NEONMAP1(vcmla_f32, aarch64_neon_vcmla_rot0, Add1ArgType),
7372 NEONMAP1(vcmla_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType),
7373 NEONMAP1(vcmla_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType),
7374 NEONMAP1(vcmla_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType),
7375 NEONMAP1(vcmla_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType),
7376 NEONMAP1(vcmla_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType),
7377 NEONMAP1(vcmla_rot90_f32, aarch64_neon_vcmla_rot90, Add1ArgType),
7378 NEONMAP1(vcmlaq_f16, aarch64_neon_vcmla_rot0, Add1ArgType),
7379 NEONMAP1(vcmlaq_f32, aarch64_neon_vcmla_rot0, Add1ArgType),
7380 NEONMAP1(vcmlaq_f64, aarch64_neon_vcmla_rot0, Add1ArgType),
7381 NEONMAP1(vcmlaq_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType),
7382 NEONMAP1(vcmlaq_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType),
7383 NEONMAP1(vcmlaq_rot180_f64, aarch64_neon_vcmla_rot180, Add1ArgType),
7384 NEONMAP1(vcmlaq_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType),
7385 NEONMAP1(vcmlaq_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType),
7386 NEONMAP1(vcmlaq_rot270_f64, aarch64_neon_vcmla_rot270, Add1ArgType),
7387 NEONMAP1(vcmlaq_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType),
7388 NEONMAP1(vcmlaq_rot90_f32, aarch64_neon_vcmla_rot90, Add1ArgType),
7389 NEONMAP1(vcmlaq_rot90_f64, aarch64_neon_vcmla_rot90, Add1ArgType),
7390 NEONMAP1(vcnt_v, ctpop, Add1ArgType),
7391 NEONMAP1(vcntq_v, ctpop, Add1ArgType),
7392 NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
7393 NEONMAP0(vcvt_f16_s16),
7394 NEONMAP0(vcvt_f16_u16),
7395 NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
7396 NEONMAP0(vcvt_f32_v),
7397 NEONMAP1(vcvt_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),
7398 NEONMAP1(vcvt_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0),
7399 NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
7400 NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
7401 NEONMAP1(vcvt_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0),
7402 NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
7403 NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
7404 NEONMAP1(vcvt_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0),
7405 NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
7406 NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
7407 NEONMAP0(vcvtq_f16_s16),
7408 NEONMAP0(vcvtq_f16_u16),
7409 NEONMAP0(vcvtq_f32_v),
7410 NEONMAP0(vcvtq_high_bf16_f32),
7411 NEONMAP0(vcvtq_low_bf16_f32),
7412 NEONMAP1(vcvtq_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),
7413 NEONMAP1(vcvtq_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0),
7414 NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
7415 NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
7416 NEONMAP1(vcvtq_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0),
7417 NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
7418 NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
7419 NEONMAP1(vcvtq_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0),
7420 NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
7421 NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
7422 NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
7423 NEONMAP1(vdot_s32, aarch64_neon_sdot, 0),
7424 NEONMAP1(vdot_u32, aarch64_neon_udot, 0),
7425 NEONMAP1(vdotq_s32, aarch64_neon_sdot, 0),
7426 NEONMAP1(vdotq_u32, aarch64_neon_udot, 0),
7427 NEONMAP2(veor3q_s16, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
7428 NEONMAP2(veor3q_s32, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
7429 NEONMAP2(veor3q_s64, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
7430 NEONMAP2(veor3q_s8, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
7431 NEONMAP2(veor3q_u16, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
7432 NEONMAP2(veor3q_u32, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
7433 NEONMAP2(veor3q_u64, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
7434 NEONMAP2(veor3q_u8, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
7435 NEONMAP0(vext_v),
7436 NEONMAP0(vextq_v),
7437 NEONMAP0(vfma_v),
7438 NEONMAP0(vfmaq_v),
7439 NEONMAP1(vfmlal_high_f16, aarch64_neon_fmlal2, 0),
7440 NEONMAP1(vfmlal_low_f16, aarch64_neon_fmlal, 0),
7441 NEONMAP1(vfmlalq_high_f16, aarch64_neon_fmlal2, 0),
7442 NEONMAP1(vfmlalq_low_f16, aarch64_neon_fmlal, 0),
7443 NEONMAP1(vfmlsl_high_f16, aarch64_neon_fmlsl2, 0),
7444 NEONMAP1(vfmlsl_low_f16, aarch64_neon_fmlsl, 0),
7445 NEONMAP1(vfmlslq_high_f16, aarch64_neon_fmlsl2, 0),
7446 NEONMAP1(vfmlslq_low_f16, aarch64_neon_fmlsl, 0),
7447 NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
7448 NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
7449 NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
7450 NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
7451 NEONMAP1(vld1_x2_v, aarch64_neon_ld1x2, 0),
7452 NEONMAP1(vld1_x3_v, aarch64_neon_ld1x3, 0),
7453 NEONMAP1(vld1_x4_v, aarch64_neon_ld1x4, 0),
7454 NEONMAP1(vld1q_x2_v, aarch64_neon_ld1x2, 0),
7455 NEONMAP1(vld1q_x3_v, aarch64_neon_ld1x3, 0),
7456 NEONMAP1(vld1q_x4_v, aarch64_neon_ld1x4, 0),
7457 NEONMAP1(vmmlaq_s32, aarch64_neon_smmla, 0),
7458 NEONMAP1(vmmlaq_u32, aarch64_neon_ummla, 0),
7459 NEONMAP0(vmovl_v),
7460 NEONMAP0(vmovn_v),
7461 NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType),
7462 NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType),
7463 NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType),
7464 NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
7465 NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
7466 NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType),
7467 NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType),
7468 NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType),
7469 NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
7470 NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
7471 NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0),
7472 NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0),
7473 NEONMAP1(vqdmulh_lane_v, aarch64_neon_sqdmulh_lane, 0),
7474 NEONMAP1(vqdmulh_laneq_v, aarch64_neon_sqdmulh_laneq, 0),
7475 NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType),
7476 NEONMAP1(vqdmulhq_lane_v, aarch64_neon_sqdmulh_lane, 0),
7477 NEONMAP1(vqdmulhq_laneq_v, aarch64_neon_sqdmulh_laneq, 0),
7478 NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType),
7479 NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType),
7480 NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts),
7481 NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType),
7482 NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType),
7483 NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType),
7484 NEONMAP1(vqrdmlah_s16, aarch64_neon_sqrdmlah, Add1ArgType),
7485 NEONMAP1(vqrdmlah_s32, aarch64_neon_sqrdmlah, Add1ArgType),
7486 NEONMAP1(vqrdmlahq_s16, aarch64_neon_sqrdmlah, Add1ArgType),
7487 NEONMAP1(vqrdmlahq_s32, aarch64_neon_sqrdmlah, Add1ArgType),
7488 NEONMAP1(vqrdmlsh_s16, aarch64_neon_sqrdmlsh, Add1ArgType),
7489 NEONMAP1(vqrdmlsh_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
7490 NEONMAP1(vqrdmlshq_s16, aarch64_neon_sqrdmlsh, Add1ArgType),
7491 NEONMAP1(vqrdmlshq_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
7492 NEONMAP1(vqrdmulh_lane_v, aarch64_neon_sqrdmulh_lane, 0),
7493 NEONMAP1(vqrdmulh_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
7494 NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType),
7495 NEONMAP1(vqrdmulhq_lane_v, aarch64_neon_sqrdmulh_lane, 0),
7496 NEONMAP1(vqrdmulhq_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
7497 NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType),
7498 NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
7499 NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
7500 NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
7501 NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
7502 NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
7503 NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
7504 NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0),
7505 NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0),
7506 NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
7507 NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
7508 NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType),
7509 NEONMAP1(vrax1q_u64, aarch64_crypto_rax1, 0),
7510 NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
7511 NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
7512 NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType),
7513 NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType),
7514 NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
7515 NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
7516 NEONMAP1(vrnd32x_f32, aarch64_neon_frint32x, Add1ArgType),
7517 NEONMAP1(vrnd32x_f64, aarch64_neon_frint32x, Add1ArgType),
7518 NEONMAP1(vrnd32xq_f32, aarch64_neon_frint32x, Add1ArgType),
7519 NEONMAP1(vrnd32xq_f64, aarch64_neon_frint32x, Add1ArgType),
7520 NEONMAP1(vrnd32z_f32, aarch64_neon_frint32z, Add1ArgType),
7521 NEONMAP1(vrnd32z_f64, aarch64_neon_frint32z, Add1ArgType),
7522 NEONMAP1(vrnd32zq_f32, aarch64_neon_frint32z, Add1ArgType),
7523 NEONMAP1(vrnd32zq_f64, aarch64_neon_frint32z, Add1ArgType),
7524 NEONMAP1(vrnd64x_f32, aarch64_neon_frint64x, Add1ArgType),
7525 NEONMAP1(vrnd64x_f64, aarch64_neon_frint64x, Add1ArgType),
7526 NEONMAP1(vrnd64xq_f32, aarch64_neon_frint64x, Add1ArgType),
7527 NEONMAP1(vrnd64xq_f64, aarch64_neon_frint64x, Add1ArgType),
7528 NEONMAP1(vrnd64z_f32, aarch64_neon_frint64z, Add1ArgType),
7529 NEONMAP1(vrnd64z_f64, aarch64_neon_frint64z, Add1ArgType),
7530 NEONMAP1(vrnd64zq_f32, aarch64_neon_frint64z, Add1ArgType),
7531 NEONMAP1(vrnd64zq_f64, aarch64_neon_frint64z, Add1ArgType),
7532 NEONMAP0(vrndi_v),
7533 NEONMAP0(vrndiq_v),
7534 NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
7535 NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
7536 NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
7537 NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
7538 NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
7539 NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
7540 NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType),
7541 NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType),
7542 NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType),
7543 NEONMAP1(vsha1su0q_u32, aarch64_crypto_sha1su0, 0),
7544 NEONMAP1(vsha1su1q_u32, aarch64_crypto_sha1su1, 0),
7545 NEONMAP1(vsha256h2q_u32, aarch64_crypto_sha256h2, 0),
7546 NEONMAP1(vsha256hq_u32, aarch64_crypto_sha256h, 0),
7547 NEONMAP1(vsha256su0q_u32, aarch64_crypto_sha256su0, 0),
7548 NEONMAP1(vsha256su1q_u32, aarch64_crypto_sha256su1, 0),
7549 NEONMAP1(vsha512h2q_u64, aarch64_crypto_sha512h2, 0),
7550 NEONMAP1(vsha512hq_u64, aarch64_crypto_sha512h, 0),
7551 NEONMAP1(vsha512su0q_u64, aarch64_crypto_sha512su0, 0),
7552 NEONMAP1(vsha512su1q_u64, aarch64_crypto_sha512su1, 0),
7553 NEONMAP0(vshl_n_v),
7554 NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
7555 NEONMAP0(vshll_n_v),
7556 NEONMAP0(vshlq_n_v),
7557 NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
7558 NEONMAP0(vshr_n_v),
7559 NEONMAP0(vshrn_n_v),
7560 NEONMAP0(vshrq_n_v),
7561 NEONMAP1(vsm3partw1q_u32, aarch64_crypto_sm3partw1, 0),
7562 NEONMAP1(vsm3partw2q_u32, aarch64_crypto_sm3partw2, 0),
7563 NEONMAP1(vsm3ss1q_u32, aarch64_crypto_sm3ss1, 0),
7564 NEONMAP1(vsm3tt1aq_u32, aarch64_crypto_sm3tt1a, 0),
7565 NEONMAP1(vsm3tt1bq_u32, aarch64_crypto_sm3tt1b, 0),
7566 NEONMAP1(vsm3tt2aq_u32, aarch64_crypto_sm3tt2a, 0),
7567 NEONMAP1(vsm3tt2bq_u32, aarch64_crypto_sm3tt2b, 0),
7568 NEONMAP1(vsm4ekeyq_u32, aarch64_crypto_sm4ekey, 0),
7569 NEONMAP1(vsm4eq_u32, aarch64_crypto_sm4e, 0),
7570 NEONMAP1(vst1_x2_v, aarch64_neon_st1x2, 0),
7571 NEONMAP1(vst1_x3_v, aarch64_neon_st1x3, 0),
7572 NEONMAP1(vst1_x4_v, aarch64_neon_st1x4, 0),
7573 NEONMAP1(vst1q_x2_v, aarch64_neon_st1x2, 0),
7574 NEONMAP1(vst1q_x3_v, aarch64_neon_st1x3, 0),
7575 NEONMAP1(vst1q_x4_v, aarch64_neon_st1x4, 0),
7576 NEONMAP0(vsubhn_v),
7577 NEONMAP0(vtst_v),
7578 NEONMAP0(vtstq_v),
7579 NEONMAP1(vusdot_s32, aarch64_neon_usdot, 0),
7580 NEONMAP1(vusdotq_s32, aarch64_neon_usdot, 0),
7581 NEONMAP1(vusmmlaq_s32, aarch64_neon_usmmla, 0),
7582 NEONMAP1(vxarq_u64, aarch64_crypto_xar, 0),
7583};
7584
7585static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = {
7586 NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType),
7587 NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType),
7588 NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType),
7589 NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
7590 NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
7591 NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
7592 NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
7593 NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
7594 NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
7595 NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
7596 NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
7597 NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType),
7598 NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
7599 NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType),
7600 NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
7601 NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
7602 NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
7603 NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
7604 NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
7605 NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
7606 NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
7607 NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
7608 NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
7609 NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
7610 NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
7611 NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
7612 NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
7613 NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
7614 NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
7615 NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
7616 NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
7617 NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
7618 NEONMAP1(vcvtd_s64_f64, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
7619 NEONMAP1(vcvtd_u64_f64, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
7620 NEONMAP0(vcvth_bf16_f32),
7621 NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
7622 NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
7623 NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
7624 NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
7625 NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
7626 NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
7627 NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
7628 NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
7629 NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
7630 NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
7631 NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
7632 NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
7633 NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
7634 NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
7635 NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
7636 NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
7637 NEONMAP1(vcvts_s32_f32, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
7638 NEONMAP1(vcvts_u32_f32, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
7639 NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0),
7640 NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
7641 NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
7642 NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
7643 NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
7644 NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
7645 NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
7646 NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
7647 NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
7648 NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
7649 NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
7650 NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
7651 NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
7652 NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
7653 NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
7654 NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
7655 NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
7656 NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
7657 NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
7658 NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
7659 NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
7660 NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0),
7661 NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType),
7662 NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType),
7663 NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
7664 NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
7665 NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
7666 NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
7667 NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
7668 NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
7669 NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
7670 NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
7671 NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
7672 NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
7673 NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
7674 NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType),
7675 NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
7676 NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType),
7677 NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
7678 NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
7679 NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType),
7680 NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType),
7681 NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
7682 NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
7683 NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType),
7684 NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType),
7685 NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors),
7686 NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType),
7687 NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors),
7688 NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0),
7689 NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType),
7690 NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType),
7691 NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
7692 NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
7693 NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
7694 NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
7695 NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType),
7696 NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
7697 NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
7698 NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
7699 NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType),
7700 NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
7701 NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType),
7702 NEONMAP1(vqrdmlahh_s16, aarch64_neon_sqrdmlah, Vectorize1ArgType | Use64BitVectors),
7703 NEONMAP1(vqrdmlahs_s32, aarch64_neon_sqrdmlah, Add1ArgType),
7704 NEONMAP1(vqrdmlshh_s16, aarch64_neon_sqrdmlsh, Vectorize1ArgType | Use64BitVectors),
7705 NEONMAP1(vqrdmlshs_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
7706 NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors),
7707 NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType),
7708 NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
7709 NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
7710 NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType),
7711 NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType),
7712 NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
7713 NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
7714 NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType),
7715 NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType),
7716 NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType),
7717 NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType),
7718 NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
7719 NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
7720 NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
7721 NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
7722 NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType),
7723 NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
7724 NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
7725 NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
7726 NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
7727 NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
7728 NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
7729 NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType),
7730 NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType),
7731 NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
7732 NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
7733 NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
7734 NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
7735 NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType),
7736 NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType),
7737 NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType),
7738 NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType),
7739 NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
7740 NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
7741 NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType),
7742 NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType),
7743 NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType),
7744 NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
7745 NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
7746 NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
7747 NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
7748 NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType),
7749 NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
7750 NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
7751 NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
7752 NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
7753 NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType),
7754 NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType),
7755 NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
7756 NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
7757 NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType),
7758 NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType),
7759 NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType),
7760 NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType),
7761 NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType),
7762 NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType),
7763 NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType),
7764 NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType),
7765 NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType),
7766 NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType),
7767 NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType),
7768 NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType),
7769 NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0),
7770 NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0),
7771 NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0),
7772 NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0),
7773 NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType),
7774 NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType),
7775 NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType),
7776 NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType),
7777 NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
7778 NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType),
7779 NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
7780 NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType),
7781 NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType),
7782 NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType),
7783 NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
7784 NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType),
7785 NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
7786 NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType),
7787 // FP16 scalar intrinsics go here.
7788 NEONMAP1(vabdh_f16, aarch64_sisd_fabd, Add1ArgType),
7789 NEONMAP1(vcvtah_s32_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
7790 NEONMAP1(vcvtah_s64_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
7791 NEONMAP1(vcvtah_u32_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
7792 NEONMAP1(vcvtah_u64_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
7793 NEONMAP1(vcvth_n_f16_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
7794 NEONMAP1(vcvth_n_f16_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
7795 NEONMAP1(vcvth_n_f16_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
7796 NEONMAP1(vcvth_n_f16_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
7797 NEONMAP1(vcvth_n_s32_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
7798 NEONMAP1(vcvth_n_s64_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
7799 NEONMAP1(vcvth_n_u32_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
7800 NEONMAP1(vcvth_n_u64_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
7801 NEONMAP1(vcvth_s32_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
7802 NEONMAP1(vcvth_s64_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
7803 NEONMAP1(vcvth_u32_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
7804 NEONMAP1(vcvth_u64_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
7805 NEONMAP1(vcvtmh_s32_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
7806 NEONMAP1(vcvtmh_s64_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
7807 NEONMAP1(vcvtmh_u32_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
7808 NEONMAP1(vcvtmh_u64_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
7809 NEONMAP1(vcvtnh_s32_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
7810 NEONMAP1(vcvtnh_s64_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
7811 NEONMAP1(vcvtnh_u32_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
7812 NEONMAP1(vcvtnh_u64_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
7813 NEONMAP1(vcvtph_s32_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
7814 NEONMAP1(vcvtph_s64_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
7815 NEONMAP1(vcvtph_u32_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
7816 NEONMAP1(vcvtph_u64_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
7817 NEONMAP1(vmulxh_f16, aarch64_neon_fmulx, Add1ArgType),
7818 NEONMAP1(vrecpeh_f16, aarch64_neon_frecpe, Add1ArgType),
7819 NEONMAP1(vrecpxh_f16, aarch64_neon_frecpx, Add1ArgType),
7820 NEONMAP1(vrsqrteh_f16, aarch64_neon_frsqrte, Add1ArgType),
7821 NEONMAP1(vrsqrtsh_f16, aarch64_neon_frsqrts, Add1ArgType),
7822};
7823
7824// Some intrinsics are equivalent for codegen.
7825static const std::pair<unsigned, unsigned> NEONEquivalentIntrinsicMap[] = {
7826 { NEON::BI__builtin_neon_splat_lane_bf16, NEON::BI__builtin_neon_splat_lane_v, },
7827 { NEON::BI__builtin_neon_splat_laneq_bf16, NEON::BI__builtin_neon_splat_laneq_v, },
7828 { NEON::BI__builtin_neon_splatq_lane_bf16, NEON::BI__builtin_neon_splatq_lane_v, },
7829 { NEON::BI__builtin_neon_splatq_laneq_bf16, NEON::BI__builtin_neon_splatq_laneq_v, },
7830 { NEON::BI__builtin_neon_vabd_f16, NEON::BI__builtin_neon_vabd_v, },
7831 { NEON::BI__builtin_neon_vabdq_f16, NEON::BI__builtin_neon_vabdq_v, },
7832 { NEON::BI__builtin_neon_vabs_f16, NEON::BI__builtin_neon_vabs_v, },
7833 { NEON::BI__builtin_neon_vabsq_f16, NEON::BI__builtin_neon_vabsq_v, },
7834 { NEON::BI__builtin_neon_vcage_f16, NEON::BI__builtin_neon_vcage_v, },
7835 { NEON::BI__builtin_neon_vcageq_f16, NEON::BI__builtin_neon_vcageq_v, },
7836 { NEON::BI__builtin_neon_vcagt_f16, NEON::BI__builtin_neon_vcagt_v, },
7837 { NEON::BI__builtin_neon_vcagtq_f16, NEON::BI__builtin_neon_vcagtq_v, },
7838 { NEON::BI__builtin_neon_vcale_f16, NEON::BI__builtin_neon_vcale_v, },
7839 { NEON::BI__builtin_neon_vcaleq_f16, NEON::BI__builtin_neon_vcaleq_v, },
7840 { NEON::BI__builtin_neon_vcalt_f16, NEON::BI__builtin_neon_vcalt_v, },
7841 { NEON::BI__builtin_neon_vcaltq_f16, NEON::BI__builtin_neon_vcaltq_v, },
7842 { NEON::BI__builtin_neon_vceqz_f16, NEON::BI__builtin_neon_vceqz_v, },
7843 { NEON::BI__builtin_neon_vceqzq_f16, NEON::BI__builtin_neon_vceqzq_v, },
7844 { NEON::BI__builtin_neon_vcgez_f16, NEON::BI__builtin_neon_vcgez_v, },
7845 { NEON::BI__builtin_neon_vcgezq_f16, NEON::BI__builtin_neon_vcgezq_v, },
7846 { NEON::BI__builtin_neon_vcgtz_f16, NEON::BI__builtin_neon_vcgtz_v, },
7847 { NEON::BI__builtin_neon_vcgtzq_f16, NEON::BI__builtin_neon_vcgtzq_v, },
7848 { NEON::BI__builtin_neon_vclez_f16, NEON::BI__builtin_neon_vclez_v, },
7849 { NEON::BI__builtin_neon_vclezq_f16, NEON::BI__builtin_neon_vclezq_v, },
7850 { NEON::BI__builtin_neon_vcltz_f16, NEON::BI__builtin_neon_vcltz_v, },
7851 { NEON::BI__builtin_neon_vcltzq_f16, NEON::BI__builtin_neon_vcltzq_v, },
7852 { NEON::BI__builtin_neon_vfma_f16, NEON::BI__builtin_neon_vfma_v, },
7853 { NEON::BI__builtin_neon_vfma_lane_f16, NEON::BI__builtin_neon_vfma_lane_v, },
7854 { NEON::BI__builtin_neon_vfma_laneq_f16, NEON::BI__builtin_neon_vfma_laneq_v, },
7855 { NEON::BI__builtin_neon_vfmaq_f16, NEON::BI__builtin_neon_vfmaq_v, },
7856 { NEON::BI__builtin_neon_vfmaq_lane_f16, NEON::BI__builtin_neon_vfmaq_lane_v, },
7857 { NEON::BI__builtin_neon_vfmaq_laneq_f16, NEON::BI__builtin_neon_vfmaq_laneq_v, },
7858 { NEON::BI__builtin_neon_vld1_bf16_x2, NEON::BI__builtin_neon_vld1_x2_v },
7859 { NEON::BI__builtin_neon_vld1_bf16_x3, NEON::BI__builtin_neon_vld1_x3_v },
7860 { NEON::BI__builtin_neon_vld1_bf16_x4, NEON::BI__builtin_neon_vld1_x4_v },
7861 { NEON::BI__builtin_neon_vld1_bf16, NEON::BI__builtin_neon_vld1_v },
7862 { NEON::BI__builtin_neon_vld1_dup_bf16, NEON::BI__builtin_neon_vld1_dup_v },
7863 { NEON::BI__builtin_neon_vld1_lane_bf16, NEON::BI__builtin_neon_vld1_lane_v },
7864 { NEON::BI__builtin_neon_vld1q_bf16_x2, NEON::BI__builtin_neon_vld1q_x2_v },
7865 { NEON::BI__builtin_neon_vld1q_bf16_x3, NEON::BI__builtin_neon_vld1q_x3_v },
7866 { NEON::BI__builtin_neon_vld1q_bf16_x4, NEON::BI__builtin_neon_vld1q_x4_v },
7867 { NEON::BI__builtin_neon_vld1q_bf16, NEON::BI__builtin_neon_vld1q_v },
7868 { NEON::BI__builtin_neon_vld1q_dup_bf16, NEON::BI__builtin_neon_vld1q_dup_v },
7869 { NEON::BI__builtin_neon_vld1q_lane_bf16, NEON::BI__builtin_neon_vld1q_lane_v },
7870 { NEON::BI__builtin_neon_vld2_bf16, NEON::BI__builtin_neon_vld2_v },
7871 { NEON::BI__builtin_neon_vld2_dup_bf16, NEON::BI__builtin_neon_vld2_dup_v },
7872 { NEON::BI__builtin_neon_vld2_lane_bf16, NEON::BI__builtin_neon_vld2_lane_v },
7873 { NEON::BI__builtin_neon_vld2q_bf16, NEON::BI__builtin_neon_vld2q_v },
7874 { NEON::BI__builtin_neon_vld2q_dup_bf16, NEON::BI__builtin_neon_vld2q_dup_v },
7875 { NEON::BI__builtin_neon_vld2q_lane_bf16, NEON::BI__builtin_neon_vld2q_lane_v },
7876 { NEON::BI__builtin_neon_vld3_bf16, NEON::BI__builtin_neon_vld3_v },
7877 { NEON::BI__builtin_neon_vld3_dup_bf16, NEON::BI__builtin_neon_vld3_dup_v },
7878 { NEON::BI__builtin_neon_vld3_lane_bf16, NEON::BI__builtin_neon_vld3_lane_v },
7879 { NEON::BI__builtin_neon_vld3q_bf16, NEON::BI__builtin_neon_vld3q_v },
7880 { NEON::BI__builtin_neon_vld3q_dup_bf16, NEON::BI__builtin_neon_vld3q_dup_v },
7881 { NEON::BI__builtin_neon_vld3q_lane_bf16, NEON::BI__builtin_neon_vld3q_lane_v },
7882 { NEON::BI__builtin_neon_vld4_bf16, NEON::BI__builtin_neon_vld4_v },
7883 { NEON::BI__builtin_neon_vld4_dup_bf16, NEON::BI__builtin_neon_vld4_dup_v },
7884 { NEON::BI__builtin_neon_vld4_lane_bf16, NEON::BI__builtin_neon_vld4_lane_v },
7885 { NEON::BI__builtin_neon_vld4q_bf16, NEON::BI__builtin_neon_vld4q_v },
7886 { NEON::BI__builtin_neon_vld4q_dup_bf16, NEON::BI__builtin_neon_vld4q_dup_v },
7887 { NEON::BI__builtin_neon_vld4q_lane_bf16, NEON::BI__builtin_neon_vld4q_lane_v },
7888 { NEON::BI__builtin_neon_vmax_f16, NEON::BI__builtin_neon_vmax_v, },
7889 { NEON::BI__builtin_neon_vmaxnm_f16, NEON::BI__builtin_neon_vmaxnm_v, },
7890 { NEON::BI__builtin_neon_vmaxnmq_f16, NEON::BI__builtin_neon_vmaxnmq_v, },
7891 { NEON::BI__builtin_neon_vmaxq_f16, NEON::BI__builtin_neon_vmaxq_v, },
7892 { NEON::BI__builtin_neon_vmin_f16, NEON::BI__builtin_neon_vmin_v, },
7893 { NEON::BI__builtin_neon_vminnm_f16, NEON::BI__builtin_neon_vminnm_v, },
7894 { NEON::BI__builtin_neon_vminnmq_f16, NEON::BI__builtin_neon_vminnmq_v, },
7895 { NEON::BI__builtin_neon_vminq_f16, NEON::BI__builtin_neon_vminq_v, },
7896 { NEON::BI__builtin_neon_vmulx_f16, NEON::BI__builtin_neon_vmulx_v, },
7897 { NEON::BI__builtin_neon_vmulxq_f16, NEON::BI__builtin_neon_vmulxq_v, },
7898 { NEON::BI__builtin_neon_vpadd_f16, NEON::BI__builtin_neon_vpadd_v, },
7899 { NEON::BI__builtin_neon_vpaddq_f16, NEON::BI__builtin_neon_vpaddq_v, },
7900 { NEON::BI__builtin_neon_vpmax_f16, NEON::BI__builtin_neon_vpmax_v, },
7901 { NEON::BI__builtin_neon_vpmaxnm_f16, NEON::BI__builtin_neon_vpmaxnm_v, },
7902 { NEON::BI__builtin_neon_vpmaxnmq_f16, NEON::BI__builtin_neon_vpmaxnmq_v, },
7903 { NEON::BI__builtin_neon_vpmaxq_f16, NEON::BI__builtin_neon_vpmaxq_v, },
7904 { NEON::BI__builtin_neon_vpmin_f16, NEON::BI__builtin_neon_vpmin_v, },
7905 { NEON::BI__builtin_neon_vpminnm_f16, NEON::BI__builtin_neon_vpminnm_v, },
7906 { NEON::BI__builtin_neon_vpminnmq_f16, NEON::BI__builtin_neon_vpminnmq_v, },
7907 { NEON::BI__builtin_neon_vpminq_f16, NEON::BI__builtin_neon_vpminq_v, },
7908 { NEON::BI__builtin_neon_vrecpe_f16, NEON::BI__builtin_neon_vrecpe_v, },
7909 { NEON::BI__builtin_neon_vrecpeq_f16, NEON::BI__builtin_neon_vrecpeq_v, },
7910 { NEON::BI__builtin_neon_vrecps_f16, NEON::BI__builtin_neon_vrecps_v, },
7911 { NEON::BI__builtin_neon_vrecpsq_f16, NEON::BI__builtin_neon_vrecpsq_v, },
7912 { NEON::BI__builtin_neon_vrnd_f16, NEON::BI__builtin_neon_vrnd_v, },
7913 { NEON::BI__builtin_neon_vrnda_f16, NEON::BI__builtin_neon_vrnda_v, },
7914 { NEON::BI__builtin_neon_vrndaq_f16, NEON::BI__builtin_neon_vrndaq_v, },
7915 { NEON::BI__builtin_neon_vrndi_f16, NEON::BI__builtin_neon_vrndi_v, },
7916 { NEON::BI__builtin_neon_vrndiq_f16, NEON::BI__builtin_neon_vrndiq_v, },
7917 { NEON::BI__builtin_neon_vrndm_f16, NEON::BI__builtin_neon_vrndm_v, },
7918 { NEON::BI__builtin_neon_vrndmq_f16, NEON::BI__builtin_neon_vrndmq_v, },
7919 { NEON::BI__builtin_neon_vrndn_f16, NEON::BI__builtin_neon_vrndn_v, },
7920 { NEON::BI__builtin_neon_vrndnq_f16, NEON::BI__builtin_neon_vrndnq_v, },
7921 { NEON::BI__builtin_neon_vrndp_f16, NEON::BI__builtin_neon_vrndp_v, },
7922 { NEON::BI__builtin_neon_vrndpq_f16, NEON::BI__builtin_neon_vrndpq_v, },
7923 { NEON::BI__builtin_neon_vrndq_f16, NEON::BI__builtin_neon_vrndq_v, },
7924 { NEON::BI__builtin_neon_vrndx_f16, NEON::BI__builtin_neon_vrndx_v, },
7925 { NEON::BI__builtin_neon_vrndxq_f16, NEON::BI__builtin_neon_vrndxq_v, },
7926 { NEON::BI__builtin_neon_vrsqrte_f16, NEON::BI__builtin_neon_vrsqrte_v, },
7927 { NEON::BI__builtin_neon_vrsqrteq_f16, NEON::BI__builtin_neon_vrsqrteq_v, },
7928 { NEON::BI__builtin_neon_vrsqrts_f16, NEON::BI__builtin_neon_vrsqrts_v, },
7929 { NEON::BI__builtin_neon_vrsqrtsq_f16, NEON::BI__builtin_neon_vrsqrtsq_v, },
7930 { NEON::BI__builtin_neon_vsqrt_f16, NEON::BI__builtin_neon_vsqrt_v, },
7931 { NEON::BI__builtin_neon_vsqrtq_f16, NEON::BI__builtin_neon_vsqrtq_v, },
7932 { NEON::BI__builtin_neon_vst1_bf16_x2, NEON::BI__builtin_neon_vst1_x2_v },
7933 { NEON::BI__builtin_neon_vst1_bf16_x3, NEON::BI__builtin_neon_vst1_x3_v },
7934 { NEON::BI__builtin_neon_vst1_bf16_x4, NEON::BI__builtin_neon_vst1_x4_v },
7935 { NEON::BI__builtin_neon_vst1_bf16, NEON::BI__builtin_neon_vst1_v },
7936 { NEON::BI__builtin_neon_vst1_lane_bf16, NEON::BI__builtin_neon_vst1_lane_v },
7937 { NEON::BI__builtin_neon_vst1q_bf16_x2, NEON::BI__builtin_neon_vst1q_x2_v },
7938 { NEON::BI__builtin_neon_vst1q_bf16_x3, NEON::BI__builtin_neon_vst1q_x3_v },
7939 { NEON::BI__builtin_neon_vst1q_bf16_x4, NEON::BI__builtin_neon_vst1q_x4_v },
7940 { NEON::BI__builtin_neon_vst1q_bf16, NEON::BI__builtin_neon_vst1q_v },
7941 { NEON::BI__builtin_neon_vst1q_lane_bf16, NEON::BI__builtin_neon_vst1q_lane_v },
7942 { NEON::BI__builtin_neon_vst2_bf16, NEON::BI__builtin_neon_vst2_v },
7943 { NEON::BI__builtin_neon_vst2_lane_bf16, NEON::BI__builtin_neon_vst2_lane_v },
7944 { NEON::BI__builtin_neon_vst2q_bf16, NEON::BI__builtin_neon_vst2q_v },
7945 { NEON::BI__builtin_neon_vst2q_lane_bf16, NEON::BI__builtin_neon_vst2q_lane_v },
7946 { NEON::BI__builtin_neon_vst3_bf16, NEON::BI__builtin_neon_vst3_v },
7947 { NEON::BI__builtin_neon_vst3_lane_bf16, NEON::BI__builtin_neon_vst3_lane_v },
7948 { NEON::BI__builtin_neon_vst3q_bf16, NEON::BI__builtin_neon_vst3q_v },
7949 { NEON::BI__builtin_neon_vst3q_lane_bf16, NEON::BI__builtin_neon_vst3q_lane_v },
7950 { NEON::BI__builtin_neon_vst4_bf16, NEON::BI__builtin_neon_vst4_v },
7951 { NEON::BI__builtin_neon_vst4_lane_bf16, NEON::BI__builtin_neon_vst4_lane_v },
7952 { NEON::BI__builtin_neon_vst4q_bf16, NEON::BI__builtin_neon_vst4q_v },
7953 { NEON::BI__builtin_neon_vst4q_lane_bf16, NEON::BI__builtin_neon_vst4q_lane_v },
7954 // The mangling rules cause us to have one ID for each type for vldap1(q)_lane
7955 // and vstl1(q)_lane, but codegen is equivalent for all of them. Choose an
7956 // arbitrary one to be handled as the canonical variation.
7957 { NEON::BI__builtin_neon_vldap1_lane_u64, NEON::BI__builtin_neon_vldap1_lane_s64 },
7958 { NEON::BI__builtin_neon_vldap1_lane_f64, NEON::BI__builtin_neon_vldap1_lane_s64 },
7959 { NEON::BI__builtin_neon_vldap1_lane_p64, NEON::BI__builtin_neon_vldap1_lane_s64 },
7960 { NEON::BI__builtin_neon_vldap1q_lane_u64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
7961 { NEON::BI__builtin_neon_vldap1q_lane_f64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
7962 { NEON::BI__builtin_neon_vldap1q_lane_p64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
7963 { NEON::BI__builtin_neon_vstl1_lane_u64, NEON::BI__builtin_neon_vstl1_lane_s64 },
7964 { NEON::BI__builtin_neon_vstl1_lane_f64, NEON::BI__builtin_neon_vstl1_lane_s64 },
7965 { NEON::BI__builtin_neon_vstl1_lane_p64, NEON::BI__builtin_neon_vstl1_lane_s64 },
7966 { NEON::BI__builtin_neon_vstl1q_lane_u64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
7967 { NEON::BI__builtin_neon_vstl1q_lane_f64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
7968 { NEON::BI__builtin_neon_vstl1q_lane_p64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
7969};
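// Illustrative sketch only (the concrete remapping call sites live elsewhere
// in this file): before the per-target tables are searched, a builtin ID can
// be canonicalised through the pairs above, e.g.
//   for (const auto &P : NEONEquivalentIntrinsicMap)
//     if (P.first == BuiltinID) { BuiltinID = P.second; break; }
// so the bf16 / u64 / f64 / p64 spellings share one codegen path with the
// canonical _v or _s64 variant they are paired with.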
7970
7971#undef NEONMAP0
7972#undef NEONMAP1
7973#undef NEONMAP2
7974
7975#define SVEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
7976 { \
7977 #NameBase, SVE::BI__builtin_sve_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
7978 TypeModifier \
7979 }
7980
7981#define SVEMAP2(NameBase, TypeModifier) \
7982 { #NameBase, SVE::BI__builtin_sve_##NameBase, 0, 0, TypeModifier }
7983static const ARMVectorIntrinsicInfo AArch64SVEIntrinsicMap[] = {
7984#define GET_SVE_LLVM_INTRINSIC_MAP
7985#include "clang/Basic/arm_sve_builtin_cg.inc"
7986#include "clang/Basic/BuiltinsAArch64NeonSVEBridge_cg.def"
7987#undef GET_SVE_LLVM_INTRINSIC_MAP
7988};
7989
7990#undef SVEMAP1
7991#undef SVEMAP2
7992
7993#define SMEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
7994 { \
7995 #NameBase, SME::BI__builtin_sme_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
7996 TypeModifier \
7997 }
7998
7999#define SMEMAP2(NameBase, TypeModifier) \
8000 { #NameBase, SME::BI__builtin_sme_##NameBase, 0, 0, TypeModifier }
8001static const ARMVectorIntrinsicInfo AArch64SMEIntrinsicMap[] = {
8002#define GET_SME_LLVM_INTRINSIC_MAP
8003#include "clang/Basic/arm_sme_builtin_cg.inc"
8004#undef GET_SME_LLVM_INTRINSIC_MAP
8005};
8006
8007#undef SMEMAP1
8008#undef SMEMAP2
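// Note (derived from the macro definitions above, not from the generated .inc
// files): each row emitted through SVEMAP1/SMEMAP1 expands to an aggregate of
// the form
//   { "NameBase", SVE::BI__builtin_sve_NameBase /* or SME:: */,
//     Intrinsic::LLVMIntrinsic, 0, TypeModifier }
// while the *MAP2 forms leave the LLVM intrinsic ID as 0, which presumably
// marks builtins that receive custom handling rather than a direct mapping.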
8009
8010static bool NEONSIMDIntrinsicsProvenSorted = false;
8011
8012static bool AArch64SIMDIntrinsicsProvenSorted = false;
8013static bool AArch64SISDIntrinsicsProvenSorted = false;
8014static bool AArch64SVEIntrinsicsProvenSorted = false;
8015static bool AArch64SMEIntrinsicsProvenSorted = false;
8016
8017static const ARMVectorIntrinsicInfo *
8018findARMVectorIntrinsicInMap(ArrayRef<ARMVectorIntrinsicInfo> IntrinsicMap,
8019 unsigned BuiltinID, bool &MapProvenSorted) {
8020
8021#ifndef NDEBUG
8022 if (!MapProvenSorted) {
8023 assert(llvm::is_sorted(IntrinsicMap));
8024 MapProvenSorted = true;
8025 }
8026#endif
8027
8028 const ARMVectorIntrinsicInfo *Builtin =
8029 llvm::lower_bound(IntrinsicMap, BuiltinID);
8030
8031 if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
8032 return Builtin;
8033
8034 return nullptr;
8035}
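// Minimal usage sketch (it assumes the *ProvenSorted flags declared above;
// the real call sites pass the matching table/flag pair):
//   if (const ARMVectorIntrinsicInfo *Info = findARMVectorIntrinsicInMap(
//           AArch64SIMDIntrinsicMap, BuiltinID,
//           AArch64SIMDIntrinsicsProvenSorted))
//     /* dispatch on Info->LLVMIntrinsic and Info->TypeModifier */;
// The sortedness assert runs once per table and only in asserts builds, so
// the lookup itself is just a binary search via llvm::lower_bound.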
8036
8037Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
8038 unsigned Modifier,
8039 llvm::Type *ArgType,
8040 const CallExpr *E) {
8041 int VectorSize = 0;
8042 if (Modifier & Use64BitVectors)
8043 VectorSize = 64;
8044 else if (Modifier & Use128BitVectors)
8045 VectorSize = 128;
8046
8047 // Return type.
8048 SmallVector<llvm::Type *, 3> Tys;
8049 if (Modifier & AddRetType) {
8050 llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
8051 if (Modifier & VectorizeRetType)
8052 Ty = llvm::FixedVectorType::get(
8053 Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);
8054
8055 Tys.push_back(Ty);
8056 }
8057
8058 // Arguments.
8059 if (Modifier & VectorizeArgTypes) {
8060 int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1;
8061 ArgType = llvm::FixedVectorType::get(ArgType, Elts);
8062 }
8063
8064 if (Modifier & (Add1ArgType | Add2ArgTypes))
8065 Tys.push_back(ArgType);
8066
8067 if (Modifier & Add2ArgTypes)
8068 Tys.push_back(ArgType);
8069
8070 if (Modifier & InventFloatType)
8071 Tys.push_back(FloatTy);
8072
8073 return CGM.getIntrinsic(IntrinsicID, Tys);
8074}
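// Worked example (illustrative, not tied to a particular builtin): for an
// entry flagged AddRetType | Add1ArgType with no vectorize bits, Tys becomes
// { ReturnTy, ArgTy } and CGM.getIntrinsic returns the overload of
// IntrinsicID specialised on those two types. If Use64BitVectors |
// VectorizeArgTypes were set instead and ArgType were i16, the argument type
// pushed would be the 64-bit vector <4 x i16> (64 / 16 elements).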
8075
8076static Value *EmitCommonNeonSISDBuiltinExpr(
8077 CodeGenFunction &CGF, const ARMVectorIntrinsicInfo &SISDInfo,
8078 SmallVectorImpl<Value *> &Ops, const CallExpr *E) {
8079 unsigned BuiltinID = SISDInfo.BuiltinID;
8080 unsigned int Int = SISDInfo.LLVMIntrinsic;
8081 unsigned Modifier = SISDInfo.TypeModifier;
8082 const char *s = SISDInfo.NameHint;
8083
8084 switch (BuiltinID) {
8085 case NEON::BI__builtin_neon_vcled_s64:
8086 case NEON::BI__builtin_neon_vcled_u64:
8087 case NEON::BI__builtin_neon_vcles_f32:
8088 case NEON::BI__builtin_neon_vcled_f64:
8089 case NEON::BI__builtin_neon_vcltd_s64:
8090 case NEON::BI__builtin_neon_vcltd_u64:
8091 case NEON::BI__builtin_neon_vclts_f32:
8092 case NEON::BI__builtin_neon_vcltd_f64:
8093 case NEON::BI__builtin_neon_vcales_f32:
8094 case NEON::BI__builtin_neon_vcaled_f64:
8095 case NEON::BI__builtin_neon_vcalts_f32:
8096 case NEON::BI__builtin_neon_vcaltd_f64:
8097 // Only one direction of comparisons actually exists; cmle is actually a cmge
8098 // with swapped operands. The table gives us the right intrinsic but we
8099 // still need to do the swap.
8100 std::swap(Ops[0], Ops[1]);
8101 break;
8102 }
8103
8104 assert(Int && "Generic code assumes a valid intrinsic");
8105
8106 // Determine the type(s) of this overloaded AArch64 intrinsic.
8107 const Expr *Arg = E->getArg(0);
8108 llvm::Type *ArgTy = CGF.ConvertType(Arg->getType());
8109 Function *F = CGF.LookupNeonLLVMIntrinsic(Int, Modifier, ArgTy, E);
8110
8111 int j = 0;
8112 ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0);
8113 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
8114 ai != ae; ++ai, ++j) {
8115 llvm::Type *ArgTy = ai->getType();
8116 if (Ops[j]->getType()->getPrimitiveSizeInBits() ==
8117 ArgTy->getPrimitiveSizeInBits())
8118 continue;
8119
8120 assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy());
8121 // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate
8122 // it before inserting.
8123 Ops[j] = CGF.Builder.CreateTruncOrBitCast(
8124 Ops[j], cast<llvm::VectorType>(ArgTy)->getElementType());
8125 Ops[j] =
8126 CGF.Builder.CreateInsertElement(PoisonValue::get(ArgTy), Ops[j], C0);
8127 }
8128
8129 Value *Result = CGF.EmitNeonCall(F, Ops, s);
8130 llvm::Type *ResultType = CGF.ConvertType(E->getType());
8131 if (ResultType->getPrimitiveSizeInBits().getFixedValue() <
8132 Result->getType()->getPrimitiveSizeInBits().getFixedValue())
8133 return CGF.Builder.CreateExtractElement(Result, C0);
8134
8135 return CGF.Builder.CreateBitCast(Result, ResultType, s);
8136}
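// Example of the scalar-to-vector promotion above (an illustration, not a
// specific builtin): when the selected intrinsic overload expects a <4 x i16>
// operand but the builtin operand is an i32 (the _n_ immediate case), the
// value is truncated to i16 and inserted into lane 0 of a poison <4 x i16>;
// if the intrinsic then returns a wider vector than the builtin's scalar
// result type, lane 0 is extracted to recover the scalar.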
8137
8138Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
8139 unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
8140 const char *NameHint, unsigned Modifier, const CallExpr *E,
8141 SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1,
8142 llvm::Triple::ArchType Arch) {
8143 // Get the last argument, which specifies the vector type.
8144 const Expr *Arg = E->getArg(E->getNumArgs() - 1);
8145 std::optional<llvm::APSInt> NeonTypeConst =
8146 Arg->getIntegerConstantExpr(getContext());
8147 if (!NeonTypeConst)
8148 return nullptr;
8149
8150 // Determine the type of this overloaded NEON intrinsic.
8151 NeonTypeFlags Type(NeonTypeConst->getZExtValue());
8152 bool Usgn = Type.isUnsigned();
8153 bool Quad = Type.isQuad();
8154 const bool HasLegalHalfType = getTarget().hasLegalHalfType();
8155 const bool AllowBFloatArgsAndRet =
8156 getTargetHooks().getABIInfo().allowBFloatArgsAndRet();
8157
8158 llvm::FixedVectorType *VTy =
8159 GetNeonType(this, Type, HasLegalHalfType, false, AllowBFloatArgsAndRet);
8160 llvm::Type *Ty = VTy;
8161 if (!Ty)
8162 return nullptr;
8163
8164 auto getAlignmentValue32 = [&](Address addr) -> Value* {
8165 return Builder.getInt32(addr.getAlignment().getQuantity());
8166 };
8167
8168 unsigned Int = LLVMIntrinsic;
8169 if ((Modifier & UnsignedAlts) && !Usgn)
8170 Int = AltLLVMIntrinsic;
8171
8172 switch (BuiltinID) {
8173 default: break;
8174 case NEON::BI__builtin_neon_splat_lane_v:
8175 case NEON::BI__builtin_neon_splat_laneq_v:
8176 case NEON::BI__builtin_neon_splatq_lane_v:
8177 case NEON::BI__builtin_neon_splatq_laneq_v: {
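 // The result of splatq_lane_v has twice as many lanes as its 64-bit source,
 // and splat_laneq_v has half as many as its 128-bit source; the remaining
 // splat forms keep the source lane count unchanged.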
8178 auto NumElements = VTy->getElementCount();
8179 if (BuiltinID == NEON::BI__builtin_neon_splatq_lane_v)
8180 NumElements = NumElements * 2;
8181 if (BuiltinID == NEON::BI__builtin_neon_splat_laneq_v)
8182 NumElements = NumElements.divideCoefficientBy(2);
8183
8184 Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
8185 return EmitNeonSplat(Ops[0], cast<ConstantInt>(Ops[1]), NumElements);
8186 }
8187 case NEON::BI__builtin_neon_vpadd_v:
8188 case NEON::BI__builtin_neon_vpaddq_v:
8189 // We don't allow fp/int overloading of intrinsics.
8190 if (VTy->getElementType()->isFloatingPointTy() &&
8191 Int == Intrinsic::aarch64_neon_addp)
8192 Int = Intrinsic::aarch64_neon_faddp;
8193 break;
8194 case NEON::BI__builtin_neon_vabs_v:
8195 case NEON::BI__builtin_neon_vabsq_v:
8196 if (VTy->getElementType()->isFloatingPointTy())
8197 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
8198 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");
8199 case NEON::BI__builtin_neon_vadd_v:
8200 case NEON::BI__builtin_neon_vaddq_v: {
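 // Polynomial (poly8/poly64/poly128) addition is carry-less addition in
 // GF(2), which is why it is lowered here to a bitwise XOR on byte vectors.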
8201 llvm::Type *VTy = llvm::FixedVectorType::get(Int8Ty, Quad ? 16 : 8);
8202 Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
8203 Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
8204 Ops[0] = Builder.CreateXor(Ops[0], Ops[1]);
8205 return Builder.CreateBitCast(Ops[0], Ty);
8206 }
8207 case NEON::BI__builtin_neon_vaddhn_v: {
8208 llvm::FixedVectorType *SrcTy =
8209 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
8210
8211 // %sum = add <4 x i32> %lhs, %rhs
8212 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
8213 Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
8214 Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");
8215
8216 // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
8217 Constant *ShiftAmt =
8218 ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
8219 Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");
8220
8221 // %res = trunc <4 x i32> %high to <4 x i16>
8222 return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
8223 }
8224 case NEON::BI__builtin_neon_vcale_v:
8225 case NEON::BI__builtin_neon_vcaleq_v:
8226 case NEON::BI__builtin_neon_vcalt_v:
8227 case NEON::BI__builtin_neon_vcaltq_v:
8228 std::swap(Ops[0], Ops[1]);
8229 [[fallthrough]];
8230 case NEON::BI__builtin_neon_vcage_v:
8231 case NEON::BI__builtin_neon_vcageq_v:
8232 case NEON::BI__builtin_neon_vcagt_v:
8233 case NEON::BI__builtin_neon_vcagtq_v: {
8234 llvm::Type *Ty;
8235 switch (VTy->getScalarSizeInBits()) {
8236 default: llvm_unreachable("unexpected type");
8237 case 32:
8238 Ty = FloatTy;
8239 break;
8240 case 64:
8241 Ty = DoubleTy;
8242 break;
8243 case 16:
8244 Ty = HalfTy;
8245 break;
8246 }
8247 auto *VecFlt = llvm::FixedVectorType::get(Ty, VTy->getNumElements());
8248 llvm::Type *Tys[] = { VTy, VecFlt };
8249 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
8250 return EmitNeonCall(F, Ops, NameHint);
8251 }
8252 case NEON::BI__builtin_neon_vceqz_v:
8253 case NEON::BI__builtin_neon_vceqzq_v:
8254 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ,
8255 ICmpInst::ICMP_EQ, "vceqz");
8256 case NEON::BI__builtin_neon_vcgez_v:
8257 case NEON::BI__builtin_neon_vcgezq_v:
8258 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE,
8259 ICmpInst::ICMP_SGE, "vcgez");
8260 case NEON::BI__builtin_neon_vclez_v:
8261 case NEON::BI__builtin_neon_vclezq_v:
8262 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE,
8263 ICmpInst::ICMP_SLE, "vclez");
8264 case NEON::BI__builtin_neon_vcgtz_v:
8265 case NEON::BI__builtin_neon_vcgtzq_v:
8266 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT,
8267 ICmpInst::ICMP_SGT, "vcgtz");
8268 case NEON::BI__builtin_neon_vcltz_v:
8269 case NEON::BI__builtin_neon_vcltzq_v:
8270 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT,
8271 ICmpInst::ICMP_SLT, "vcltz");
8272 case NEON::BI__builtin_neon_vclz_v:
8273 case NEON::BI__builtin_neon_vclzq_v:
8274 // We generate a target-independent intrinsic, which needs a second argument
8275 // indicating whether or not clz of zero is undefined; on ARM it isn't.
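// For example (a sketch, assuming an ARM target where clz(0) is defined):
//   vclz_s32(a)  ->  %vclz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1 false)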
8276 Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
8277 break;
8278 case NEON::BI__builtin_neon_vcvt_f32_v:
8279 case NEON::BI__builtin_neon_vcvtq_f32_v:
8280 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
8281 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad),
8282 HasLegalHalfType);
8283 return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
8284 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
8285 case NEON::BI__builtin_neon_vcvt_f16_s16:
8286 case NEON::BI__builtin_neon_vcvt_f16_u16:
8287 case NEON::BI__builtin_neon_vcvtq_f16_s16:
8288 case NEON::BI__builtin_neon_vcvtq_f16_u16:
8289 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
8290 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float16, false, Quad),
8291 HasLegalHalfType);
8292 return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
8293 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
8294 case NEON::BI__builtin_neon_vcvt_n_f16_s16:
8295 case NEON::BI__builtin_neon_vcvt_n_f16_u16:
8296 case NEON::BI__builtin_neon_vcvtq_n_f16_s16:
8297 case NEON::BI__builtin_neon_vcvtq_n_f16_u16: {
8298 llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
8299 Function *F = CGM.getIntrinsic(Int, Tys);
8300 return EmitNeonCall(F, Ops, "vcvt_n");
8301 }
8302 case NEON::BI__builtin_neon_vcvt_n_f32_v:
8303 case NEON::BI__builtin_neon_vcvt_n_f64_v:
8304 case NEON::BI__builtin_neon_vcvtq_n_f32_v:
8305 case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
8306 llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
8307 Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
8308 Function *F = CGM.getIntrinsic(Int, Tys);
8309 return EmitNeonCall(F, Ops, "vcvt_n");
8310 }
8311 case NEON::BI__builtin_neon_vcvt_n_s16_f16:
8312 case NEON::BI__builtin_neon_vcvt_n_s32_v:
8313 case NEON::BI__builtin_neon_vcvt_n_u16_f16:
8314 case NEON::BI__builtin_neon_vcvt_n_u32_v:
8315 case NEON::BI__builtin_neon_vcvt_n_s64_v:
8316 case NEON::BI__builtin_neon_vcvt_n_u64_v:
8317 case NEON::BI__builtin_neon_vcvtq_n_s16_f16:
8318 case NEON::BI__builtin_neon_vcvtq_n_s32_v:
8319 case NEON::BI__builtin_neon_vcvtq_n_u16_f16:
8320 case NEON::BI__builtin_neon_vcvtq_n_u32_v:
8321 case NEON::BI__builtin_neon_vcvtq_n_s64_v:
8322 case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
8323 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
8324 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
8325 return EmitNeonCall(F, Ops, "vcvt_n");
8326 }
8327 case NEON::BI__builtin_neon_vcvt_s32_v:
8328 case NEON::BI__builtin_neon_vcvt_u32_v:
8329 case NEON::BI__builtin_neon_vcvt_s64_v:
8330 case NEON::BI__builtin_neon_vcvt_u64_v:
8331 case NEON::BI__builtin_neon_vcvt_s16_f16:
8332 case NEON::BI__builtin_neon_vcvt_u16_f16:
8333 case NEON::BI__builtin_neon_vcvtq_s32_v:
8334 case NEON::BI__builtin_neon_vcvtq_u32_v:
8335 case NEON::BI__builtin_neon_vcvtq_s64_v:
8336 case NEON::BI__builtin_neon_vcvtq_u64_v:
8337 case NEON::BI__builtin_neon_vcvtq_s16_f16:
8338 case NEON::BI__builtin_neon_vcvtq_u16_f16: {
8339 Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
8340 return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
8341 : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
8342 }
8343 case NEON::BI__builtin_neon_vcvta_s16_f16:
8344 case NEON::BI__builtin_neon_vcvta_s32_v:
8345 case NEON::BI__builtin_neon_vcvta_s64_v:
8346 case NEON::BI__builtin_neon_vcvta_u16_f16:
8347 case NEON::BI__builtin_neon_vcvta_u32_v:
8348 case NEON::BI__builtin_neon_vcvta_u64_v:
8349 case NEON::BI__builtin_neon_vcvtaq_s16_f16:
8350 case NEON::BI__builtin_neon_vcvtaq_s32_v:
8351 case NEON::BI__builtin_neon_vcvtaq_s64_v:
8352 case NEON::BI__builtin_neon_vcvtaq_u16_f16:
8353 case NEON::BI__builtin_neon_vcvtaq_u32_v:
8354 case NEON::BI__builtin_neon_vcvtaq_u64_v:
8355 case NEON::BI__builtin_neon_vcvtn_s16_f16:
8356 case NEON::BI__builtin_neon_vcvtn_s32_v:
8357 case NEON::BI__builtin_neon_vcvtn_s64_v:
8358 case NEON::BI__builtin_neon_vcvtn_u16_f16:
8359 case NEON::BI__builtin_neon_vcvtn_u32_v:
8360 case NEON::BI__builtin_neon_vcvtn_u64_v:
8361 case NEON::BI__builtin_neon_vcvtnq_s16_f16:
8362 case NEON::BI__builtin_neon_vcvtnq_s32_v:
8363 case NEON::BI__builtin_neon_vcvtnq_s64_v:
8364 case NEON::BI__builtin_neon_vcvtnq_u16_f16:
8365 case NEON::BI__builtin_neon_vcvtnq_u32_v:
8366 case NEON::BI__builtin_neon_vcvtnq_u64_v:
8367 case NEON::BI__builtin_neon_vcvtp_s16_f16:
8368 case NEON::BI__builtin_neon_vcvtp_s32_v:
8369 case NEON::BI__builtin_neon_vcvtp_s64_v:
8370 case NEON::BI__builtin_neon_vcvtp_u16_f16:
8371 case NEON::BI__builtin_neon_vcvtp_u32_v:
8372 case NEON::BI__builtin_neon_vcvtp_u64_v:
8373 case NEON::BI__builtin_neon_vcvtpq_s16_f16:
8374 case NEON::BI__builtin_neon_vcvtpq_s32_v:
8375 case NEON::BI__builtin_neon_vcvtpq_s64_v:
8376 case NEON::BI__builtin_neon_vcvtpq_u16_f16:
8377 case NEON::BI__builtin_neon_vcvtpq_u32_v:
8378 case NEON::BI__builtin_neon_vcvtpq_u64_v:
8379 case NEON::BI__builtin_neon_vcvtm_s16_f16:
8380 case NEON::BI__builtin_neon_vcvtm_s32_v:
8381 case NEON::BI__builtin_neon_vcvtm_s64_v:
8382 case NEON::BI__builtin_neon_vcvtm_u16_f16:
8383 case NEON::BI__builtin_neon_vcvtm_u32_v:
8384 case NEON::BI__builtin_neon_vcvtm_u64_v:
8385 case NEON::BI__builtin_neon_vcvtmq_s16_f16:
8386 case NEON::BI__builtin_neon_vcvtmq_s32_v:
8387 case NEON::BI__builtin_neon_vcvtmq_s64_v:
8388 case NEON::BI__builtin_neon_vcvtmq_u16_f16:
8389 case NEON::BI__builtin_neon_vcvtmq_u32_v:
8390 case NEON::BI__builtin_neon_vcvtmq_u64_v: {
8391 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
8392 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
8393 }
8394 case NEON::BI__builtin_neon_vcvtx_f32_v: {
8395 llvm::Type *Tys[2] = { VTy->getTruncatedElementVectorType(VTy), Ty};
8396 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
8397
8398 }
8399 case NEON::BI__builtin_neon_vext_v:
8400 case NEON::BI__builtin_neon_vextq_v: {
8401 int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
8402 SmallVector<int, 16> Indices;
8403 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
8404 Indices.push_back(i+CV);
8405
8406 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
8407 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8408 return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext");
8409 }
8410 case NEON::BI__builtin_neon_vfma_v:
8411 case NEON::BI__builtin_neon_vfmaq_v: {
8412 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
8413 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8414 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
8415
8416 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
8417 return emitCallMaybeConstrainedFPBuiltin(
8418 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
8419 {Ops[1], Ops[2], Ops[0]});
8420 }
8421 case NEON::BI__builtin_neon_vld1_v:
8422 case NEON::BI__builtin_neon_vld1q_v: {
8423 llvm::Type *Tys[] = {Ty, Int8PtrTy};
8424 Ops.push_back(getAlignmentValue32(PtrOp0));
8425 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1");
8426 }
8427 case NEON::BI__builtin_neon_vld1_x2_v:
8428 case NEON::BI__builtin_neon_vld1q_x2_v:
8429 case NEON::BI__builtin_neon_vld1_x3_v:
8430 case NEON::BI__builtin_neon_vld1q_x3_v:
8431 case NEON::BI__builtin_neon_vld1_x4_v:
8432 case NEON::BI__builtin_neon_vld1q_x4_v: {
8433 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
8434 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
8435 Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN");
8436 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
8437 }
8438 case NEON::BI__builtin_neon_vld2_v:
8439 case NEON::BI__builtin_neon_vld2q_v:
8440 case NEON::BI__builtin_neon_vld3_v:
8441 case NEON::BI__builtin_neon_vld3q_v:
8442 case NEON::BI__builtin_neon_vld4_v:
8443 case NEON::BI__builtin_neon_vld4q_v:
8444 case NEON::BI__builtin_neon_vld2_dup_v:
8445 case NEON::BI__builtin_neon_vld2q_dup_v:
8446 case NEON::BI__builtin_neon_vld3_dup_v:
8447 case NEON::BI__builtin_neon_vld3q_dup_v:
8448 case NEON::BI__builtin_neon_vld4_dup_v:
8449 case NEON::BI__builtin_neon_vld4q_dup_v: {
8450 llvm::Type *Tys[] = {Ty, Int8PtrTy};
8451 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
8452 Value *Align = getAlignmentValue32(PtrOp1);
8453 Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint);
8454 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
8455 }
8456 case NEON::BI__builtin_neon_vld1_dup_v:
8457 case NEON::BI__builtin_neon_vld1q_dup_v: {
8458 Value *V = PoisonValue::get(Ty);
8459 PtrOp0 = PtrOp0.withElementType(VTy->getElementType());
8460 LoadInst *Ld = Builder.CreateLoad(PtrOp0);
8461 llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
8462 Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
8463 return EmitNeonSplat(Ops[0], CI);
8464 }
8465 case NEON::BI__builtin_neon_vld2_lane_v:
8466 case NEON::BI__builtin_neon_vld2q_lane_v:
8467 case NEON::BI__builtin_neon_vld3_lane_v:
8468 case NEON::BI__builtin_neon_vld3q_lane_v:
8469 case NEON::BI__builtin_neon_vld4_lane_v:
8470 case NEON::BI__builtin_neon_vld4q_lane_v: {
8471 llvm::Type *Tys[] = {Ty, Int8PtrTy};
8472 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
8473 for (unsigned I = 2; I < Ops.size() - 1; ++I)
8474 Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
8475 Ops.push_back(getAlignmentValue32(PtrOp1));
8476 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), NameHint);
8477 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
8478 }
8479 case NEON::BI__builtin_neon_vmovl_v: {
8480 llvm::FixedVectorType *DTy =
8481 llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
8482 Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
8483 if (Usgn)
8484 return Builder.CreateZExt(Ops[0], Ty, "vmovl");
8485 return Builder.CreateSExt(Ops[0], Ty, "vmovl");
8486 }
8487 case NEON::BI__builtin_neon_vmovn_v: {
8488 llvm::FixedVectorType *QTy =
8489 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
8490 Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
8491 return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
8492 }
8493 case NEON::BI__builtin_neon_vmull_v:
8494 // FIXME: the integer vmull operations could be emitted in terms of pure
8495 // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of
8496 // hoisting the exts outside loops. Until global ISel comes along that can
8497 // see through such movement this leads to bad CodeGen. So we need an
8498 // intrinsic for now.
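// For example (a sketch of the pure-IR form described above):
//   vmull_s16(a, b)  ->  sext both <4 x i16> operands to <4 x i32>, then mul.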
8499 Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
8500 Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
8501 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
8502 case NEON::BI__builtin_neon_vpadal_v:
8503 case NEON::BI__builtin_neon_vpadalq_v: {
8504 // The source operand type has twice as many elements of half the size.
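// For example (a sketch): for vpadalq_s16, Ty is <4 x i32> and the computed
// NarrowTy is <8 x i16>.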
8505 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
8506 llvm::Type *EltTy =
8507 llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
8508 auto *NarrowTy =
8509 llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);
8510 llvm::Type *Tys[2] = { Ty, NarrowTy };
8511 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
8512 }
8513 case NEON::BI__builtin_neon_vpaddl_v:
8514 case NEON::BI__builtin_neon_vpaddlq_v: {
8515 // The source operand type has twice as many elements of half the size.
8516 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
8517 llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
8518 auto *NarrowTy =
8519 llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);
8520 llvm::Type *Tys[2] = { Ty, NarrowTy };
8521 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
8522 }
8523 case NEON::BI__builtin_neon_vqdmlal_v:
8524 case NEON::BI__builtin_neon_vqdmlsl_v: {
8525 SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
8526 Ops[1] =
8527 EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), MulOps, "vqdmlal");
8528 Ops.resize(2);
8529 return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint);
8530 }
8531 case NEON::BI__builtin_neon_vqdmulhq_lane_v:
8532 case NEON::BI__builtin_neon_vqdmulh_lane_v:
8533 case NEON::BI__builtin_neon_vqrdmulhq_lane_v:
8534 case NEON::BI__builtin_neon_vqrdmulh_lane_v: {
8535 auto *RTy = cast<llvm::FixedVectorType>(Ty);
8536 if (BuiltinID == NEON::BI__builtin_neon_vqdmulhq_lane_v ||
8537 BuiltinID == NEON::BI__builtin_neon_vqrdmulhq_lane_v)
8538 RTy = llvm::FixedVectorType::get(RTy->getElementType(),
8539 RTy->getNumElements() * 2);
8540 llvm::Type *Tys[2] = {
8541 RTy, GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
8542 /*isQuad*/ false))};
8543 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
8544 }
8545 case NEON::BI__builtin_neon_vqdmulhq_laneq_v:
8546 case NEON::BI__builtin_neon_vqdmulh_laneq_v:
8547 case NEON::BI__builtin_neon_vqrdmulhq_laneq_v:
8548 case NEON::BI__builtin_neon_vqrdmulh_laneq_v: {
8549 llvm::Type *Tys[2] = {
8550 Ty, GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
8551 /*isQuad*/ true))};
8552 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
8553 }
8554 case NEON::BI__builtin_neon_vqshl_n_v:
8555 case NEON::BI__builtin_neon_vqshlq_n_v:
8556 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
8557 1, false);
8558 case NEON::BI__builtin_neon_vqshlu_n_v:
8559 case NEON::BI__builtin_neon_vqshluq_n_v:
8560 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n",
8561 1, false);
8562 case NEON::BI__builtin_neon_vrecpe_v:
8563 case NEON::BI__builtin_neon_vrecpeq_v:
8564 case NEON::BI__builtin_neon_vrsqrte_v:
8565 case NEON::BI__builtin_neon_vrsqrteq_v:
8566 Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
8567 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
8568 case NEON::BI__builtin_neon_vrndi_v:
8569 case NEON::BI__builtin_neon_vrndiq_v:
8570 Int = Builder.getIsFPConstrained()
8571 ? Intrinsic::experimental_constrained_nearbyint
8572 : Intrinsic::nearbyint;
8573 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
8574 case NEON::BI__builtin_neon_vrshr_n_v:
8575 case NEON::BI__builtin_neon_vrshrq_n_v:
8576 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n",
8577 1, true);
8578 case NEON::BI__builtin_neon_vsha512hq_u64:
8579 case NEON::BI__builtin_neon_vsha512h2q_u64:
8580 case NEON::BI__builtin_neon_vsha512su0q_u64:
8581 case NEON::BI__builtin_neon_vsha512su1q_u64: {
8582 Function *F = CGM.getIntrinsic(Int);
8583 return EmitNeonCall(F, Ops, "");
8584 }
8585 case NEON::BI__builtin_neon_vshl_n_v:
8586 case NEON::BI__builtin_neon_vshlq_n_v:
8587 Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
8588 return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1],
8589 "vshl_n");
8590 case NEON::BI__builtin_neon_vshll_n_v: {
8591 llvm::FixedVectorType *SrcTy =
8592 llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
8593 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
8594 if (Usgn)
8595 Ops[0] = Builder.CreateZExt(Ops[0], VTy);
8596 else
8597 Ops[0] = Builder.CreateSExt(Ops[0], VTy);
8598 Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);
8599 return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
8600 }
8601 case NEON::BI__builtin_neon_vshrn_n_v: {
8602 llvm::FixedVectorType *SrcTy =
8603 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
8604 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
8605 Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);
8606 if (Usgn)
8607 Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
8608 else
8609 Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
8610 return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
8611 }
8612 case NEON::BI__builtin_neon_vshr_n_v:
8613 case NEON::BI__builtin_neon_vshrq_n_v:
8614 return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n");
8615 case NEON::BI__builtin_neon_vst1_v:
8616 case NEON::BI__builtin_neon_vst1q_v:
8617 case NEON::BI__builtin_neon_vst2_v:
8618 case NEON::BI__builtin_neon_vst2q_v:
8619 case NEON::BI__builtin_neon_vst3_v:
8620 case NEON::BI__builtin_neon_vst3q_v:
8621 case NEON::BI__builtin_neon_vst4_v:
8622 case NEON::BI__builtin_neon_vst4q_v:
8623 case NEON::BI__builtin_neon_vst2_lane_v:
8624 case NEON::BI__builtin_neon_vst2q_lane_v:
8625 case NEON::BI__builtin_neon_vst3_lane_v:
8626 case NEON::BI__builtin_neon_vst3q_lane_v:
8627 case NEON::BI__builtin_neon_vst4_lane_v:
8628 case NEON::BI__builtin_neon_vst4q_lane_v: {
8629 llvm::Type *Tys[] = {Int8PtrTy, Ty};
8630 Ops.push_back(getAlignmentValue32(PtrOp0));
8631 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
8632 }
8633 case NEON::BI__builtin_neon_vsm3partw1q_u32:
8634 case NEON::BI__builtin_neon_vsm3partw2q_u32:
8635 case NEON::BI__builtin_neon_vsm3ss1q_u32:
8636 case NEON::BI__builtin_neon_vsm4ekeyq_u32:
8637 case NEON::BI__builtin_neon_vsm4eq_u32: {
8638 Function *F = CGM.getIntrinsic(Int);
8639 return EmitNeonCall(F, Ops, "");
8640 }
8641 case NEON::BI__builtin_neon_vsm3tt1aq_u32:
8642 case NEON::BI__builtin_neon_vsm3tt1bq_u32:
8643 case NEON::BI__builtin_neon_vsm3tt2aq_u32:
8644 case NEON::BI__builtin_neon_vsm3tt2bq_u32: {
8645 Function *F = CGM.getIntrinsic(Int);
8646 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
8647 return EmitNeonCall(F, Ops, "");
8648 }
8649 case NEON::BI__builtin_neon_vst1_x2_v:
8650 case NEON::BI__builtin_neon_vst1q_x2_v:
8651 case NEON::BI__builtin_neon_vst1_x3_v:
8652 case NEON::BI__builtin_neon_vst1q_x3_v:
8653 case NEON::BI__builtin_neon_vst1_x4_v:
8654 case NEON::BI__builtin_neon_vst1q_x4_v: {
8655 // TODO: Currently in AArch32 mode the pointer operand comes first, whereas
8656 // in AArch64 it comes last. We may want to stick to one or the other.
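// For example (a sketch): for vst1_s8_x2 the AArch64 intrinsic call takes the
// two data vectors followed by the pointer, while the AArch32 form takes the
// pointer first; the std::rotate below performs that reordering.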
8657 if (Arch == llvm::Triple::aarch64 || Arch == llvm::Triple::aarch64_be ||
8658 Arch == llvm::Triple::aarch64_32) {
8659 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
8660 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
8661 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
8662 }
8663 llvm::Type *Tys[2] = {UnqualPtrTy, VTy};
8664 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
8665 }
8666 case NEON::BI__builtin_neon_vsubhn_v: {
8667 llvm::FixedVectorType *SrcTy =
8668 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
8669
8670 // %diff = sub <4 x i32> %lhs, %rhs
8671 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
8672 Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
8673 Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");
8674
8675 // %high = lshr <4 x i32> %diff, <i32 16, i32 16, i32 16, i32 16>
8676 Constant *ShiftAmt =
8677 ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
8678 Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");
8679
8680 // %res = trunc <4 x i32> %high to <4 x i16>
8681 return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
8682 }
8683 case NEON::BI__builtin_neon_vtrn_v:
8684 case NEON::BI__builtin_neon_vtrnq_v: {
8685 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8686 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
8687 Value *SV = nullptr;
8688
8689 for (unsigned vi = 0; vi != 2; ++vi) {
8690 SmallVector<int, 16> Indices;
8691 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
8692 Indices.push_back(i+vi);
8693 Indices.push_back(i+e+vi);
8694 }
8695 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
8696 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
8697 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
8698 }
8699 return SV;
8700 }
8701 case NEON::BI__builtin_neon_vtst_v:
8702 case NEON::BI__builtin_neon_vtstq_v: {
8703 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
8704 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8705 Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
8706 Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
8707 ConstantAggregateZero::get(Ty));
8708 return Builder.CreateSExt(Ops[0], Ty, "vtst");
8709 }
8710 case NEON::BI__builtin_neon_vuzp_v:
8711 case NEON::BI__builtin_neon_vuzpq_v: {
8712 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8713 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
8714 Value *SV = nullptr;
8715
8716 for (unsigned vi = 0; vi != 2; ++vi) {
8717 SmallVector<int, 16> Indices;
8718 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
8719 Indices.push_back(2*i+vi);
8720
8721 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
8722 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
8723 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
8724 }
8725 return SV;
8726 }
8727 case NEON::BI__builtin_neon_vxarq_u64: {
8728 Function *F = CGM.getIntrinsic(Int);
8729 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
8730 return EmitNeonCall(F, Ops, "");
8731 }
8732 case NEON::BI__builtin_neon_vzip_v:
8733 case NEON::BI__builtin_neon_vzipq_v: {
8734 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8735 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
8736 Value *SV = nullptr;
8737
8738 for (unsigned vi = 0; vi != 2; ++vi) {
8739 SmallVector<int, 16> Indices;
8740 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
8741 Indices.push_back((i + vi*e) >> 1);
8742 Indices.push_back(((i + vi*e) >> 1)+e);
8743 }
8744 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
8745 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
8746 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
8747 }
8748 return SV;
8749 }
8750 case NEON::BI__builtin_neon_vdot_s32:
8751 case NEON::BI__builtin_neon_vdot_u32:
8752 case NEON::BI__builtin_neon_vdotq_s32:
8753 case NEON::BI__builtin_neon_vdotq_u32: {
8754 auto *InputTy =
8755 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
8756 llvm::Type *Tys[2] = { Ty, InputTy };
8757 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vdot");
8758 }
8759 case NEON::BI__builtin_neon_vfmlal_low_f16:
8760 case NEON::BI__builtin_neon_vfmlalq_low_f16: {
8761 auto *InputTy =
8762 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
8763 llvm::Type *Tys[2] = { Ty, InputTy };
8764 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_low");
8765 }
8766 case NEON::BI__builtin_neon_vfmlsl_low_f16:
8767 case NEON::BI__builtin_neon_vfmlslq_low_f16: {
8768 auto *InputTy =
8769 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
8770 llvm::Type *Tys[2] = { Ty, InputTy };
8771 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_low");
8772 }
8773 case NEON::BI__builtin_neon_vfmlal_high_f16:
8774 case NEON::BI__builtin_neon_vfmlalq_high_f16: {
8775 auto *InputTy =
8776 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
8777 llvm::Type *Tys[2] = { Ty, InputTy };
8778 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_high");
8779 }
8780 case NEON::BI__builtin_neon_vfmlsl_high_f16:
8781 case NEON::BI__builtin_neon_vfmlslq_high_f16: {
8782 auto *InputTy =
8783 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
8784 llvm::Type *Tys[2] = { Ty, InputTy };
8785 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_high");
8786 }
8787 case NEON::BI__builtin_neon_vmmlaq_s32:
8788 case NEON::BI__builtin_neon_vmmlaq_u32: {
8789 auto *InputTy =
8790 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
8791 llvm::Type *Tys[2] = { Ty, InputTy };
8792 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vmmla");
8793 }
8794 case NEON::BI__builtin_neon_vusmmlaq_s32: {
8795 auto *InputTy =
8796 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
8797 llvm::Type *Tys[2] = { Ty, InputTy };
8798 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusmmla");
8799 }
8800 case NEON::BI__builtin_neon_vusdot_s32:
8801 case NEON::BI__builtin_neon_vusdotq_s32: {
8802 auto *InputTy =
8803 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
8804 llvm::Type *Tys[2] = { Ty, InputTy };
8805 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusdot");
8806 }
8807 case NEON::BI__builtin_neon_vbfdot_f32:
8808 case NEON::BI__builtin_neon_vbfdotq_f32: {
8809 llvm::Type *InputTy =
8810 llvm::FixedVectorType::get(BFloatTy, Ty->getPrimitiveSizeInBits() / 16);
8811 llvm::Type *Tys[2] = { Ty, InputTy };
8812 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vbfdot");
8813 }
8814 case NEON::BI__builtin_neon___a32_vcvt_bf16_f32: {
8815 llvm::Type *Tys[1] = { Ty };
8816 Function *F = CGM.getIntrinsic(Int, Tys);
8817 return EmitNeonCall(F, Ops, "vcvtfp2bf");
8818 }
8819
8820 }
8821
8822 assert(Int && "Expected valid intrinsic number");
8823
8824 // Determine the type(s) of this overloaded AArch64 intrinsic.
8825 Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E);
8826
8827 Value *Result = EmitNeonCall(F, Ops, NameHint);
8828 llvm::Type *ResultType = ConvertType(E->getType());
8829 // Cast the AArch64 intrinsic's one-element vector result to the
8830 // scalar type expected by the builtin.
8831 return Builder.CreateBitCast(Result, ResultType, NameHint);
8832}
8833
8834Value *CodeGenFunction::EmitAArch64CompareBuiltinExpr(
8835 Value *Op, llvm::Type *Ty, const CmpInst::Predicate Fp,
8836 const CmpInst::Predicate Ip, const Twine &Name) {
8837 llvm::Type *OTy = Op->getType();
8838
8839 // FIXME: this is utterly horrific. We should not be looking at previous
8840 // codegen context to find out what needs doing. Unfortunately TableGen
8841 // currently gives us exactly the same calls for vceqz_f32 and vceqz_s32
8842 // (etc).
8843 if (BitCastInst *BI = dyn_cast<BitCastInst>(Op))
8844 OTy = BI->getOperand(0)->getType();
8845
8846 Op = Builder.CreateBitCast(Op, OTy);
8847 if (OTy->getScalarType()->isFloatingPointTy()) {
8848 if (Fp == CmpInst::FCMP_OEQ)
8849 Op = Builder.CreateFCmp(Fp, Op, Constant::getNullValue(OTy));
8850 else
8851 Op = Builder.CreateFCmpS(Fp, Op, Constant::getNullValue(OTy));
8852 } else {
8853 Op = Builder.CreateICmp(Ip, Op, Constant::getNullValue(OTy));
8854 }
8855 return Builder.CreateSExt(Op, Ty, Name);
8856}
8857
8858static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
8859 Value *ExtOp, Value *IndexOp,
8860 llvm::Type *ResTy, unsigned IntID,
8861 const char *Name) {
8862 SmallVector<Value *, 2> TblOps;
8863 if (ExtOp)
8864 TblOps.push_back(ExtOp);
8865
8866 // Build a vector containing sequential numbers like (0, 1, 2, ..., 15)
8867 SmallVector<int, 16> Indices;
8868 auto *TblTy = cast<llvm::FixedVectorType>(Ops[0]->getType());
8869 for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
8870 Indices.push_back(2*i);
8871 Indices.push_back(2*i+1);
8872 }
8873
8874 int PairPos = 0, End = Ops.size() - 1;
8875 while (PairPos < End) {
8876 TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
8877 Ops[PairPos+1], Indices,
8878 Name));
8879 PairPos += 2;
8880 }
8881
8882 // If there's an odd number of 64-bit lookup tables, fill the high 64 bits
8883 // of the final 128-bit lookup table with zero.
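// For example (a sketch): vtbl3 supplies three 64-bit table registers; the
// first two are paired into one 128-bit table and the third is zero-padded
// in its upper half to form the second.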
8884 if (PairPos == End) {
8885 Value *ZeroTbl = ConstantAggregateZero::get(TblTy);
8886 TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
8887 ZeroTbl, Indices, Name));
8888 }
8889
8890 Function *TblF;
8891 TblOps.push_back(IndexOp);
8892 TblF = CGF.CGM.getIntrinsic(IntID, ResTy);
8893
8894 return CGF.EmitNeonCall(TblF, TblOps, Name);
8895}
8896
8897Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) {
8898 unsigned Value;
8899 switch (BuiltinID) {
8900 default:
8901 return nullptr;
8902 case clang::ARM::BI__builtin_arm_nop:
8903 Value = 0;
8904 break;
8905 case clang::ARM::BI__builtin_arm_yield:
8906 case clang::ARM::BI__yield:
8907 Value = 1;
8908 break;
8909 case clang::ARM::BI__builtin_arm_wfe:
8910 case clang::ARM::BI__wfe:
8911 Value = 2;
8912 break;
8913 case clang::ARM::BI__builtin_arm_wfi:
8914 case clang::ARM::BI__wfi:
8915 Value = 3;
8916 break;
8917 case clang::ARM::BI__builtin_arm_sev:
8918 case clang::ARM::BI__sev:
8919 Value = 4;
8920 break;
8921 case clang::ARM::BI__builtin_arm_sevl:
8922 case clang::ARM::BI__sevl:
8923 Value = 5;
8924 break;
8925 }
8926
8927 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint),
8928 llvm::ConstantInt::get(Int32Ty, Value));
8929}
8930
8931enum SpecialRegisterAccessKind {
8932 NormalRead,
8933 VolatileRead,
8934 Write,
8935};
8936
8937// Generates the IR for __builtin_read_exec_*.
8938 // Lowers the builtin to the amdgcn_ballot intrinsic.
8939static Value *EmitAMDGCNBallotForExec(CodeGenFunction &CGF, const CallExpr *E,
8940 llvm::Type *RegisterType,
8941 llvm::Type *ValueType, bool isExecHi) {
8942 CodeGen::CGBuilderTy &Builder = CGF.Builder;
8943 CodeGen::CodeGenModule &CGM = CGF.CGM;
8944
8945 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {RegisterType});
8946 llvm::Value *Call = Builder.CreateCall(F, {Builder.getInt1(true)});
8947
8948 if (isExecHi) {
8949 Value *Rt2 = Builder.CreateLShr(Call, 32);
8950 Rt2 = Builder.CreateTrunc(Rt2, CGF.Int32Ty);
8951 return Rt2;
8952 }
8953
8954 return Call;
8955}
8956
8957 // Generates the IR for the read/write special register builtins.
8958 // ValueType is the type of the value that is to be written or read;
8959 // RegisterType is the type of the register being written to or read from.
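// For example (a sketch): a volatile 32-bit read is emitted as a call to
// llvm.read_volatile_register.i32 whose only operand is metadata wrapping the
// register name string.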
8960static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
8961 const CallExpr *E,
8962 llvm::Type *RegisterType,
8963 llvm::Type *ValueType,
8964 SpecialRegisterAccessKind AccessKind,
8965 StringRef SysReg = "") {
8966 // Read and write register intrinsics only support 32-, 64- and 128-bit operations.
8967 assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64) ||
8968 RegisterType->isIntegerTy(128)) &&
8969 "Unsupported size for register.");
8970
8971 CodeGen::CGBuilderTy &Builder = CGF.Builder;
8972 CodeGen::CodeGenModule &CGM = CGF.CGM;
8973 LLVMContext &Context = CGM.getLLVMContext();
8974
8975 if (SysReg.empty()) {
8976 const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts();
8977 SysReg = cast<clang::StringLiteral>(SysRegStrExpr)->getString();
8978 }
8979
8980 llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) };
8981 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
8982 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
8983
8984 llvm::Type *Types[] = { RegisterType };
8985
8986 bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32);
8987 assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64))
8988 && "Can't fit 64-bit value in 32-bit register");
8989
8990 if (AccessKind != Write) {
8991 assert(AccessKind == NormalRead || AccessKind == VolatileRead);
8992 llvm::Function *F = CGM.getIntrinsic(
8993 AccessKind == VolatileRead ? llvm::Intrinsic::read_volatile_register
8994 : llvm::Intrinsic::read_register,
8995 Types);
8996 llvm::Value *Call = Builder.CreateCall(F, Metadata);
8997
8998 if (MixedTypes)
8999 // Read into 64 bit register and then truncate result to 32 bit.
9000 return Builder.CreateTrunc(Call, ValueType);
9001
9002 if (ValueType->isPointerTy())
9003 // Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*).
9004 return Builder.CreateIntToPtr(Call, ValueType);
9005
9006 return Call;
9007 }
9008
9009 llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
9010 llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1));
9011 if (MixedTypes) {
9012 // Extend 32 bit write value to 64 bit to pass to write.
9013 ArgValue = Builder.CreateZExt(ArgValue, RegisterType);
9014 return Builder.CreateCall(F, { Metadata, ArgValue });
9015 }
9016
9017 if (ValueType->isPointerTy()) {
9018 // Have VoidPtrTy ArgValue but want to return an i32/i64.
9019 ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType);
9020 return Builder.CreateCall(F, { Metadata, ArgValue });
9021 }
9022
9023 return Builder.CreateCall(F, { Metadata, ArgValue });
9024}
9025
9026/// Return true if BuiltinID is an overloaded Neon intrinsic with an extra
9027/// argument that specifies the vector type.
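/// For example (a sketch): vget_lane_f32 takes no trailing type argument,
/// whereas most overloaded "_v" builtins carry an extra constant encoding the
/// NeonTypeFlags of the vector operands.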
9028static bool HasExtraNeonArgument(unsigned BuiltinID) {
9029 switch (BuiltinID) {
9030 default: break;
9031 case NEON::BI__builtin_neon_vget_lane_i8:
9032 case NEON::BI__builtin_neon_vget_lane_i16:
9033 case NEON::BI__builtin_neon_vget_lane_bf16:
9034 case NEON::BI__builtin_neon_vget_lane_i32:
9035 case NEON::BI__builtin_neon_vget_lane_i64:
9036 case NEON::BI__builtin_neon_vget_lane_f32:
9037 case NEON::BI__builtin_neon_vgetq_lane_i8:
9038 case NEON::BI__builtin_neon_vgetq_lane_i16:
9039 case NEON::BI__builtin_neon_vgetq_lane_bf16:
9040 case NEON::BI__builtin_neon_vgetq_lane_i32:
9041 case NEON::BI__builtin_neon_vgetq_lane_i64:
9042 case NEON::BI__builtin_neon_vgetq_lane_f32:
9043 case NEON::BI__builtin_neon_vduph_lane_bf16:
9044 case NEON::BI__builtin_neon_vduph_laneq_bf16:
9045 case NEON::BI__builtin_neon_vset_lane_i8:
9046 case NEON::BI__builtin_neon_vset_lane_i16:
9047 case NEON::BI__builtin_neon_vset_lane_bf16:
9048 case NEON::BI__builtin_neon_vset_lane_i32:
9049 case NEON::BI__builtin_neon_vset_lane_i64:
9050 case NEON::BI__builtin_neon_vset_lane_f32:
9051 case NEON::BI__builtin_neon_vsetq_lane_i8:
9052 case NEON::BI__builtin_neon_vsetq_lane_i16:
9053 case NEON::BI__builtin_neon_vsetq_lane_bf16:
9054 case NEON::BI__builtin_neon_vsetq_lane_i32:
9055 case NEON::BI__builtin_neon_vsetq_lane_i64:
9056 case NEON::BI__builtin_neon_vsetq_lane_f32:
9057 case NEON::BI__builtin_neon_vsha1h_u32:
9058 case NEON::BI__builtin_neon_vsha1cq_u32:
9059 case NEON::BI__builtin_neon_vsha1pq_u32:
9060 case NEON::BI__builtin_neon_vsha1mq_u32:
9061 case NEON::BI__builtin_neon_vcvth_bf16_f32:
9062 case clang::ARM::BI_MoveToCoprocessor:
9063 case clang::ARM::BI_MoveToCoprocessor2:
9064 return false;
9065 }
9066 return true;
9067}
9068
9069Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
9070 const CallExpr *E,
9071 ReturnValueSlot ReturnValue,
9072 llvm::Triple::ArchType Arch) {
9073 if (auto Hint = GetValueForARMHint(BuiltinID))
9074 return Hint;
9075
9076 if (BuiltinID == clang::ARM::BI__emit) {
9077 bool IsThumb = getTarget().getTriple().getArch() == llvm::Triple::thumb;
9078 llvm::FunctionType *FTy =
9079 llvm::FunctionType::get(VoidTy, /*Variadic=*/false);
9080
9081 Expr::EvalResult Result;
9082 if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
9083 llvm_unreachable("Sema will ensure that the parameter is constant");
9084
9085 llvm::APSInt Value = Result.Val.getInt();
9086 uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue();
9087
9088 llvm::InlineAsm *Emit =
9089 IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "",
9090 /*hasSideEffects=*/true)
9091 : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "",
9092 /*hasSideEffects=*/true);
9093
9094 return Builder.CreateCall(Emit);
9095 }
9096
9097 if (BuiltinID == clang::ARM::BI__builtin_arm_dbg) {
9098 Value *Option = EmitScalarExpr(E->getArg(0));
9099 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option);
9100 }
9101
9102 if (BuiltinID == clang::ARM::BI__builtin_arm_prefetch) {
9103 Value *Address = EmitScalarExpr(E->getArg(0));
9104 Value *RW = EmitScalarExpr(E->getArg(1));
9105 Value *IsData = EmitScalarExpr(E->getArg(2));
9106
9107 // Locality is not supported on ARM target
9108 Value *Locality = llvm::ConstantInt::get(Int32Ty, 3);
9109
9110 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
9111 return Builder.CreateCall(F, {Address, RW, Locality, IsData});
9112 }
9113
9114 if (BuiltinID == clang::ARM::BI__builtin_arm_rbit) {
9115 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
9116 return Builder.CreateCall(
9117 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
9118 }
9119
9120 if (BuiltinID == clang::ARM::BI__builtin_arm_clz ||
9121 BuiltinID == clang::ARM::BI__builtin_arm_clz64) {
9122 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
9123 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Arg->getType());
9124 Value *Res = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
9125 if (BuiltinID == clang::ARM::BI__builtin_arm_clz64)
9126 Res = Builder.CreateTrunc(Res, Builder.getInt32Ty());
9127 return Res;
9128 }
9129
9130
9131 if (BuiltinID == clang::ARM::BI__builtin_arm_cls) {
9132 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
9133 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls), Arg, "cls");
9134 }
9135 if (BuiltinID == clang::ARM::BI__builtin_arm_cls64) {
9136 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
9137 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls64), Arg,
9138 "cls");
9139 }
9140
9141 if (BuiltinID == clang::ARM::BI__clear_cache) {
9142 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
9143 const FunctionDecl *FD = E->getDirectCallee();
9144 Value *Ops[2];
9145 for (unsigned i = 0; i < 2; i++)
9146 Ops[i] = EmitScalarExpr(E->getArg(i));
9147 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
9148 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
9149 StringRef Name = FD->getName();
9150 return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
9151 }
9152
9153 if (BuiltinID == clang::ARM::BI__builtin_arm_mcrr ||
9154 BuiltinID == clang::ARM::BI__builtin_arm_mcrr2) {
9155 Function *F;
9156
9157 switch (BuiltinID) {
9158 default: llvm_unreachable("unexpected builtin");
9159 case clang::ARM::BI__builtin_arm_mcrr:
9160 F = CGM.getIntrinsic(Intrinsic::arm_mcrr);
9161 break;
9162 case clang::ARM::BI__builtin_arm_mcrr2:
9163 F = CGM.getIntrinsic(Intrinsic::arm_mcrr2);
9164 break;
9165 }
9166
9167 // The MCRR{2} instruction has 5 operands, but
9168 // the intrinsic has 4 because Rt and Rt2
9169 // are represented as a single unsigned 64-bit
9170 // integer in the intrinsic definition,
9171 // while internally they are represented as
9172 // two 32-bit integers.
9173
9174 Value *Coproc = EmitScalarExpr(E->getArg(0));
9175 Value *Opc1 = EmitScalarExpr(E->getArg(1));
9176 Value *RtAndRt2 = EmitScalarExpr(E->getArg(2));
9177 Value *CRm = EmitScalarExpr(E->getArg(3));
9178
9179 Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
9180 Value *Rt = Builder.CreateTruncOrBitCast(RtAndRt2, Int32Ty);
9181 Value *Rt2 = Builder.CreateLShr(RtAndRt2, C1);
9182 Rt2 = Builder.CreateTruncOrBitCast(Rt2, Int32Ty);
9183
9184 return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm});
9185 }
9186
9187 if (BuiltinID == clang::ARM::BI__builtin_arm_mrrc ||
9188 BuiltinID == clang::ARM::BI__builtin_arm_mrrc2) {
9189 Function *F;
9190
9191 switch (BuiltinID) {
9192 default: llvm_unreachable("unexpected builtin");
9193 case clang::ARM::BI__builtin_arm_mrrc:
9194 F = CGM.getIntrinsic(Intrinsic::arm_mrrc);
9195 break;
9196 case clang::ARM::BI__builtin_arm_mrrc2:
9197 F = CGM.getIntrinsic(Intrinsic::arm_mrrc2);
9198 break;
9199 }
9200
9201 Value *Coproc = EmitScalarExpr(E->getArg(0));
9202 Value *Opc1 = EmitScalarExpr(E->getArg(1));
9203 Value *CRm = EmitScalarExpr(E->getArg(2));
9204 Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm});
9205
9206 // Returns an unsigned 64 bit integer, represented
9207 // as two 32 bit integers.
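// For example (a sketch): the code below zero-extends both 32-bit halves,
// shifts the one extracted at index 1 into the upper word, and ORs them
// together to form the 64-bit result.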
9208
9209 Value *Rt = Builder.CreateExtractValue(RtAndRt2, 1);
9210 Value *Rt1 = Builder.CreateExtractValue(RtAndRt2, 0);
9211 Rt = Builder.CreateZExt(Rt, Int64Ty);
9212 Rt1 = Builder.CreateZExt(Rt1, Int64Ty);
9213
9214 Value *ShiftCast = llvm::ConstantInt::get(Int64Ty, 32);
9215 RtAndRt2 = Builder.CreateShl(Rt, ShiftCast, "shl", true);
9216 RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1);
9217
9218 return Builder.CreateBitCast(RtAndRt2, ConvertType(E->getType()));
9219 }
9220
9221 if (BuiltinID == clang::ARM::BI__builtin_arm_ldrexd ||
9222 ((BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||
9223 BuiltinID == clang::ARM::BI__builtin_arm_ldaex) &&
9224 getContext().getTypeSize(E->getType()) == 64) ||
9225 BuiltinID == clang::ARM::BI__ldrexd) {
9226 Function *F;
9227
9228 switch (BuiltinID) {
9229 default: llvm_unreachable("unexpected builtin");
9230 case clang::ARM::BI__builtin_arm_ldaex:
9231 F = CGM.getIntrinsic(Intrinsic::arm_ldaexd);
9232 break;
9233 case clang::ARM::BI__builtin_arm_ldrexd:
9234 case clang::ARM::BI__builtin_arm_ldrex:
9235 case clang::ARM::BI__ldrexd:
9236 F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);
9237 break;
9238 }
9239
9240 Value *LdPtr = EmitScalarExpr(E->getArg(0));
9241 Value *Val = Builder.CreateCall(F, LdPtr, "ldrexd");
9242
9243 Value *Val0 = Builder.CreateExtractValue(Val, 1);
9244 Value *Val1 = Builder.CreateExtractValue(Val, 0);
9245 Val0 = Builder.CreateZExt(Val0, Int64Ty);
9246 Val1 = Builder.CreateZExt(Val1, Int64Ty);
9247
9248 Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);
9249 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
9250 Val = Builder.CreateOr(Val, Val1);
9251 return Builder.CreateBitCast(Val, ConvertType(E->getType()));
9252 }
9253
9254 if (BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||
9255 BuiltinID == clang::ARM::BI__builtin_arm_ldaex) {
9256 Value *LoadAddr = EmitScalarExpr(E->getArg(0));
9257
9258 QualType Ty = E->getType();
9259 llvm::Type *RealResTy = ConvertType(Ty);
9260 llvm::Type *IntTy =
9261 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
9262
9263 Function *F = CGM.getIntrinsic(
9264 BuiltinID == clang::ARM::BI__builtin_arm_ldaex ? Intrinsic::arm_ldaex
9265 : Intrinsic::arm_ldrex,
9266 UnqualPtrTy);
9267 CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldrex");
9268 Val->addParamAttr(
9269 0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy));
9270
9271 if (RealResTy->isPointerTy())
9272 return Builder.CreateIntToPtr(Val, RealResTy);
9273 else {
9274 llvm::Type *IntResTy = llvm::IntegerType::get(
9275 getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
9276 return Builder.CreateBitCast(Builder.CreateTruncOrBitCast(Val, IntResTy),
9277 RealResTy);
9278 }
9279 }
9280
9281 if (BuiltinID == clang::ARM::BI__builtin_arm_strexd ||
9282 ((BuiltinID == clang::ARM::BI__builtin_arm_stlex ||
9283 BuiltinID == clang::ARM::BI__builtin_arm_strex) &&
9284 getContext().getTypeSize(E->getArg(0)->getType()) == 64)) {
9285 Function *F = CGM.getIntrinsic(
9286 BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlexd
9287 : Intrinsic::arm_strexd);
9288 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty);
9289
9290 Address Tmp = CreateMemTemp(E->getArg(0)->getType());
9291 Value *Val = EmitScalarExpr(E->getArg(0));
9292 Builder.CreateStore(Val, Tmp);
9293
9294 Address LdPtr = Tmp.withElementType(STy);
9295 Val = Builder.CreateLoad(LdPtr);
9296
9297 Value *Arg0 = Builder.CreateExtractValue(Val, 0);
9298 Value *Arg1 = Builder.CreateExtractValue(Val, 1);
9299 Value *StPtr = EmitScalarExpr(E->getArg(1));
9300 return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd");
9301 }
9302
9303 if (BuiltinID == clang::ARM::BI__builtin_arm_strex ||
9304 BuiltinID == clang::ARM::BI__builtin_arm_stlex) {
9305 Value *StoreVal = EmitScalarExpr(E->getArg(0));
9306 Value *StoreAddr = EmitScalarExpr(E->getArg(1));
9307
9308 QualType Ty = E->getArg(0)->getType();
9309 llvm::Type *StoreTy =
9310 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
9311
9312 if (StoreVal->getType()->isPointerTy())
9313 StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty);
9314 else {
9315 llvm::Type *IntTy = llvm::IntegerType::get(
9316 getLLVMContext(),
9317 CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
9318 StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
9319 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty);
9320 }
9321
9322 Function *F = CGM.getIntrinsic(
9323 BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlex
9324 : Intrinsic::arm_strex,
9325 StoreAddr->getType());
9326
9327 CallInst *CI = Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex");
9328 CI->addParamAttr(
9329 1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy));
9330 return CI;
9331 }
9332
9333 if (BuiltinID == clang::ARM::BI__builtin_arm_clrex) {
9334 Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex);
9335 return Builder.CreateCall(F);
9336 }
9337
9338 // CRC32
9339 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
9340 switch (BuiltinID) {
9341 case clang::ARM::BI__builtin_arm_crc32b:
9342 CRCIntrinsicID = Intrinsic::arm_crc32b; break;
9343 case clang::ARM::BI__builtin_arm_crc32cb:
9344 CRCIntrinsicID = Intrinsic::arm_crc32cb; break;
9345 case clang::ARM::BI__builtin_arm_crc32h:
9346 CRCIntrinsicID = Intrinsic::arm_crc32h; break;
9347 case clang::ARM::BI__builtin_arm_crc32ch:
9348 CRCIntrinsicID = Intrinsic::arm_crc32ch; break;
9349 case clang::ARM::BI__builtin_arm_crc32w:
9350 case clang::ARM::BI__builtin_arm_crc32d:
9351 CRCIntrinsicID = Intrinsic::arm_crc32w; break;
9352 case clang::ARM::BI__builtin_arm_crc32cw:
9353 case clang::ARM::BI__builtin_arm_crc32cd:
9354 CRCIntrinsicID = Intrinsic::arm_crc32cw; break;
9355 }
9356
9357 if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
9358 Value *Arg0 = EmitScalarExpr(E->getArg(0));
9359 Value *Arg1 = EmitScalarExpr(E->getArg(1));
9360
9361 // crc32{c,}d intrinsics are implemented as two calls to crc32{c,}w
9362 // intrinsics, hence we need different codegen for these cases.
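// For example (a sketch): crc32d(crc, data) is emitted as
//   crc32w(crc32w(crc, lo32(data)), hi32(data))
// where lo32/hi32 denote the low and high halves of the 64-bit data operand.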
9363 if (BuiltinID == clang::ARM::BI__builtin_arm_crc32d ||
9364 BuiltinID == clang::ARM::BI__builtin_arm_crc32cd) {
9365 Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
9366 Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty);
9367 Value *Arg1b = Builder.CreateLShr(Arg1, C1);
9368 Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty);
9369
9370 Function *F = CGM.getIntrinsic(CRCIntrinsicID);
9371 Value *Res = Builder.CreateCall(F, {Arg0, Arg1a});
9372 return Builder.CreateCall(F, {Res, Arg1b});
9373 } else {
9374 Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty);
9375
9376 Function *F = CGM.getIntrinsic(CRCIntrinsicID);
9377 return Builder.CreateCall(F, {Arg0, Arg1});
9378 }
9379 }
9380
9381 if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||
9382 BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
9383 BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||
9384 BuiltinID == clang::ARM::BI__builtin_arm_wsr ||
9385 BuiltinID == clang::ARM::BI__builtin_arm_wsr64 ||
9386 BuiltinID == clang::ARM::BI__builtin_arm_wsrp) {
9387
9388 SpecialRegisterAccessKind AccessKind = Write;
9389 if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||
9390 BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
9391 BuiltinID == clang::ARM::BI__builtin_arm_rsrp)
9392 AccessKind = VolatileRead;
9393
9394 bool IsPointerBuiltin = BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||
9395 BuiltinID == clang::ARM::BI__builtin_arm_wsrp;
9396
9397 bool Is64Bit = BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
9398 BuiltinID == clang::ARM::BI__builtin_arm_wsr64;
9399
9400 llvm::Type *ValueType;
9401 llvm::Type *RegisterType;
9402 if (IsPointerBuiltin) {
9403 ValueType = VoidPtrTy;
9404 RegisterType = Int32Ty;
9405 } else if (Is64Bit) {
9406 ValueType = RegisterType = Int64Ty;
9407 } else {
9408 ValueType = RegisterType = Int32Ty;
9409 }
9410
9411 return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType,
9412 AccessKind);
9413 }
9414
9415 if (BuiltinID == ARM::BI__builtin_sponentry) {
9416 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy);
9417 return Builder.CreateCall(F);
9418 }
9419
9420 // Handle MSVC intrinsics before argument evaluation to prevent double
9421 // evaluation.
9422 if (std::optional<MSVCIntrin> MsvcIntId = translateArmToMsvcIntrin(BuiltinID))
9423 return EmitMSVCBuiltinExpr(*MsvcIntId, E);
9424
9425 // Deal with MVE builtins
9426 if (Value *Result = EmitARMMVEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
9427 return Result;
9428 // Handle CDE builtins
9429 if (Value *Result = EmitARMCDEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
9430 return Result;
9431
9432 // Some intrinsics are equivalent - if they are, use the base intrinsic ID.
9433 auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) {
9434 return P.first == BuiltinID;
9435 });
9436 if (It != end(NEONEquivalentIntrinsicMap))
9437 BuiltinID = It->second;
9438
9439 // Find out if any arguments are required to be integer constant
9440 // expressions.
9441 unsigned ICEArguments = 0;
9442 ASTContext::GetBuiltinTypeError Error;
9443 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
9444 assert(Error == ASTContext::GE_None && "Should not codegen an error");
9445
9446 auto getAlignmentValue32 = [&](Address addr) -> Value* {
9447 return Builder.getInt32(addr.getAlignment().getQuantity());
9448 };
9449
9450 Address PtrOp0 = Address::invalid();
9451 Address PtrOp1 = Address::invalid();
9452 SmallVector<Value*, 4> Ops;
9453 bool HasExtraArg = HasExtraNeonArgument(BuiltinID);
9454 unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
9455 for (unsigned i = 0, e = NumArgs; i != e; i++) {
9456 if (i == 0) {
9457 switch (BuiltinID) {
9458 case NEON::BI__builtin_neon_vld1_v:
9459 case NEON::BI__builtin_neon_vld1q_v:
9460 case NEON::BI__builtin_neon_vld1q_lane_v:
9461 case NEON::BI__builtin_neon_vld1_lane_v:
9462 case NEON::BI__builtin_neon_vld1_dup_v:
9463 case NEON::BI__builtin_neon_vld1q_dup_v:
9464 case NEON::BI__builtin_neon_vst1_v:
9465 case NEON::BI__builtin_neon_vst1q_v:
9466 case NEON::BI__builtin_neon_vst1q_lane_v:
9467 case NEON::BI__builtin_neon_vst1_lane_v:
9468 case NEON::BI__builtin_neon_vst2_v:
9469 case NEON::BI__builtin_neon_vst2q_v:
9470 case NEON::BI__builtin_neon_vst2_lane_v:
9471 case NEON::BI__builtin_neon_vst2q_lane_v:
9472 case NEON::BI__builtin_neon_vst3_v:
9473 case NEON::BI__builtin_neon_vst3q_v:
9474 case NEON::BI__builtin_neon_vst3_lane_v:
9475 case NEON::BI__builtin_neon_vst3q_lane_v:
9476 case NEON::BI__builtin_neon_vst4_v:
9477 case NEON::BI__builtin_neon_vst4q_v:
9478 case NEON::BI__builtin_neon_vst4_lane_v:
9479 case NEON::BI__builtin_neon_vst4q_lane_v:
9480 // Get the alignment for the argument in addition to the value;
9481 // we'll use it later.
9482 PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
9483 Ops.push_back(PtrOp0.emitRawPointer(*this));
9484 continue;
9485 }
9486 }
9487 if (i == 1) {
9488 switch (BuiltinID) {
9489 case NEON::BI__builtin_neon_vld2_v:
9490 case NEON::BI__builtin_neon_vld2q_v:
9491 case NEON::BI__builtin_neon_vld3_v:
9492 case NEON::BI__builtin_neon_vld3q_v:
9493 case NEON::BI__builtin_neon_vld4_v:
9494 case NEON::BI__builtin_neon_vld4q_v:
9495 case NEON::BI__builtin_neon_vld2_lane_v:
9496 case NEON::BI__builtin_neon_vld2q_lane_v:
9497 case NEON::BI__builtin_neon_vld3_lane_v:
9498 case NEON::BI__builtin_neon_vld3q_lane_v:
9499 case NEON::BI__builtin_neon_vld4_lane_v:
9500 case NEON::BI__builtin_neon_vld4q_lane_v:
9501 case NEON::BI__builtin_neon_vld2_dup_v:
9502 case NEON::BI__builtin_neon_vld2q_dup_v:
9503 case NEON::BI__builtin_neon_vld3_dup_v:
9504 case NEON::BI__builtin_neon_vld3q_dup_v:
9505 case NEON::BI__builtin_neon_vld4_dup_v:
9506 case NEON::BI__builtin_neon_vld4q_dup_v:
9507 // Get the alignment for the argument in addition to the value;
9508 // we'll use it later.
9509 PtrOp1 = EmitPointerWithAlignment(E->getArg(1));
9510 Ops.push_back(PtrOp1.emitRawPointer(*this));
9511 continue;
9512 }
9513 }
9514
9515 Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
9516 }
9517
9518 switch (BuiltinID) {
9519 default: break;
9520
9521 case NEON::BI__builtin_neon_vget_lane_i8:
9522 case NEON::BI__builtin_neon_vget_lane_i16:
9523 case NEON::BI__builtin_neon_vget_lane_i32:
9524 case NEON::BI__builtin_neon_vget_lane_i64:
9525 case NEON::BI__builtin_neon_vget_lane_bf16:
9526 case NEON::BI__builtin_neon_vget_lane_f32:
9527 case NEON::BI__builtin_neon_vgetq_lane_i8:
9528 case NEON::BI__builtin_neon_vgetq_lane_i16:
9529 case NEON::BI__builtin_neon_vgetq_lane_i32:
9530 case NEON::BI__builtin_neon_vgetq_lane_i64:
9531 case NEON::BI__builtin_neon_vgetq_lane_bf16:
9532 case NEON::BI__builtin_neon_vgetq_lane_f32:
9533 case NEON::BI__builtin_neon_vduph_lane_bf16:
9534 case NEON::BI__builtin_neon_vduph_laneq_bf16:
9535 return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
9536
9537 case NEON::BI__builtin_neon_vrndns_f32: {
9538 Value *Arg = EmitScalarExpr(E->getArg(0));
9539 llvm::Type *Tys[] = {Arg->getType()};
9540 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vrintn, Tys);
9541 return Builder.CreateCall(F, {Arg}, "vrndn"); }
9542
9543 case NEON::BI__builtin_neon_vset_lane_i8:
9544 case NEON::BI__builtin_neon_vset_lane_i16:
9545 case NEON::BI__builtin_neon_vset_lane_i32:
9546 case NEON::BI__builtin_neon_vset_lane_i64:
9547 case NEON::BI__builtin_neon_vset_lane_bf16:
9548 case NEON::BI__builtin_neon_vset_lane_f32:
9549 case NEON::BI__builtin_neon_vsetq_lane_i8:
9550 case NEON::BI__builtin_neon_vsetq_lane_i16:
9551 case NEON::BI__builtin_neon_vsetq_lane_i32:
9552 case NEON::BI__builtin_neon_vsetq_lane_i64:
9553 case NEON::BI__builtin_neon_vsetq_lane_bf16:
9554 case NEON::BI__builtin_neon_vsetq_lane_f32:
9555 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
9556
9557 case NEON::BI__builtin_neon_vsha1h_u32:
9558 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops,
9559 "vsha1h");
9560 case NEON::BI__builtin_neon_vsha1cq_u32:
9561 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops,
9562 "vsha1h");
9563 case NEON::BI__builtin_neon_vsha1pq_u32:
9564 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops,
9565 "vsha1h");
9566 case NEON::BI__builtin_neon_vsha1mq_u32:
9567 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops,
9568 "vsha1h");
9569
9570 case NEON::BI__builtin_neon_vcvth_bf16_f32: {
9571 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vcvtbfp2bf), Ops,
9572 "vcvtbfp2bf");
9573 }
9574
9575 // The ARM _MoveToCoprocessor builtins put the input register value as
9576 // the first argument, but the LLVM intrinsic expects it as the third one.
9577 case clang::ARM::BI_MoveToCoprocessor:
9578 case clang::ARM::BI_MoveToCoprocessor2: {
9579 Function *F = CGM.getIntrinsic(BuiltinID == clang::ARM::BI_MoveToCoprocessor
9580 ? Intrinsic::arm_mcr
9581 : Intrinsic::arm_mcr2);
9582 return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0],
9583 Ops[3], Ops[4], Ops[5]});
9584 }
9585 }
9586
9587 // Get the last argument, which specifies the vector type.
9588 assert(HasExtraArg);
9589 const Expr *Arg = E->getArg(E->getNumArgs()-1);
9590 std::optional<llvm::APSInt> Result =
9591 Arg->getIntegerConstantExpr(getContext());
9592 if (!Result)
9593 return nullptr;
9594
9595 if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f ||
9596 BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_d) {
9597 // Determine the overloaded type of this builtin.
9598 llvm::Type *Ty;
9599 if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f)
9600 Ty = FloatTy;
9601 else
9602 Ty = DoubleTy;
9603
9604 // Determine whether this is an unsigned conversion or not.
9605 bool usgn = Result->getZExtValue() == 1;
9606 unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;
9607
9608 // Call the appropriate intrinsic.
9609 Function *F = CGM.getIntrinsic(Int, Ty);
9610 return Builder.CreateCall(F, Ops, "vcvtr");
9611 }
9612
9613 // Determine the type of this overloaded NEON intrinsic.
9614 NeonTypeFlags Type = Result->getZExtValue();
9615 bool usgn = Type.isUnsigned();
9616 bool rightShift = false;
9617
9618 llvm::FixedVectorType *VTy =
9619 GetNeonType(this, Type, getTarget().hasLegalHalfType(), false,
9620 getTarget().hasBFloat16Type());
9621 llvm::Type *Ty = VTy;
9622 if (!Ty)
9623 return nullptr;
9624
9625 // Many NEON builtins have identical semantics and uses in ARM and
9626 // AArch64. Emit these in a single function.
9627 auto IntrinsicMap = ArrayRef(ARMSIMDIntrinsicMap);
9628 const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(
9629 IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted);
9630 if (Builtin)
9631 return EmitCommonNeonBuiltinExpr(
9632 Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
9633 Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1, Arch);
9634
9635 unsigned Int;
9636 switch (BuiltinID) {
9637 default: return nullptr;
9638 case NEON::BI__builtin_neon_vld1q_lane_v:
9639 // Handle 64-bit integer elements as a special case. Use shuffles of
9640 // one-element vectors to avoid poor code for i64 in the backend.
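// For example (a sketch): for vld1q_lane_s64 with lane 1, the code below keeps
// lane 0 of the existing vector, loads the new element as a <1 x i64>, and
// shuffles the two back together.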
9641 if (VTy->getElementType()->isIntegerTy(64)) {
9642 // Extract the other lane.
9643 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
9644 int Lane = cast<ConstantInt>(Ops[2])->getZExtValue();
9645 Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane));
9646 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
9647 // Load the value as a one-element vector.
9648 Ty = llvm::FixedVectorType::get(VTy->getElementType(), 1);
9649 llvm::Type *Tys[] = {Ty, Int8PtrTy};
9650 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys);
9651 Value *Align = getAlignmentValue32(PtrOp0);
9652 Value *Ld = Builder.CreateCall(F, {Ops[0], Align});
9653 // Combine them.
9654 int Indices[] = {1 - Lane, Lane};
9655 return Builder.CreateShuffleVector(Ops[1], Ld, Indices, "vld1q_lane");
9656 }
9657 [[fallthrough]];
9658 case NEON::BI__builtin_neon_vld1_lane_v: {
9659 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
9660 PtrOp0 = PtrOp0.withElementType(VTy->getElementType());
9661 Value *Ld = Builder.CreateLoad(PtrOp0);
9662 return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
9663 }
9664 case NEON::BI__builtin_neon_vqrshrn_n_v:
9665 Int =
9666 usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
9667 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n",
9668 1, true);
9669 case NEON::BI__builtin_neon_vqrshrun_n_v:
9670 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
9671 Ops, "vqrshrun_n", 1, true);
9672 case NEON::BI__builtin_neon_vqshrn_n_v:
9673 Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
9674 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n",
9675 1, true);
9676 case NEON::BI__builtin_neon_vqshrun_n_v:
9677 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
9678 Ops, "vqshrun_n", 1, true);
9679 case NEON::BI__builtin_neon_vrecpe_v:
9680 case NEON::BI__builtin_neon_vrecpeq_v:
9681 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),
9682 Ops, "vrecpe");
9683 case NEON::BI__builtin_neon_vrshrn_n_v:
9684 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
9685 Ops, "vrshrn_n", 1, true);
9686 case NEON::BI__builtin_neon_vrsra_n_v:
9687 case NEON::BI__builtin_neon_vrsraq_n_v:
9688 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
9689 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
9690 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
9691 Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
9692 Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Ty), {Ops[1], Ops[2]});
9693 return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
9694 case NEON::BI__builtin_neon_vsri_n_v:
9695 case NEON::BI__builtin_neon_vsriq_n_v:
9696 rightShift = true;
9697 [[fallthrough]];
9698 case NEON::BI__builtin_neon_vsli_n_v:
9699 case NEON::BI__builtin_neon_vsliq_n_v:
9700 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
9701 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
9702 Ops, "vsli_n");
9703 case NEON::BI__builtin_neon_vsra_n_v:
9704 case NEON::BI__builtin_neon_vsraq_n_v:
9705 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
9706 Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
9707 return Builder.CreateAdd(Ops[0], Ops[1]);
9708 case NEON::BI__builtin_neon_vst1q_lane_v:
9709 // Handle 64-bit integer elements as a special case. Use a shuffle to get
9710 // a one-element vector and avoid poor code for i64 in the backend.
9711 if (VTy->getElementType()->isIntegerTy(64)) {
9712 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
9713 Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2]));
9714 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
9715 Ops[2] = getAlignmentValue32(PtrOp0);
9716 llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()};
9717 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1,
9718 Tys), Ops);
9719 }
9720 [[fallthrough]];
9721 case NEON::BI__builtin_neon_vst1_lane_v: {
9722 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
9723 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
9724 return Builder.CreateStore(Ops[1],
9725 PtrOp0.withElementType(Ops[1]->getType()));
9726 }
9727 case NEON::BI__builtin_neon_vtbl1_v:
9728 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
9729 Ops, "vtbl1");
9730 case NEON::BI__builtin_neon_vtbl2_v:
9731 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
9732 Ops, "vtbl2");
9733 case NEON::BI__builtin_neon_vtbl3_v:
9734 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
9735 Ops, "vtbl3");
9736 case NEON::BI__builtin_neon_vtbl4_v:
9737 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
9738 Ops, "vtbl4");
9739 case NEON::BI__builtin_neon_vtbx1_v:
9740 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
9741 Ops, "vtbx1");
9742 case NEON::BI__builtin_neon_vtbx2_v:
9743 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
9744 Ops, "vtbx2");
9745 case NEON::BI__builtin_neon_vtbx3_v:
9746 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
9747 Ops, "vtbx3");
9748 case NEON::BI__builtin_neon_vtbx4_v:
9749 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
9750 Ops, "vtbx4");
9751 }
9752}
9753
9754template<typename Integer>
9755static Integer GetIntegerConstantValue(const Expr *E, ASTContext &Context) {
9756 return E->getIntegerConstantExpr(Context)->getExtValue();
9757}
9758
9759static llvm::Value *SignOrZeroExtend(CGBuilderTy &Builder, llvm::Value *V,
9760 llvm::Type *T, bool Unsigned) {
9761 // Helper function called by Tablegen-constructed ARM MVE builtin codegen,
9762 // which finds it convenient to specify signed/unsigned as a boolean flag.
9763 return Unsigned ? Builder.CreateZExt(V, T) : Builder.CreateSExt(V, T);
9764}
9765
9766static llvm::Value *MVEImmediateShr(CGBuilderTy &Builder, llvm::Value *V,
9767 uint32_t Shift, bool Unsigned) {
9768 // MVE helper function for integer shift right. This must handle signed vs
9769 // unsigned, and also deal specially with the case where the shift count is
9770 // equal to the lane size. In LLVM IR, an LShr with that parameter would be
9771 // undefined behavior, but in MVE it's legal, so we must convert it to code
9772 // that is not undefined in IR.
9773 unsigned LaneBits = cast<llvm::VectorType>(V->getType())
9774 ->getElementType()
9775 ->getPrimitiveSizeInBits();
9776 if (Shift == LaneBits) {
9777 // An unsigned shift of the full lane size always generates zero, so we can
9778 // simply emit a zero vector. A signed shift of the full lane size does the
9779 // same thing as shifting by one bit fewer.
9780 if (Unsigned)
9781 return llvm::Constant::getNullValue(V->getType());
9782 else
9783 --Shift;
9784 }
9785 return Unsigned ? Builder.CreateLShr(V, Shift) : Builder.CreateAShr(V, Shift);
9786}
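// A minimal scalar sketch of the clamping rule above, assuming 16-bit lanes;
// the helper below is hypothetical and only illustrates the transformation,
// it is not used by the builtin codegen itself.
//
//   static int16_t LaneAShr16(int16_t V, unsigned Shift) { // Shift in [1, 16]
//     if (Shift == 16) // a 16-bit shift by 16 would be undefined in C and IR
//       Shift = 15;    // a signed shift by 15 gives the same result in MVE
//     return int16_t(V >> Shift);
//   }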
9787
9788static llvm::Value *ARMMVEVectorSplat(CGBuilderTy &Builder, llvm::Value *V) {
9789 // MVE-specific helper function for a vector splat, which infers the element
9790 // count of the output vector by knowing that MVE vectors are all 128 bits
9791 // wide.
9792 unsigned Elements = 128 / V->getType()->getPrimitiveSizeInBits();
9793 return Builder.CreateVectorSplat(Elements, V);
9794}
9795
9796static llvm::Value *ARMMVEVectorReinterpret(CGBuilderTy &Builder,
9797 CodeGenFunction *CGF,
9798 llvm::Value *V,
9799 llvm::Type *DestType) {
9800 // Convert one MVE vector type into another by reinterpreting its in-register
9801 // format.
9802 //
9803 // Little-endian, this is identical to a bitcast (which reinterprets the
9804 // memory format). But big-endian, they're not necessarily the same, because
9805 // the register and memory formats map to each other differently depending on
9806 // the lane size.
9807 //
9808 // We generate a bitcast whenever we can (if we're little-endian, or if the
9809 // lane sizes are the same anyway). Otherwise we fall back to an IR intrinsic
9810 // that performs the different kind of reinterpretation.
9811 if (CGF->getTarget().isBigEndian() &&
9812 V->getType()->getScalarSizeInBits() != DestType->getScalarSizeInBits()) {
9813 return Builder.CreateCall(
9814 CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vreinterpretq,
9815 {DestType, V->getType()}),
9816 V);
9817 } else {
9818 return Builder.CreateBitCast(V, DestType);
9819 }
9820}
9821
9822static llvm::Value *VectorUnzip(CGBuilderTy &Builder, llvm::Value *V, bool Odd) {
9823 // Make a shufflevector that extracts every other element of a vector (evens
9824 // or odds, as desired).
9825 SmallVector<int, 16> Indices;
9826 unsigned InputElements =
9827 cast<llvm::FixedVectorType>(V->getType())->getNumElements();
9828 for (unsigned i = 0; i < InputElements; i += 2)
9829 Indices.push_back(i + Odd);
9830 return Builder.CreateShuffleVector(V, Indices);
9831}
9832
9833static llvm::Value *VectorZip(CGBuilderTy &Builder, llvm::Value *V0,
9834 llvm::Value *V1) {
9835 // Make a shufflevector that interleaves two vectors element by element.
9836 assert(V0->getType() == V1->getType() && "Can't zip different vector types");
9837 SmallVector<int, 16> Indices;
9838 unsigned InputElements =
9839 cast<llvm::FixedVectorType>(V0->getType())->getNumElements();
9840 for (unsigned i = 0; i < InputElements; i++) {
9841 Indices.push_back(i);
9842 Indices.push_back(i + InputElements);
9843 }
9844 return Builder.CreateShuffleVector(V0, V1, Indices);
9845}
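// As a worked example of the two shuffles above (illustrative only, assuming
// 4-element inputs): VectorZip on V0 = {a0,a1,a2,a3} and V1 = {b0,b1,b2,b3}
// builds the mask {0,4,1,5,2,6,3,7}, i.e. {a0,b0,a1,b1,a2,b2,a3,b3}, while
// VectorUnzip on an 8-element vector uses {0,2,4,6} (Odd=false) or
// {1,3,5,7} (Odd=true) to recover the even or odd elements.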
9846
9847template<unsigned HighBit, unsigned OtherBits>
9848static llvm::Value *ARMMVEConstantSplat(CGBuilderTy &Builder, llvm::Type *VT) {
9849 // MVE-specific helper function to make a vector splat of a constant such as
9850 // UINT_MAX or INT_MIN, in which all bits below the highest one are equal.
9851 llvm::Type *T = cast<llvm::VectorType>(VT)->getElementType();
9852 unsigned LaneBits = T->getPrimitiveSizeInBits();
9853 uint32_t Value = HighBit << (LaneBits - 1);
9854 if (OtherBits)
9855 Value |= (1UL << (LaneBits - 1)) - 1;
9856 llvm::Value *Lane = llvm::ConstantInt::get(T, Value);
9857 return ARMMVEVectorSplat(Builder, Lane);
9858}
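// Worked example of the constant above (illustrative only): with 16-bit lanes,
// HighBit=1, OtherBits=0 yields the splat 0x8000 (INT16_MIN), HighBit=0,
// OtherBits=1 yields 0x7fff (INT16_MAX), and HighBit=1, OtherBits=1 yields
// 0xffff (UINT16_MAX).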
9859
9860static llvm::Value *ARMMVEVectorElementReverse(CGBuilderTy &Builder,
9861 llvm::Value *V,
9862 unsigned ReverseWidth) {
9863 // MVE-specific helper function which reverses the elements of a
9864 // vector within every (ReverseWidth)-bit collection of lanes.
9865 SmallVector<int, 16> Indices;
9866 unsigned LaneSize = V->getType()->getScalarSizeInBits();
9867 unsigned Elements = 128 / LaneSize;
9868 unsigned Mask = ReverseWidth / LaneSize - 1;
9869 for (unsigned i = 0; i < Elements; i++)
9870 Indices.push_back(i ^ Mask);
9871 return Builder.CreateShuffleVector(V, Indices);
9872}
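// Worked example (illustrative only): for 8-bit lanes and ReverseWidth=32,
// Mask = 32/8 - 1 = 3, so the shuffle indices are i^3, i.e.
// {3,2,1,0, 7,6,5,4, ...}: every 32-bit group of four byte lanes is reversed.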
9873
9874Value *CodeGenFunction::EmitARMMVEBuiltinExpr(unsigned BuiltinID,
9875 const CallExpr *E,
9876 ReturnValueSlot ReturnValue,
9877 llvm::Triple::ArchType Arch) {
9878 enum class CustomCodeGen { VLD24, VST24 } CustomCodeGenType;
9879 Intrinsic::ID IRIntr;
9880 unsigned NumVectors;
9881
9882 // Code autogenerated by Tablegen will handle all the simple builtins.
9883 switch (BuiltinID) {
9884 #include "clang/Basic/arm_mve_builtin_cg.inc"
9885
9886 // If we didn't match an MVE builtin id at all, go back to the
9887 // main EmitARMBuiltinExpr.
9888 default:
9889 return nullptr;
9890 }
9891
9892 // Anything that breaks from that switch is an MVE builtin that
9893 // needs handwritten code to generate.
9894
9895 switch (CustomCodeGenType) {
9896
9897 case CustomCodeGen::VLD24: {
9898 llvm::SmallVector<Value *, 4> Ops;
9899 llvm::SmallVector<llvm::Type *, 4> Tys;
9900
9901 auto MvecCType = E->getType();
9902 auto MvecLType = ConvertType(MvecCType);
9903 assert(MvecLType->isStructTy() &&
9904 "Return type for vld[24]q should be a struct");
9905 assert(MvecLType->getStructNumElements() == 1 &&
9906 "Return-type struct for vld[24]q should have one element");
9907 auto MvecLTypeInner = MvecLType->getStructElementType(0);
9908 assert(MvecLTypeInner->isArrayTy() &&
9909 "Return-type struct for vld[24]q should contain an array");
9910 assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
9911 "Array member of return-type struct vld[24]q has wrong length");
9912 auto VecLType = MvecLTypeInner->getArrayElementType();
9913
9914 Tys.push_back(VecLType);
9915
9916 auto Addr = E->getArg(0);
9917 Ops.push_back(EmitScalarExpr(Addr));
9918 Tys.push_back(ConvertType(Addr->getType()));
9919
9920 Function *F = CGM.getIntrinsic(IRIntr, ArrayRef(Tys));
9921 Value *LoadResult = Builder.CreateCall(F, Ops);
9922 Value *MvecOut = PoisonValue::get(MvecLType);
9923 for (unsigned i = 0; i < NumVectors; ++i) {
9924 Value *Vec = Builder.CreateExtractValue(LoadResult, i);
9925 MvecOut = Builder.CreateInsertValue(MvecOut, Vec, {0, i});
9926 }
9927
9928 if (ReturnValue.isNull())
9929 return MvecOut;
9930 else
9931 return Builder.CreateStore(MvecOut, ReturnValue.getAddress());
9932 }
9933
9934 case CustomCodeGen::VST24: {
9935 llvm::SmallVector<Value *, 4> Ops;
9936 llvm::SmallVector<llvm::Type *, 4> Tys;
9937
9938 auto Addr = E->getArg(0);
9939 Ops.push_back(EmitScalarExpr(Addr));
9940 Tys.push_back(ConvertType(Addr->getType()));
9941
9942 auto MvecCType = E->getArg(1)->getType();
9943 auto MvecLType = ConvertType(MvecCType);
9944 assert(MvecLType->isStructTy() && "Data type for vst2q should be a struct");
9945 assert(MvecLType->getStructNumElements() == 1 &&
9946 "Data-type struct for vst2q should have one element");
9947 auto MvecLTypeInner = MvecLType->getStructElementType(0);
9948 assert(MvecLTypeInner->isArrayTy() &&
9949 "Data-type struct for vst2q should contain an array");
9950 assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
9951 "Array member of return-type struct vld[24]q has wrong length");
9952 auto VecLType = MvecLTypeInner->getArrayElementType();
9953
9954 Tys.push_back(VecLType);
9955
9956 AggValueSlot MvecSlot = CreateAggTemp(MvecCType);
9957 EmitAggExpr(E->getArg(1), MvecSlot);
9958 auto Mvec = Builder.CreateLoad(MvecSlot.getAddress());
9959 for (unsigned i = 0; i < NumVectors; i++)
9960 Ops.push_back(Builder.CreateExtractValue(Mvec, {0, i}));
9961
9962 Function *F = CGM.getIntrinsic(IRIntr, ArrayRef(Tys));
9963 Value *ToReturn = nullptr;
9964 for (unsigned i = 0; i < NumVectors; i++) {
9965 Ops.push_back(llvm::ConstantInt::get(Int32Ty, i));
9966 ToReturn = Builder.CreateCall(F, Ops);
9967 Ops.pop_back();
9968 }
9969 return ToReturn;
9970 }
9971 }
9972 llvm_unreachable("unknown custom codegen type.");
9973}
9974
9975Value *CodeGenFunction::EmitARMCDEBuiltinExpr(unsigned BuiltinID,
9976 const CallExpr *E,
9977 ReturnValueSlot ReturnValue,
9978 llvm::Triple::ArchType Arch) {
9979 switch (BuiltinID) {
9980 default:
9981 return nullptr;
9982#include "clang/Basic/arm_cde_builtin_cg.inc"
9983 }
9984}
9985
9986static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID,
9987 const CallExpr *E,
9988 SmallVectorImpl<Value *> &Ops,
9989 llvm::Triple::ArchType Arch) {
9990 unsigned int Int = 0;
9991 const char *s = nullptr;
9992
9993 switch (BuiltinID) {
9994 default:
9995 return nullptr;
9996 case NEON::BI__builtin_neon_vtbl1_v:
9997 case NEON::BI__builtin_neon_vqtbl1_v:
9998 case NEON::BI__builtin_neon_vqtbl1q_v:
9999 case NEON::BI__builtin_neon_vtbl2_v:
10000 case NEON::BI__builtin_neon_vqtbl2_v:
10001 case NEON::BI__builtin_neon_vqtbl2q_v:
10002 case NEON::BI__builtin_neon_vtbl3_v:
10003 case NEON::BI__builtin_neon_vqtbl3_v:
10004 case NEON::BI__builtin_neon_vqtbl3q_v:
10005 case NEON::BI__builtin_neon_vtbl4_v:
10006 case NEON::BI__builtin_neon_vqtbl4_v:
10007 case NEON::BI__builtin_neon_vqtbl4q_v:
10008 break;
10009 case NEON::BI__builtin_neon_vtbx1_v:
10010 case NEON::BI__builtin_neon_vqtbx1_v:
10011 case NEON::BI__builtin_neon_vqtbx1q_v:
10012 case NEON::BI__builtin_neon_vtbx2_v:
10013 case NEON::BI__builtin_neon_vqtbx2_v:
10014 case NEON::BI__builtin_neon_vqtbx2q_v:
10015 case NEON::BI__builtin_neon_vtbx3_v:
10016 case NEON::BI__builtin_neon_vqtbx3_v:
10017 case NEON::BI__builtin_neon_vqtbx3q_v:
10018 case NEON::BI__builtin_neon_vtbx4_v:
10019 case NEON::BI__builtin_neon_vqtbx4_v:
10020 case NEON::BI__builtin_neon_vqtbx4q_v:
10021 break;
10022 }
10023
10024 assert(E->getNumArgs() >= 3);
10025
10026 // Get the last argument, which specifies the vector type.
10027 const Expr *Arg = E->getArg(E->getNumArgs() - 1);
10028 std::optional<llvm::APSInt> Result =
10029 Arg->getIntegerConstantExpr(CGF.getContext());
10030 if (!Result)
10031 return nullptr;
10032
10033 // Determine the type of this overloaded NEON intrinsic.
10034 NeonTypeFlags Type = Result->getZExtValue();
10035 llvm::FixedVectorType *Ty = GetNeonType(&CGF, Type);
10036 if (!Ty)
10037 return nullptr;
10038
10039 CodeGen::CGBuilderTy &Builder = CGF.Builder;
10040
10041 // AArch64 scalar builtins are not overloaded, they do not have an extra
10042 // argument that specifies the vector type, need to handle each case.
10043 switch (BuiltinID) {
10044 case NEON::BI__builtin_neon_vtbl1_v: {
10045 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 1), nullptr, Ops[1],
10046 Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
10047 }
10048 case NEON::BI__builtin_neon_vtbl2_v: {
10049 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 2), nullptr, Ops[2],
10050 Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
10051 }
10052 case NEON::BI__builtin_neon_vtbl3_v: {
10053 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 3), nullptr, Ops[3],
10054 Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
10055 }
10056 case NEON::BI__builtin_neon_vtbl4_v: {
10057 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 4), nullptr, Ops[4],
10058 Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
10059 }
10060 case NEON::BI__builtin_neon_vtbx1_v: {
10061 Value *TblRes =
10062 packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 1), nullptr, Ops[2], Ty,
10063 Intrinsic::aarch64_neon_tbl1, "vtbl1");
10064
10065 llvm::Constant *EightV = ConstantInt::get(Ty, 8);
10066 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV);
10067 CmpRes = Builder.CreateSExt(CmpRes, Ty);
10068
10069 Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
10070 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
10071 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
10072 }
10073 case NEON::BI__builtin_neon_vtbx2_v: {
10074 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 2), Ops[0], Ops[3],
10075 Ty, Intrinsic::aarch64_neon_tbx1, "vtbx1");
10076 }
10077 case NEON::BI__builtin_neon_vtbx3_v: {
10078 Value *TblRes =
10079 packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 3), nullptr, Ops[4], Ty,
10080 Intrinsic::aarch64_neon_tbl2, "vtbl2");
10081
10082 llvm::Constant *TwentyFourV = ConstantInt::get(Ty, 24);
10083 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4],
10084 TwentyFourV);
10085 CmpRes = Builder.CreateSExt(CmpRes, Ty);
10086
10087 Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
10088 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
10089 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
10090 }
10091 case NEON::BI__builtin_neon_vtbx4_v: {
10092 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 4), Ops[0], Ops[5],
10093 Ty, Intrinsic::aarch64_neon_tbx2, "vtbx2");
10094 }
10095 case NEON::BI__builtin_neon_vqtbl1_v:
10096 case NEON::BI__builtin_neon_vqtbl1q_v:
10097 Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break;
10098 case NEON::BI__builtin_neon_vqtbl2_v:
10099 case NEON::BI__builtin_neon_vqtbl2q_v: {
10100 Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break;
10101 case NEON::BI__builtin_neon_vqtbl3_v:
10102 case NEON::BI__builtin_neon_vqtbl3q_v:
10103 Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break;
10104 case NEON::BI__builtin_neon_vqtbl4_v:
10105 case NEON::BI__builtin_neon_vqtbl4q_v:
10106 Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break;
10107 case NEON::BI__builtin_neon_vqtbx1_v:
10108 case NEON::BI__builtin_neon_vqtbx1q_v:
10109 Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break;
10110 case NEON::BI__builtin_neon_vqtbx2_v:
10111 case NEON::BI__builtin_neon_vqtbx2q_v:
10112 Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break;
10113 case NEON::BI__builtin_neon_vqtbx3_v:
10114 case NEON::BI__builtin_neon_vqtbx3q_v:
10115 Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break;
10116 case NEON::BI__builtin_neon_vqtbx4_v:
10117 case NEON::BI__builtin_neon_vqtbx4q_v:
10118 Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break;
10119 }
10120 }
10121
10122 if (!Int)
10123 return nullptr;
10124
10125 Function *F = CGF.CGM.getIntrinsic(Int, Ty);
10126 return CGF.EmitNeonCall(F, Ops, s);
10127}
10128
10129Value *CodeGenFunction::vectorWrapScalar16(Value *Op) {
10130 auto *VTy = llvm::FixedVectorType::get(Int16Ty, 4);
10131 Op = Builder.CreateBitCast(Op, Int16Ty);
10132 Value *V = PoisonValue::get(VTy);
10133 llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
10134 Op = Builder.CreateInsertElement(V, Op, CI);
10135 return Op;
10136}
10137
10138/// SVEBuiltinMemEltTy - Returns the memory element type for this memory
10139/// access builtin. Only required if it can't be inferred from the base pointer
10140/// operand.
10141llvm::Type *CodeGenFunction::SVEBuiltinMemEltTy(const SVETypeFlags &TypeFlags) {
10142 switch (TypeFlags.getMemEltType()) {
10143 case SVETypeFlags::MemEltTyDefault:
10144 return getEltType(TypeFlags);
10145 case SVETypeFlags::MemEltTyInt8:
10146 return Builder.getInt8Ty();
10147 case SVETypeFlags::MemEltTyInt16:
10148 return Builder.getInt16Ty();
10149 case SVETypeFlags::MemEltTyInt32:
10150 return Builder.getInt32Ty();
10151 case SVETypeFlags::MemEltTyInt64:
10152 return Builder.getInt64Ty();
10153 }
10154 llvm_unreachable("Unknown MemEltType");
10155}
10156
10157llvm::Type *CodeGenFunction::getEltType(const SVETypeFlags &TypeFlags) {
10158 switch (TypeFlags.getEltType()) {
10159 default:
10160 llvm_unreachable("Invalid SVETypeFlag!");
10161
10162 case SVETypeFlags::EltTyInt8:
10163 return Builder.getInt8Ty();
10164 case SVETypeFlags::EltTyInt16:
10165 return Builder.getInt16Ty();
10166 case SVETypeFlags::EltTyInt32:
10167 return Builder.getInt32Ty();
10168 case SVETypeFlags::EltTyInt64:
10169 return Builder.getInt64Ty();
10170 case SVETypeFlags::EltTyInt128:
10171 return Builder.getInt128Ty();
10172
10173 case SVETypeFlags::EltTyFloat16:
10174 return Builder.getHalfTy();
10175 case SVETypeFlags::EltTyFloat32:
10176 return Builder.getFloatTy();
10177 case SVETypeFlags::EltTyFloat64:
10178 return Builder.getDoubleTy();
10179
10180 case SVETypeFlags::EltTyBFloat16:
10181 return Builder.getBFloatTy();
10182
10183 case SVETypeFlags::EltTyBool8:
10184 case SVETypeFlags::EltTyBool16:
10185 case SVETypeFlags::EltTyBool32:
10186 case SVETypeFlags::EltTyBool64:
10187 return Builder.getInt1Ty();
10188 }
10189}
10190
10191// Return the llvm predicate vector type corresponding to the specified element
10192// TypeFlags.
10193llvm::ScalableVectorType *
10194CodeGenFunction::getSVEPredType(const SVETypeFlags &TypeFlags) {
10195 switch (TypeFlags.getEltType()) {
10196 default: llvm_unreachable("Unhandled SVETypeFlag!");
10197
10198 case SVETypeFlags::EltTyInt8:
10199 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
10200 case SVETypeFlags::EltTyInt16:
10201 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
10202 case SVETypeFlags::EltTyInt32:
10203 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
10204 case SVETypeFlags::EltTyInt64:
10205 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
10206
10207 case SVETypeFlags::EltTyBFloat16:
10208 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
10209 case SVETypeFlags::EltTyFloat16:
10210 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
10211 case SVETypeFlags::EltTyFloat32:
10212 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
10213 case SVETypeFlags::EltTyFloat64:
10214 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
10215
10216 case SVETypeFlags::EltTyBool8:
10217 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
10218 case SVETypeFlags::EltTyBool16:
10219 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
10220 case SVETypeFlags::EltTyBool32:
10221 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
10222 case SVETypeFlags::EltTyBool64:
10223 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
10224 }
10225}
10226
10227// Return the llvm vector type corresponding to the specified element TypeFlags.
10228llvm::ScalableVectorType *
10229CodeGenFunction::getSVEType(const SVETypeFlags &TypeFlags) {
10230 switch (TypeFlags.getEltType()) {
10231 default:
10232 llvm_unreachable("Invalid SVETypeFlag!");
10233
10234 case SVETypeFlags::EltTyInt8:
10235 return llvm::ScalableVectorType::get(Builder.getInt8Ty(), 16);
10236 case SVETypeFlags::EltTyInt16:
10237 return llvm::ScalableVectorType::get(Builder.getInt16Ty(), 8);
10238 case SVETypeFlags::EltTyInt32:
10239 return llvm::ScalableVectorType::get(Builder.getInt32Ty(), 4);
10240 case SVETypeFlags::EltTyInt64:
10241 return llvm::ScalableVectorType::get(Builder.getInt64Ty(), 2);
10242
10243 case SVETypeFlags::EltTyMFloat8:
10244 return llvm::ScalableVectorType::get(Builder.getInt8Ty(), 16);
10245 case SVETypeFlags::EltTyFloat16:
10246 return llvm::ScalableVectorType::get(Builder.getHalfTy(), 8);
10247 case SVETypeFlags::EltTyBFloat16:
10248 return llvm::ScalableVectorType::get(Builder.getBFloatTy(), 8);
10249 case SVETypeFlags::EltTyFloat32:
10250 return llvm::ScalableVectorType::get(Builder.getFloatTy(), 4);
10251 case SVETypeFlags::EltTyFloat64:
10252 return llvm::ScalableVectorType::get(Builder.getDoubleTy(), 2);
10253
10254 case SVETypeFlags::EltTyBool8:
10255 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
10256 case SVETypeFlags::EltTyBool16:
10257 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
10258 case SVETypeFlags::EltTyBool32:
10259 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
10260 case SVETypeFlags::EltTyBool64:
10261 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
10262 }
10263}
10264
10265llvm::Value *
10266CodeGenFunction::EmitSVEAllTruePred(const SVETypeFlags &TypeFlags) {
10267 Function *Ptrue =
10268 CGM.getIntrinsic(Intrinsic::aarch64_sve_ptrue, getSVEPredType(TypeFlags));
10269 return Builder.CreateCall(Ptrue, {Builder.getInt32(/*SV_ALL*/ 31)});
10270}
10271
10272constexpr unsigned SVEBitsPerBlock = 128;
10273
10274static llvm::ScalableVectorType *getSVEVectorForElementType(llvm::Type *EltTy) {
10275 unsigned NumElts = SVEBitsPerBlock / EltTy->getScalarSizeInBits();
10276 return llvm::ScalableVectorType::get(EltTy, NumElts);
10277}
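// For example (illustrative only): with SVEBitsPerBlock = 128, an i16 element
// type maps to <vscale x 8 x i16> and a double element type to
// <vscale x 2 x double>.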
10278
10279// Reinterpret the input predicate so that it can be used to correctly isolate
10280// the elements of the specified datatype.
10281Value *CodeGenFunction::EmitSVEPredicateCast(Value *Pred,
10282 llvm::ScalableVectorType *VTy) {
10283
10284 if (isa<TargetExtType>(Pred->getType()) &&
10285 cast<TargetExtType>(Pred->getType())->getName() == "aarch64.svcount")
10286 return Pred;
10287
10288 auto *RTy = llvm::VectorType::get(IntegerType::get(getLLVMContext(), 1), VTy);
10289 if (Pred->getType() == RTy)
10290 return Pred;
10291
10292 unsigned IntID;
10293 llvm::Type *IntrinsicTy;
10294 switch (VTy->getMinNumElements()) {
10295 default:
10296 llvm_unreachable("unsupported element count!");
10297 case 1:
10298 case 2:
10299 case 4:
10300 case 8:
10301 IntID = Intrinsic::aarch64_sve_convert_from_svbool;
10302 IntrinsicTy = RTy;
10303 break;
10304 case 16:
10305 IntID = Intrinsic::aarch64_sve_convert_to_svbool;
10306 IntrinsicTy = Pred->getType();
10307 break;
10308 }
10309
10310 Function *F = CGM.getIntrinsic(IntID, IntrinsicTy);
10311 Value *C = Builder.CreateCall(F, Pred);
10312 assert(C->getType() == RTy && "Unexpected return type!");
10313 return C;
10314}
10315
10316Value *CodeGenFunction::EmitSVEPredicateTupleCast(Value *PredTuple,
10317 llvm::StructType *Ty) {
10318 if (PredTuple->getType() == Ty)
10319 return PredTuple;
10320
10321 Value *Ret = llvm::PoisonValue::get(Ty);
10322 for (unsigned I = 0; I < Ty->getNumElements(); ++I) {
10323 Value *Pred = Builder.CreateExtractValue(PredTuple, I);
10324 Pred = EmitSVEPredicateCast(
10325 Pred, cast<llvm::ScalableVectorType>(Ty->getTypeAtIndex(I)));
10326 Ret = Builder.CreateInsertValue(Ret, Pred, I);
10327 }
10328
10329 return Ret;
10330}
10331
10332Value *CodeGenFunction::EmitSVEGatherLoad(const SVETypeFlags &TypeFlags,
10333 SmallVectorImpl<Value *> &Ops,
10334 unsigned IntID) {
10335 auto *ResultTy = getSVEType(TypeFlags);
10336 auto *OverloadedTy =
10337 llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), ResultTy);
10338
10339 Function *F = nullptr;
10340 if (Ops[1]->getType()->isVectorTy())
10341 // This is the "vector base, scalar offset" case. In order to uniquely
10342 // map this built-in to an LLVM IR intrinsic, we need both the return type
10343 // and the type of the vector base.
10344 F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[1]->getType()});
10345 else
10346 // This is the "scalar base, vector offset case". The type of the offset
10347 // is encoded in the name of the intrinsic. We only need to specify the
10348 // return type in order to uniquely map this built-in to an LLVM IR
10349 // intrinsic.
10350 F = CGM.getIntrinsic(IntID, OverloadedTy);
10351
10352 // At the ACLE level there's only one predicate type, svbool_t, which is
10353 // mapped to <n x 16 x i1>. However, this might be incompatible with the
10354 // actual type being loaded. For example, when loading doubles (i64) the
10355 // predicate should be <n x 2 x i1> instead. At the IR level the type of
10356 // the predicate and the data being loaded must match. Cast to the type
10357 // expected by the intrinsic. The intrinsic itself should be defined in
10358 // a way that enforces relations between parameter types.
10359 Ops[0] = EmitSVEPredicateCast(
10360 Ops[0], cast<llvm::ScalableVectorType>(F->getArg(0)->getType()));
10361
10362 // Pass 0 when the offset is missing. This can only be applied when using
10363 // the "vector base" addressing mode for which ACLE allows no offset. The
10364 // corresponding LLVM IR always requires an offset.
10365 if (Ops.size() == 2) {
10366 assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");
10367 Ops.push_back(ConstantInt::get(Int64Ty, 0));
10368 }
10369
10370 // For "vector base, scalar index" scale the index so that it becomes a
10371 // scalar offset.
10372 if (!TypeFlags.isByteIndexed() && Ops[1]->getType()->isVectorTy()) {
10373 unsigned BytesPerElt =
10374 OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
10375 Ops[2] = Builder.CreateShl(Ops[2], Log2_32(BytesPerElt));
10376 }
10377
10378 Value *Call = Builder.CreateCall(F, Ops);
10379
10380 // The following sext/zext is only needed when ResultTy != OverloadedTy. In
10381 // other cases it's folded into a nop.
10382 return TypeFlags.isZExtReturn() ? Builder.CreateZExt(Call, ResultTy)
10383 : Builder.CreateSExt(Call, ResultTy);
10384}
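// A worked example of the index scaling above (illustrative only): for a
// gather of 64-bit elements BytesPerElt is 8, so the "vector base, scalar
// index" form shifts the index left by Log2_32(8) = 3, turning it into a byte
// offset before the intrinsic call.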
10385
10386Value *CodeGenFunction::EmitSVEScatterStore(const SVETypeFlags &TypeFlags,
10387 SmallVectorImpl<Value *> &Ops,
10388 unsigned IntID) {
10389 auto *SrcDataTy = getSVEType(TypeFlags);
10390 auto *OverloadedTy =
10391 llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), SrcDataTy);
10392
10393 // In ACLE the source data is passed in the last argument, whereas in LLVM IR
10394 // it's the first argument. Move it accordingly.
10395 Ops.insert(Ops.begin(), Ops.pop_back_val());
10396
10397 Function *F = nullptr;
10398 if (Ops[2]->getType()->isVectorTy())
10399 // This is the "vector base, scalar offset" case. In order to uniquely
10400 // map this built-in to an LLVM IR intrinsic, we need both the return type
10401 // and the type of the vector base.
10402 F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[2]->getType()});
10403 else
10404 // This is the "scalar base, vector offset case". The type of the offset
10405 // is encoded in the name of the intrinsic. We only need to specify the
10406 // return type in order to uniquely map this built-in to an LLVM IR
10407 // intrinsic.
10408 F = CGM.getIntrinsic(IntID, OverloadedTy);
10409
10410 // Pass 0 when the offset is missing. This can only be applied when using
10411 // the "vector base" addressing mode for which ACLE allows no offset. The
10412 // corresponding LLVM IR always requires an offset.
10413 if (Ops.size() == 3) {
10414 assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");
10415 Ops.push_back(ConstantInt::get(Int64Ty, 0));
10416 }
10417
10418 // Truncation is needed when SrcDataTy != OverloadedTy. In other cases it's
10419 // folded into a nop.
10420 Ops[0] = Builder.CreateTrunc(Ops[0], OverloadedTy);
10421
10422 // At the ACLE level there's only one predicate type, svbool_t, which is
10423 // mapped to <n x 16 x i1>. However, this might be incompatible with the
10424 // actual type being stored. For example, when storing doubles (i64) the
10425 // predicate should be <n x 2 x i1> instead. At the IR level the type of
10426 // the predicate and the data being stored must match. Cast to the type
10427 // expected by the intrinsic. The intrinsic itself should be defined in
10428 // a way that enforces relations between parameter types.
10429 Ops[1] = EmitSVEPredicateCast(
10430 Ops[1], cast<llvm::ScalableVectorType>(F->getArg(1)->getType()));
10431
10432 // For "vector base, scalar index" scale the index so that it becomes a
10433 // scalar offset.
10434 if (!TypeFlags.isByteIndexed() && Ops[2]->getType()->isVectorTy()) {
10435 unsigned BytesPerElt =
10436 OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
10437 Ops[3] = Builder.CreateShl(Ops[3], Log2_32(BytesPerElt));
10438 }
10439
10440 return Builder.CreateCall(F, Ops);
10441}
10442
10443Value *CodeGenFunction::EmitSVEGatherPrefetch(const SVETypeFlags &TypeFlags,
10444 SmallVectorImpl<Value *> &Ops,
10445 unsigned IntID) {
10446 // The gather prefetches are overloaded on the vector input - this can either
10447 // be the vector of base addresses or vector of offsets.
10448 auto *OverloadedTy = dyn_cast<llvm::ScalableVectorType>(Ops[1]->getType());
10449 if (!OverloadedTy)
10450 OverloadedTy = cast<llvm::ScalableVectorType>(Ops[2]->getType());
10451
10452 // Cast the predicate from svbool_t to the right number of elements.
10453 Ops[0] = EmitSVEPredicateCast(Ops[0], OverloadedTy);
10454
10455 // vector + imm addressing modes
10456 if (Ops[1]->getType()->isVectorTy()) {
10457 if (Ops.size() == 3) {
10458 // Pass 0 for 'vector+imm' when the index is omitted.
10459 Ops.push_back(ConstantInt::get(Int64Ty, 0));
10460
10461 // The sv_prfop is the last operand in the builtin and IR intrinsic.
10462 std::swap(Ops[2], Ops[3]);
10463 } else {
10464 // Index needs to be passed as scaled offset.
10465 llvm::Type *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
10466 unsigned BytesPerElt = MemEltTy->getPrimitiveSizeInBits() / 8;
10467 if (BytesPerElt > 1)
10468 Ops[2] = Builder.CreateShl(Ops[2], Log2_32(BytesPerElt));
10469 }
10470 }
10471
10472 Function *F = CGM.getIntrinsic(IntID, OverloadedTy);
10473 return Builder.CreateCall(F, Ops);
10474}
10475
10476Value *CodeGenFunction::EmitSVEStructLoad(const SVETypeFlags &TypeFlags,
10477 SmallVectorImpl<Value *> &Ops,
10478 unsigned IntID) {
10479 llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
10480 Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy);
10481 Value *BasePtr = Ops[1];
10482
10483 // Does the load have an offset?
10484 if (Ops.size() > 2)
10485 BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]);
10486
10487 Function *F = CGM.getIntrinsic(IntID, {VTy});
10488 return Builder.CreateCall(F, {Predicate, BasePtr});
10489}
10490
10491Value *CodeGenFunction::EmitSVEStructStore(const SVETypeFlags &TypeFlags,
10492 SmallVectorImpl<Value *> &Ops,
10493 unsigned IntID) {
10494 llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
10495
10496 unsigned N;
10497 switch (IntID) {
10498 case Intrinsic::aarch64_sve_st2:
10499 case Intrinsic::aarch64_sve_st1_pn_x2:
10500 case Intrinsic::aarch64_sve_stnt1_pn_x2:
10501 case Intrinsic::aarch64_sve_st2q:
10502 N = 2;
10503 break;
10504 case Intrinsic::aarch64_sve_st3:
10505 case Intrinsic::aarch64_sve_st3q:
10506 N = 3;
10507 break;
10508 case Intrinsic::aarch64_sve_st4:
10509 case Intrinsic::aarch64_sve_st1_pn_x4:
10510 case Intrinsic::aarch64_sve_stnt1_pn_x4:
10511 case Intrinsic::aarch64_sve_st4q:
10512 N = 4;
10513 break;
10514 default:
10515 llvm_unreachable("unknown intrinsic!");
10516 }
10517
10518 Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy);
10519 Value *BasePtr = Ops[1];
10520
10521 // Does the store have an offset?
10522 if (Ops.size() > (2 + N))
10523 BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]);
10524
10525 // The llvm.aarch64.sve.st2/3/4 intrinsics take legal part vectors, so we
10526 // need to break up the tuple vector.
10527 SmallVector<llvm::Value *, 5> Operands;
10528 for (unsigned I = Ops.size() - N; I < Ops.size(); ++I)
10529 Operands.push_back(Ops[I]);
10530 Operands.append({Predicate, BasePtr});
10531 Function *F = CGM.getIntrinsic(IntID, { VTy });
10532
10533 return Builder.CreateCall(F, Operands);
10534}
10535
10536// SVE2's svpmullb and svpmullt builtins are similar to the svpmullb_pair and
10537// svpmullt_pair intrinsics, with the exception that their results are bitcast
10538// to a wider type.
10539Value *CodeGenFunction::EmitSVEPMull(const SVETypeFlags &TypeFlags,
10540 SmallVectorImpl<Value *> &Ops,
10541 unsigned BuiltinID) {
10542 // Splat scalar operand to vector (intrinsics with _n infix)
10543 if (TypeFlags.hasSplatOperand()) {
10544 unsigned OpNo = TypeFlags.getSplatOperand();
10545 Ops[OpNo] = EmitSVEDupX(Ops[OpNo]);
10546 }
10547
10548 // The pair-wise function has a narrower overloaded type.
10549 Function *F = CGM.getIntrinsic(BuiltinID, Ops[0]->getType());
10550 Value *Call = Builder.CreateCall(F, {Ops[0], Ops[1]});
10551
10552 // Now bitcast to the wider result type.
10553 llvm::ScalableVectorType *Ty = getSVEType(TypeFlags);
10554 return EmitSVEReinterpret(Call, Ty);
10555}
10556
10557Value *CodeGenFunction::EmitSVEMovl(const SVETypeFlags &TypeFlags,
10558 ArrayRef<Value *> Ops, unsigned BuiltinID) {
10559 llvm::Type *OverloadedTy = getSVEType(TypeFlags);
10560 Function *F = CGM.getIntrinsic(BuiltinID, OverloadedTy);
10561 return Builder.CreateCall(F, {Ops[0], Builder.getInt32(0)});
10562}
10563
10564Value *CodeGenFunction::EmitSVEPrefetchLoad(const SVETypeFlags &TypeFlags,
10565 SmallVectorImpl<Value *> &Ops,
10566 unsigned BuiltinID) {
10567 auto *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
10568 auto *VectorTy = getSVEVectorForElementType(MemEltTy);
10569 auto *MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
10570
10571 Value *Predicate = EmitSVEPredicateCast(Ops[0], MemoryTy);
10572 Value *BasePtr = Ops[1];
10573
10574 // Implement the index operand if not omitted.
10575 if (Ops.size() > 3)
10576 BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]);
10577
10578 Value *PrfOp = Ops.back();
10579
10580 Function *F = CGM.getIntrinsic(BuiltinID, Predicate->getType());
10581 return Builder.CreateCall(F, {Predicate, BasePtr, PrfOp});
10582}
10583
10584Value *CodeGenFunction::EmitSVEMaskedLoad(const CallExpr *E,
10585 llvm::Type *ReturnTy,
10586 SmallVectorImpl<Value *> &Ops,
10587 unsigned IntrinsicID,
10588 bool IsZExtReturn) {
10589 QualType LangPTy = E->getArg(1)->getType();
10590 llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
10591 LangPTy->castAs<PointerType>()->getPointeeType());
10592
10593 // The vector type that is returned may be different from the
10594 // eventual type loaded from memory.
10595 auto VectorTy = cast<llvm::ScalableVectorType>(ReturnTy);
10596 llvm::ScalableVectorType *MemoryTy = nullptr;
10597 llvm::ScalableVectorType *PredTy = nullptr;
10598 bool IsQuadLoad = false;
10599 switch (IntrinsicID) {
10600 case Intrinsic::aarch64_sve_ld1uwq:
10601 case Intrinsic::aarch64_sve_ld1udq:
10602 MemoryTy = llvm::ScalableVectorType::get(MemEltTy, 1);
10603 PredTy = llvm::ScalableVectorType::get(
10604 llvm::Type::getInt1Ty(getLLVMContext()), 1);
10605 IsQuadLoad = true;
10606 break;
10607 default:
10608 MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
10609 PredTy = MemoryTy;
10610 break;
10611 }
10612
10613 Value *Predicate = EmitSVEPredicateCast(Ops[0], PredTy);
10614 Value *BasePtr = Ops[1];
10615
10616 // Does the load have an offset?
10617 if (Ops.size() > 2)
10618 BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]);
10619
10620 Function *F = CGM.getIntrinsic(IntrinsicID, IsQuadLoad ? VectorTy : MemoryTy);
10621 auto *Load =
10622 cast<llvm::Instruction>(Builder.CreateCall(F, {Predicate, BasePtr}));
10623 auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType());
10624 CGM.DecorateInstructionWithTBAA(Load, TBAAInfo);
10625
10626 if (IsQuadLoad)
10627 return Load;
10628
10629 return IsZExtReturn ? Builder.CreateZExt(Load, VectorTy)
10630 : Builder.CreateSExt(Load, VectorTy);
10631}
10632
10633Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E,
10634 SmallVectorImpl<Value *> &Ops,
10635 unsigned IntrinsicID) {
10636 QualType LangPTy = E->getArg(1)->getType();
10637 llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
10638 LangPTy->castAs<PointerType>()->getPointeeType());
10639
10640 // The vector type that is stored may be different from the
10641 // eventual type stored to memory.
10642 auto VectorTy = cast<llvm::ScalableVectorType>(Ops.back()->getType());
10643 auto MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
10644
10645 auto PredTy = MemoryTy;
10646 auto AddrMemoryTy = MemoryTy;
10647 bool IsQuadStore = false;
10648
10649 switch (IntrinsicID) {
10650 case Intrinsic::aarch64_sve_st1wq:
10651 case Intrinsic::aarch64_sve_st1dq:
10652 AddrMemoryTy = llvm::ScalableVectorType::get(MemEltTy, 1);
10653 PredTy =
10654 llvm::ScalableVectorType::get(IntegerType::get(getLLVMContext(), 1), 1);
10655 IsQuadStore = true;
10656 break;
10657 default:
10658 break;
10659 }
10660 Value *Predicate = EmitSVEPredicateCast(Ops[0], PredTy);
10661 Value *BasePtr = Ops[1];
10662
10663 // Does the store have an offset?
10664 if (Ops.size() == 4)
10665 BasePtr = Builder.CreateGEP(AddrMemoryTy, BasePtr, Ops[2]);
10666
10667 // Last value is always the data
10668 Value *Val =
10669 IsQuadStore ? Ops.back() : Builder.CreateTrunc(Ops.back(), MemoryTy);
10670
10671 Function *F =
10672 CGM.getIntrinsic(IntrinsicID, IsQuadStore ? VectorTy : MemoryTy);
10673 auto *Store =
10674 cast<llvm::Instruction>(Builder.CreateCall(F, {Val, Predicate, BasePtr}));
10675 auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType());
10676 CGM.DecorateInstructionWithTBAA(Store, TBAAInfo);
10677 return Store;
10678}
10679
10680Value *CodeGenFunction::EmitSMELd1St1(const SVETypeFlags &TypeFlags,
10681 SmallVectorImpl<Value *> &Ops,
10682 unsigned IntID) {
10683 Ops[2] = EmitSVEPredicateCast(
10684 Ops[2], getSVEVectorForElementType(SVEBuiltinMemEltTy(TypeFlags)));
10685
10686 SmallVector<Value *> NewOps;
10687 NewOps.push_back(Ops[2]);
10688
10689 llvm::Value *BasePtr = Ops[3];
10690 llvm::Value *RealSlice = Ops[1];
10691 // If the intrinsic contains the vnum parameter, multiply it with the vector
10692 // size in bytes.
10693 if (Ops.size() == 5) {
10694 Function *StreamingVectorLength =
10695 CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb);
10696 llvm::Value *StreamingVectorLengthCall =
10697 Builder.CreateCall(StreamingVectorLength);
10698 llvm::Value *Mulvl =
10699 Builder.CreateMul(StreamingVectorLengthCall, Ops[4], "mulvl");
10700 // The type of the ptr parameter is void *, so use Int8Ty here.
10701 BasePtr = Builder.CreateGEP(Int8Ty, Ops[3], Mulvl);
10702 RealSlice = Builder.CreateZExt(RealSlice, Int64Ty);
10703 RealSlice = Builder.CreateAdd(RealSlice, Ops[4]);
10704 RealSlice = Builder.CreateTrunc(RealSlice, Int32Ty);
10705 }
10706 NewOps.push_back(BasePtr);
10707 NewOps.push_back(Ops[0]);
10708 NewOps.push_back(RealSlice);
10709 Function *F = CGM.getIntrinsic(IntID);
10710 return Builder.CreateCall(F, NewOps);
10711}
10712
10713Value *CodeGenFunction::EmitSMEReadWrite(const SVETypeFlags &TypeFlags,
10714 SmallVectorImpl<Value *> &Ops,
10715 unsigned IntID) {
10716 auto *VecTy = getSVEType(TypeFlags);
10717 Function *F = CGM.getIntrinsic(IntID, VecTy);
10718 if (TypeFlags.isReadZA())
10719 Ops[1] = EmitSVEPredicateCast(Ops[1], VecTy);
10720 else if (TypeFlags.isWriteZA())
10721 Ops[2] = EmitSVEPredicateCast(Ops[2], VecTy);
10722 return Builder.CreateCall(F, Ops);
10723}
10724
10725Value *CodeGenFunction::EmitSMEZero(const SVETypeFlags &TypeFlags,
10726 SmallVectorImpl<Value *> &Ops,
10727 unsigned IntID) {
10728 // The svzero_za() intrinsic zeros the entire ZA tile and has no parameters.
10729 if (Ops.size() == 0)
10730 Ops.push_back(llvm::ConstantInt::get(Int32Ty, 255));
10731 Function *F = CGM.getIntrinsic(IntID, {});
10732 return Builder.CreateCall(F, Ops);
10733}
10734
10735Value *CodeGenFunction::EmitSMELdrStr(const SVETypeFlags &TypeFlags,
10736 SmallVectorImpl<Value *> &Ops,
10737 unsigned IntID) {
10738 if (Ops.size() == 2)
10739 Ops.push_back(Builder.getInt32(0));
10740 else
10741 Ops[2] = Builder.CreateIntCast(Ops[2], Int32Ty, true);
10742 Function *F = CGM.getIntrinsic(IntID, {});
10743 return Builder.CreateCall(F, Ops);
10744}
10745
10746// Limit the usage of scalable llvm IR generated by the ACLE by using the
10747// sve dup.x intrinsic instead of IRBuilder::CreateVectorSplat.
10748Value *CodeGenFunction::EmitSVEDupX(Value *Scalar, llvm::Type *Ty) {
10749 return Builder.CreateVectorSplat(
10750 cast<llvm::VectorType>(Ty)->getElementCount(), Scalar);
10751}
10752
10753Value *CodeGenFunction::EmitSVEDupX(Value *Scalar) {
10754 if (auto *Ty = Scalar->getType(); Ty->isVectorTy()) {
10755#ifndef NDEBUG
10756 auto *VecTy = cast<llvm::VectorType>(Ty);
10757 ElementCount EC = VecTy->getElementCount();
10758 assert(EC.isScalar() && VecTy->getElementType() == Int8Ty &&
10759 "Only <1 x i8> expected");
10760#endif
10761 Scalar = Builder.CreateExtractElement(Scalar, uint64_t(0));
10762 }
10763 return EmitSVEDupX(Scalar, getSVEVectorForElementType(Scalar->getType()));
10764}
10765
10766Value *CodeGenFunction::EmitSVEReinterpret(Value *Val, llvm::Type *Ty) {
10767 // FIXME: For big endian this needs an additional REV, or needs a separate
10768 // intrinsic that is code-generated as a no-op, because the LLVM bitcast
10769 // instruction is defined as 'bitwise' equivalent from memory point of
10770 // view (when storing/reloading), whereas the svreinterpret builtin
10771 // implements bitwise equivalent cast from register point of view.
10772 // LLVM CodeGen for a bitcast must add an explicit REV for big-endian.
10773
10774 if (auto *StructTy = dyn_cast<StructType>(Ty)) {
10775 Value *Tuple = llvm::PoisonValue::get(Ty);
10776
10777 for (unsigned I = 0; I < StructTy->getNumElements(); ++I) {
10778 Value *In = Builder.CreateExtractValue(Val, I);
10779 Value *Out = Builder.CreateBitCast(In, StructTy->getTypeAtIndex(I));
10780 Tuple = Builder.CreateInsertValue(Tuple, Out, I);
10781 }
10782
10783 return Tuple;
10784 }
10785
10786 return Builder.CreateBitCast(Val, Ty);
10787}
10788
10789static void InsertExplicitZeroOperand(CGBuilderTy &Builder, llvm::Type *Ty,
10790 SmallVectorImpl<Value *> &Ops) {
10791 auto *SplatZero = Constant::getNullValue(Ty);
10792 Ops.insert(Ops.begin(), SplatZero);
10793}
10794
10795static void InsertExplicitUndefOperand(CGBuilderTy &Builder, llvm::Type *Ty,
10796 SmallVectorImpl<Value *> &Ops) {
10797 auto *SplatUndef = UndefValue::get(Ty);
10798 Ops.insert(Ops.begin(), SplatUndef);
10799}
10800
10801SmallVector<llvm::Type *, 2>
10802CodeGenFunction::getSVEOverloadTypes(const SVETypeFlags &TypeFlags,
10803 llvm::Type *ResultType,
10804 ArrayRef<Value *> Ops) {
10805 if (TypeFlags.isOverloadNone())
10806 return {};
10807
10808 llvm::Type *DefaultType = getSVEType(TypeFlags);
10809
10810 if (TypeFlags.isOverloadWhileOrMultiVecCvt())
10811 return {DefaultType, Ops[1]->getType()};
10812
10813 if (TypeFlags.isOverloadWhileRW())
10814 return {getSVEPredType(TypeFlags), Ops[0]->getType()};
10815
10816 if (TypeFlags.isOverloadCvt())
10817 return {Ops[0]->getType(), Ops.back()->getType()};
10818
10819 if (TypeFlags.isReductionQV() && !ResultType->isScalableTy() &&
10820 ResultType->isVectorTy())
10821 return {ResultType, Ops[1]->getType()};
10822
10823 assert(TypeFlags.isOverloadDefault() && "Unexpected value for overloads");
10824 return {DefaultType};
10825}
10826
10827Value *CodeGenFunction::EmitSVETupleSetOrGet(const SVETypeFlags &TypeFlags,
10828 ArrayRef<Value *> Ops) {
10829 assert((TypeFlags.isTupleSet() || TypeFlags.isTupleGet()) &&
10830 "Expects TypleFlags.isTupleSet() or TypeFlags.isTupleGet()");
10831 unsigned Idx = cast<ConstantInt>(Ops[1])->getZExtValue();
10832
10833 if (TypeFlags.isTupleSet())
10834 return Builder.CreateInsertValue(Ops[0], Ops[2], Idx);
10835 return Builder.CreateExtractValue(Ops[0], Idx);
10836}
10837
10838Value *CodeGenFunction::EmitSVETupleCreate(const SVETypeFlags &TypeFlags,
10839 llvm::Type *Ty,
10840 ArrayRef<Value *> Ops) {
10841 assert(TypeFlags.isTupleCreate() && "Expects TypeFlags.isTupleCreate()");
10842
10843 Value *Tuple = llvm::PoisonValue::get(Ty);
10844 for (unsigned Idx = 0; Idx < Ops.size(); Idx++)
10845 Tuple = Builder.CreateInsertValue(Tuple, Ops[Idx], Idx);
10846
10847 return Tuple;
10848}
10849
10850void CodeGenFunction::GetAArch64SVEProcessedOperands(
10851 unsigned BuiltinID, const CallExpr *E, SmallVectorImpl<Value *> &Ops,
10852 SVETypeFlags TypeFlags) {
10853 // Find out if any arguments are required to be integer constant expressions.
10854 unsigned ICEArguments = 0;
10855 ASTContext::GetBuiltinTypeError Error;
10856 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
10857 assert(Error == ASTContext::GE_None && "Should not codegen an error");
10858
10859 // Tuple set/get only requires one insert/extract vector, which is
10860 // created by EmitSVETupleSetOrGet.
10861 bool IsTupleGetOrSet = TypeFlags.isTupleSet() || TypeFlags.isTupleGet();
10862
10863 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
10864 bool IsICE = ICEArguments & (1 << i);
10865 Value *Arg = EmitScalarExpr(E->getArg(i));
10866
10867 if (IsICE) {
10868 // If this is required to be a constant, constant fold it so that we know
10869 // that the generated intrinsic gets a ConstantInt.
10870 std::optional<llvm::APSInt> Result =
10871 E->getArg(i)->getIntegerConstantExpr(getContext());
10872 assert(Result && "Expected argument to be a constant");
10873
10874 // Immediates for SVE llvm intrinsics are always 32bit. We can safely
10875 // truncate because the immediate has been range checked and no valid
10876 // immediate requires more than a handful of bits.
10877 *Result = Result->extOrTrunc(32);
10878 Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), *Result));
10879 continue;
10880 }
10881
10882 if (isa<StructType>(Arg->getType()) && !IsTupleGetOrSet) {
10883 for (unsigned I = 0; I < Arg->getType()->getStructNumElements(); ++I)
10884 Ops.push_back(Builder.CreateExtractValue(Arg, I));
10885
10886 continue;
10887 }
10888
10889 Ops.push_back(Arg);
10890 }
10891}
10892
10893Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
10894 const CallExpr *E) {
10895 llvm::Type *Ty = ConvertType(E->getType());
10896 if (BuiltinID >= SVE::BI__builtin_sve_reinterpret_s8_s8 &&
10897 BuiltinID <= SVE::BI__builtin_sve_reinterpret_f64_f64_x4) {
10898 Value *Val = EmitScalarExpr(E->getArg(0));
10899 return EmitSVEReinterpret(Val, Ty);
10900 }
10901
10902 auto *Builtin = findARMVectorIntrinsicInMap(AArch64SVEIntrinsicMap, BuiltinID,
10903 AArch64SVEIntrinsicsProvenSorted);
10904
10905 llvm::SmallVector<Value *, 4> Ops;
10906 SVETypeFlags TypeFlags(Builtin->TypeModifier);
10907 GetAArch64SVEProcessedOperands(BuiltinID, E, Ops, TypeFlags);
10908
10909 if (TypeFlags.isLoad())
10910 return EmitSVEMaskedLoad(E, Ty, Ops, Builtin->LLVMIntrinsic,
10911 TypeFlags.isZExtReturn());
10912 else if (TypeFlags.isStore())
10913 return EmitSVEMaskedStore(E, Ops, Builtin->LLVMIntrinsic);
10914 else if (TypeFlags.isGatherLoad())
10915 return EmitSVEGatherLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10916 else if (TypeFlags.isScatterStore())
10917 return EmitSVEScatterStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10918 else if (TypeFlags.isPrefetch())
10919 return EmitSVEPrefetchLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10920 else if (TypeFlags.isGatherPrefetch())
10921 return EmitSVEGatherPrefetch(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10922 else if (TypeFlags.isStructLoad())
10923 return EmitSVEStructLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10924 else if (TypeFlags.isStructStore())
10925 return EmitSVEStructStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10926 else if (TypeFlags.isTupleSet() || TypeFlags.isTupleGet())
10927 return EmitSVETupleSetOrGet(TypeFlags, Ops);
10928 else if (TypeFlags.isTupleCreate())
10929 return EmitSVETupleCreate(TypeFlags, Ty, Ops);
10930 else if (TypeFlags.isUndef())
10931 return UndefValue::get(Ty);
10932 else if (Builtin->LLVMIntrinsic != 0) {
10933 // Emit set FPMR for intrinsics that require it
10934 if (TypeFlags.setsFPMR())
10935 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_set_fpmr),
10936 Ops.pop_back_val());
10937 if (TypeFlags.getMergeType() == SVETypeFlags::MergeZeroExp)
10938 InsertExplicitZeroOperand(Builder, Ty, Ops);
10939
10940 if (TypeFlags.getMergeType() == SVETypeFlags::MergeAnyExp)
10941 InsertExplicitUndefOperand(Builder, Ty, Ops);
10942
10943 // Some ACLE builtins leave out the argument to specify the predicate
10944 // pattern, which is expected to be expanded to an SV_ALL pattern.
10945 if (TypeFlags.isAppendSVALL())
10946 Ops.push_back(Builder.getInt32(/*SV_ALL*/ 31));
10947 if (TypeFlags.isInsertOp1SVALL())
10948 Ops.insert(&Ops[1], Builder.getInt32(/*SV_ALL*/ 31));
10949
10950 // Predicates must match the main datatype.
10951 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
10952 if (auto PredTy = dyn_cast<llvm::VectorType>(Ops[i]->getType()))
10953 if (PredTy->getElementType()->isIntegerTy(1))
10954 Ops[i] = EmitSVEPredicateCast(Ops[i], getSVEType(TypeFlags));
10955
10956 // Splat scalar operand to vector (intrinsics with _n infix)
10957 if (TypeFlags.hasSplatOperand()) {
10958 unsigned OpNo = TypeFlags.getSplatOperand();
10959 Ops[OpNo] = EmitSVEDupX(Ops[OpNo]);
10960 }
10961
10962 if (TypeFlags.isReverseCompare())
10963 std::swap(Ops[1], Ops[2]);
10964 else if (TypeFlags.isReverseUSDOT())
10965 std::swap(Ops[1], Ops[2]);
10966 else if (TypeFlags.isReverseMergeAnyBinOp() &&
10967 TypeFlags.getMergeType() == SVETypeFlags::MergeAny)
10968 std::swap(Ops[1], Ops[2]);
10969 else if (TypeFlags.isReverseMergeAnyAccOp() &&
10970 TypeFlags.getMergeType() == SVETypeFlags::MergeAny)
10971 std::swap(Ops[1], Ops[3]);
10972
10973 // Predicated intrinsics with _z suffix need a select w/ zeroinitializer.
10974 if (TypeFlags.getMergeType() == SVETypeFlags::MergeZero) {
10975 llvm::Type *OpndTy = Ops[1]->getType();
10976 auto *SplatZero = Constant::getNullValue(OpndTy);
10977 Ops[1] = Builder.CreateSelect(Ops[0], Ops[1], SplatZero);
10978 }
10979
10980 Function *F = CGM.getIntrinsic(Builtin->LLVMIntrinsic,
10981 getSVEOverloadTypes(TypeFlags, Ty, Ops));
10982 Value *Call = Builder.CreateCall(F, Ops);
10983
10984 if (Call->getType() == Ty)
10985 return Call;
10986
10987 // Predicate results must be converted to svbool_t.
10988 if (auto PredTy = dyn_cast<llvm::ScalableVectorType>(Ty))
10989 return EmitSVEPredicateCast(Call, PredTy);
10990 if (auto PredTupleTy = dyn_cast<llvm::StructType>(Ty))
10991 return EmitSVEPredicateTupleCast(Call, PredTupleTy);
10992
10993 llvm_unreachable("unsupported element count!");
10994 }
10995
10996 switch (BuiltinID) {
10997 default:
10998 return nullptr;
10999
11000 case SVE::BI__builtin_sve_svreinterpret_b: {
11001 auto SVCountTy =
11002 llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
11003 Function *CastFromSVCountF =
11004 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool, SVCountTy);
11005 return Builder.CreateCall(CastFromSVCountF, Ops[0]);
11006 }
11007 case SVE::BI__builtin_sve_svreinterpret_c: {
11008 auto SVCountTy =
11009 llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
11010 Function *CastToSVCountF =
11011 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, SVCountTy);
11012 return Builder.CreateCall(CastToSVCountF, Ops[0]);
11013 }
11014
11015 case SVE::BI__builtin_sve_svpsel_lane_b8:
11016 case SVE::BI__builtin_sve_svpsel_lane_b16:
11017 case SVE::BI__builtin_sve_svpsel_lane_b32:
11018 case SVE::BI__builtin_sve_svpsel_lane_b64:
11019 case SVE::BI__builtin_sve_svpsel_lane_c8:
11020 case SVE::BI__builtin_sve_svpsel_lane_c16:
11021 case SVE::BI__builtin_sve_svpsel_lane_c32:
11022 case SVE::BI__builtin_sve_svpsel_lane_c64: {
11023 bool IsSVCount = isa<TargetExtType>(Ops[0]->getType());
11024 assert(((!IsSVCount || cast<TargetExtType>(Ops[0]->getType())->getName() ==
11025 "aarch64.svcount")) &&
11026 "Unexpected TargetExtType");
11027 auto SVCountTy =
11028 llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
11029 Function *CastFromSVCountF =
11030 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool, SVCountTy);
11031 Function *CastToSVCountF =
11032 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, SVCountTy);
11033
11034 auto OverloadedTy = getSVEType(SVETypeFlags(Builtin->TypeModifier));
11035 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_psel, OverloadedTy);
11036 llvm::Value *Ops0 =
11037 IsSVCount ? Builder.CreateCall(CastFromSVCountF, Ops[0]) : Ops[0];
11038 llvm::Value *Ops1 = EmitSVEPredicateCast(Ops[1], OverloadedTy);
11039 llvm::Value *PSel = Builder.CreateCall(F, {Ops0, Ops1, Ops[2]});
11040 return IsSVCount ? Builder.CreateCall(CastToSVCountF, PSel) : PSel;
11041 }
11042 case SVE::BI__builtin_sve_svmov_b_z: {
11043 // svmov_b_z(pg, op) <=> svand_b_z(pg, op, op)
11044 SVETypeFlags TypeFlags(Builtin->TypeModifier);
11045 llvm::Type* OverloadedTy = getSVEType(TypeFlags);
11046 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_and_z, OverloadedTy);
11047 return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[1]});
11048 }
11049
11050 case SVE::BI__builtin_sve_svnot_b_z: {
11051 // svnot_b_z(pg, op) <=> sveor_b_z(pg, op, pg)
11052 SVETypeFlags TypeFlags(Builtin->TypeModifier);
11053 llvm::Type* OverloadedTy = getSVEType(TypeFlags);
11054 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_eor_z, OverloadedTy);
11055 return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[0]});
11056 }
11057
11058 case SVE::BI__builtin_sve_svmovlb_u16:
11059 case SVE::BI__builtin_sve_svmovlb_u32:
11060 case SVE::BI__builtin_sve_svmovlb_u64:
11061 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllb);
11062
11063 case SVE::BI__builtin_sve_svmovlb_s16:
11064 case SVE::BI__builtin_sve_svmovlb_s32:
11065 case SVE::BI__builtin_sve_svmovlb_s64:
11066 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllb);
11067
11068 case SVE::BI__builtin_sve_svmovlt_u16:
11069 case SVE::BI__builtin_sve_svmovlt_u32:
11070 case SVE::BI__builtin_sve_svmovlt_u64:
11071 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllt);
11072
11073 case SVE::BI__builtin_sve_svmovlt_s16:
11074 case SVE::BI__builtin_sve_svmovlt_s32:
11075 case SVE::BI__builtin_sve_svmovlt_s64:
11076 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllt);
11077
11078 case SVE::BI__builtin_sve_svpmullt_u16:
11079 case SVE::BI__builtin_sve_svpmullt_u64:
11080 case SVE::BI__builtin_sve_svpmullt_n_u16:
11081 case SVE::BI__builtin_sve_svpmullt_n_u64:
11082 return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullt_pair);
11083
11084 case SVE::BI__builtin_sve_svpmullb_u16:
11085 case SVE::BI__builtin_sve_svpmullb_u64:
11086 case SVE::BI__builtin_sve_svpmullb_n_u16:
11087 case SVE::BI__builtin_sve_svpmullb_n_u64:
11088 return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullb_pair);
11089
11090 case SVE::BI__builtin_sve_svdup_n_b8:
11091 case SVE::BI__builtin_sve_svdup_n_b16:
11092 case SVE::BI__builtin_sve_svdup_n_b32:
11093 case SVE::BI__builtin_sve_svdup_n_b64: {
11094 Value *CmpNE =
11095 Builder.CreateICmpNE(Ops[0], Constant::getNullValue(Ops[0]->getType()));
11096 llvm::ScalableVectorType *OverloadedTy = getSVEType(TypeFlags);
11097 Value *Dup = EmitSVEDupX(CmpNE, OverloadedTy);
11098 return EmitSVEPredicateCast(Dup, cast<llvm::ScalableVectorType>(Ty));
11099 }
11100
11101 case SVE::BI__builtin_sve_svdupq_n_b8:
11102 case SVE::BI__builtin_sve_svdupq_n_b16:
11103 case SVE::BI__builtin_sve_svdupq_n_b32:
11104 case SVE::BI__builtin_sve_svdupq_n_b64:
11105 case SVE::BI__builtin_sve_svdupq_n_u8:
11106 case SVE::BI__builtin_sve_svdupq_n_s8:
11107 case SVE::BI__builtin_sve_svdupq_n_u64:
11108 case SVE::BI__builtin_sve_svdupq_n_f64:
11109 case SVE::BI__builtin_sve_svdupq_n_s64:
11110 case SVE::BI__builtin_sve_svdupq_n_u16:
11111 case SVE::BI__builtin_sve_svdupq_n_f16:
11112 case SVE::BI__builtin_sve_svdupq_n_bf16:
11113 case SVE::BI__builtin_sve_svdupq_n_s16:
11114 case SVE::BI__builtin_sve_svdupq_n_u32:
11115 case SVE::BI__builtin_sve_svdupq_n_f32:
11116 case SVE::BI__builtin_sve_svdupq_n_s32: {
11117 // These builtins are implemented by packing the scalar operands into a fixed
11118 // vector, inserting it into a scalable vector, and splatting it with dupq_lane.
11119 unsigned NumOpnds = Ops.size();
11120
11121 bool IsBoolTy =
11122 cast<llvm::VectorType>(Ty)->getElementType()->isIntegerTy(1);
11123
11124 // For svdupq_n_b* the element type is an integer of width 128/numelts,
11125 // so that the compare can use the width that is natural for the expected
11126 // number of predicate lanes.
11127 llvm::Type *EltTy = Ops[0]->getType();
11128 if (IsBoolTy)
11129 EltTy = IntegerType::get(getLLVMContext(), SVEBitsPerBlock / NumOpnds);
11130
11131 SmallVector<llvm::Value *, 16> VecOps;
11132 for (unsigned I = 0; I < NumOpnds; ++I)
11133 VecOps.push_back(Builder.CreateZExt(Ops[I], EltTy));
11134 Value *Vec = BuildVector(VecOps);
11135
11136 llvm::Type *OverloadedTy = getSVEVectorForElementType(EltTy);
11137 Value *InsertSubVec = Builder.CreateInsertVector(
11138 OverloadedTy, PoisonValue::get(OverloadedTy), Vec, Builder.getInt64(0));
11139
11140 Function *F =
11141 CGM.getIntrinsic(Intrinsic::aarch64_sve_dupq_lane, OverloadedTy);
11142 Value *DupQLane =
11143 Builder.CreateCall(F, {InsertSubVec, Builder.getInt64(0)});
11144
11145 if (!IsBoolTy)
11146 return DupQLane;
11147
11148 SVETypeFlags TypeFlags(Builtin->TypeModifier);
11149 Value *Pred = EmitSVEAllTruePred(TypeFlags);
11150
11151 // For svdupq_n_b* we need to add an additional 'cmpne' with '0'.
11152 F = CGM.getIntrinsic(NumOpnds == 2 ? Intrinsic::aarch64_sve_cmpne
11153 : Intrinsic::aarch64_sve_cmpne_wide,
11154 OverloadedTy);
11155 Value *Call = Builder.CreateCall(
11156 F, {Pred, DupQLane, EmitSVEDupX(Builder.getInt64(0))});
11157 return EmitSVEPredicateCast(Call, cast<llvm::ScalableVectorType>(Ty));
11158 }
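// For illustration (a sketch of the svdupq_n_b8 lowering, with 16 scalar
// operands): each bool is zero-extended to i8 and packed into a <16 x i8>
// vector, that vector is inserted into the low 128 bits of an nxv16i8 value,
// aarch64.sve.dupq.lane replicates it into every 128-bit segment, and a wide
// compare-not-equal against zero under an all-true predicate produces the
// final nxv16i1 predicate.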
11159
11160 case SVE::BI__builtin_sve_svpfalse_b:
11161 return ConstantInt::getFalse(Ty);
11162
11163 case SVE::BI__builtin_sve_svpfalse_c: {
11164 auto SVBoolTy = ScalableVectorType::get(Builder.getInt1Ty(), 16);
11165 Function *CastToSVCountF =
11166 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, Ty);
11167 return Builder.CreateCall(CastToSVCountF, ConstantInt::getFalse(SVBoolTy));
11168 }
11169
11170 case SVE::BI__builtin_sve_svlen_bf16:
11171 case SVE::BI__builtin_sve_svlen_f16:
11172 case SVE::BI__builtin_sve_svlen_f32:
11173 case SVE::BI__builtin_sve_svlen_f64:
11174 case SVE::BI__builtin_sve_svlen_s8:
11175 case SVE::BI__builtin_sve_svlen_s16:
11176 case SVE::BI__builtin_sve_svlen_s32:
11177 case SVE::BI__builtin_sve_svlen_s64:
11178 case SVE::BI__builtin_sve_svlen_u8:
11179 case SVE::BI__builtin_sve_svlen_u16:
11180 case SVE::BI__builtin_sve_svlen_u32:
11181 case SVE::BI__builtin_sve_svlen_u64: {
11182 SVETypeFlags TF(Builtin->TypeModifier);
11183 auto VTy = cast<llvm::VectorType>(getSVEType(TF));
11184 auto *NumEls =
11185 llvm::ConstantInt::get(Ty, VTy->getElementCount().getKnownMinValue());
11186
11187 Function *F = CGM.getIntrinsic(Intrinsic::vscale, Ty);
11188 return Builder.CreateMul(NumEls, Builder.CreateCall(F));
11189 }
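// For illustration: svlen_u32 uses nxv4i32, whose known-minimum element count
// is 4, so the result is 4 * vscale; e.g. 8 on an implementation with 256-bit
// SVE vectors (vscale == 2).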
11190
11191 case SVE::BI__builtin_sve_svtbl2_u8:
11192 case SVE::BI__builtin_sve_svtbl2_s8:
11193 case SVE::BI__builtin_sve_svtbl2_u16:
11194 case SVE::BI__builtin_sve_svtbl2_s16:
11195 case SVE::BI__builtin_sve_svtbl2_u32:
11196 case SVE::BI__builtin_sve_svtbl2_s32:
11197 case SVE::BI__builtin_sve_svtbl2_u64:
11198 case SVE::BI__builtin_sve_svtbl2_s64:
11199 case SVE::BI__builtin_sve_svtbl2_f16:
11200 case SVE::BI__builtin_sve_svtbl2_bf16:
11201 case SVE::BI__builtin_sve_svtbl2_f32:
11202 case SVE::BI__builtin_sve_svtbl2_f64: {
11203 SVETypeFlags TF(Builtin->TypeModifier);
11204 auto VTy = cast<llvm::ScalableVectorType>(getSVEType(TF));
11205 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_tbl2, VTy);
11206 return Builder.CreateCall(F, Ops);
11207 }
11208
11209 case SVE::BI__builtin_sve_svset_neonq_s8:
11210 case SVE::BI__builtin_sve_svset_neonq_s16:
11211 case SVE::BI__builtin_sve_svset_neonq_s32:
11212 case SVE::BI__builtin_sve_svset_neonq_s64:
11213 case SVE::BI__builtin_sve_svset_neonq_u8:
11214 case SVE::BI__builtin_sve_svset_neonq_u16:
11215 case SVE::BI__builtin_sve_svset_neonq_u32:
11216 case SVE::BI__builtin_sve_svset_neonq_u64:
11217 case SVE::BI__builtin_sve_svset_neonq_f16:
11218 case SVE::BI__builtin_sve_svset_neonq_f32:
11219 case SVE::BI__builtin_sve_svset_neonq_f64:
11220 case SVE::BI__builtin_sve_svset_neonq_bf16: {
11221 return Builder.CreateInsertVector(Ty, Ops[0], Ops[1], Builder.getInt64(0));
11222 }
11223
11224 case SVE::BI__builtin_sve_svget_neonq_s8:
11225 case SVE::BI__builtin_sve_svget_neonq_s16:
11226 case SVE::BI__builtin_sve_svget_neonq_s32:
11227 case SVE::BI__builtin_sve_svget_neonq_s64:
11228 case SVE::BI__builtin_sve_svget_neonq_u8:
11229 case SVE::BI__builtin_sve_svget_neonq_u16:
11230 case SVE::BI__builtin_sve_svget_neonq_u32:
11231 case SVE::BI__builtin_sve_svget_neonq_u64:
11232 case SVE::BI__builtin_sve_svget_neonq_f16:
11233 case SVE::BI__builtin_sve_svget_neonq_f32:
11234 case SVE::BI__builtin_sve_svget_neonq_f64:
11235 case SVE::BI__builtin_sve_svget_neonq_bf16: {
11236 return Builder.CreateExtractVector(Ty, Ops[0], Builder.getInt64(0));
11237 }
11238
11239 case SVE::BI__builtin_sve_svdup_neonq_s8:
11240 case SVE::BI__builtin_sve_svdup_neonq_s16:
11241 case SVE::BI__builtin_sve_svdup_neonq_s32:
11242 case SVE::BI__builtin_sve_svdup_neonq_s64:
11243 case SVE::BI__builtin_sve_svdup_neonq_u8:
11244 case SVE::BI__builtin_sve_svdup_neonq_u16:
11245 case SVE::BI__builtin_sve_svdup_neonq_u32:
11246 case SVE::BI__builtin_sve_svdup_neonq_u64:
11247 case SVE::BI__builtin_sve_svdup_neonq_f16:
11248 case SVE::BI__builtin_sve_svdup_neonq_f32:
11249 case SVE::BI__builtin_sve_svdup_neonq_f64:
11250 case SVE::BI__builtin_sve_svdup_neonq_bf16: {
11251 Value *Insert = Builder.CreateInsertVector(Ty, PoisonValue::get(Ty), Ops[0],
11252 Builder.getInt64(0));
11253 return Builder.CreateIntrinsic(Intrinsic::aarch64_sve_dupq_lane, {Ty},
11254 {Insert, Builder.getInt64(0)});
11255 }
11256 }
11257
11258 // Should not happen.
11259 return nullptr;
11260}
11261
11262static void swapCommutativeSMEOperands(unsigned BuiltinID,
11263 SmallVectorImpl<Value *> &Ops) {
11264 unsigned MultiVec;
11265 switch (BuiltinID) {
11266 default:
11267 return;
11268 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x1:
11269 MultiVec = 1;
11270 break;
11271 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x2:
11272 case SME::BI__builtin_sme_svsudot_za32_s8_vg1x2:
11273 MultiVec = 2;
11274 break;
11275 case SME::BI__builtin_sme_svsudot_za32_s8_vg1x4:
11276 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x4:
11277 MultiVec = 4;
11278 break;
11279 }
11280
11281 if (MultiVec > 0)
11282 for (unsigned I = 0; I < MultiVec; ++I)
11283 std::swap(Ops[I + 1], Ops[I + 1 + MultiVec]);
11284}
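// For illustration (operand layout assumed from the swap above): for
// svsudot_za32_s8_vg1x2 the operands are {slice, zn0, zn1, zm0, zm1} and the
// loop swaps Ops[1]<->Ops[3] and Ops[2]<->Ops[4], exchanging the signed and
// unsigned multi-vector groups so the corresponding us* intrinsic can be
// reused with its inputs exchanged.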
11285
11286 Value *CodeGenFunction::EmitAArch64SMEBuiltinExpr(unsigned BuiltinID,
11287 const CallExpr *E) {
11288 auto *Builtin = findARMVectorIntrinsicInMap(AArch64SMEIntrinsicMap, BuiltinID,
11289 AArch64SMEIntrinsicsProvenSorted);
11290
11291 llvm::SmallVector<Value *, 4> Ops;
11292 SVETypeFlags TypeFlags(Builtin->TypeModifier);
11293 GetAArch64SVEProcessedOperands(BuiltinID, E, Ops, TypeFlags);
11294
11295 if (TypeFlags.isLoad() || TypeFlags.isStore())
11296 return EmitSMELd1St1(TypeFlags, Ops, Builtin->LLVMIntrinsic);
11297 else if (TypeFlags.isReadZA() || TypeFlags.isWriteZA())
11298 return EmitSMEReadWrite(TypeFlags, Ops, Builtin->LLVMIntrinsic);
11299 else if (BuiltinID == SME::BI__builtin_sme_svzero_mask_za ||
11300 BuiltinID == SME::BI__builtin_sme_svzero_za)
11301 return EmitSMEZero(TypeFlags, Ops, Builtin->LLVMIntrinsic);
11302 else if (BuiltinID == SME::BI__builtin_sme_svldr_vnum_za ||
11303 BuiltinID == SME::BI__builtin_sme_svstr_vnum_za ||
11304 BuiltinID == SME::BI__builtin_sme_svldr_za ||
11305 BuiltinID == SME::BI__builtin_sme_svstr_za)
11306 return EmitSMELdrStr(TypeFlags, Ops, Builtin->LLVMIntrinsic);
11307
11308 // Emit a write to FPMR for intrinsics that require it.
11309 if (TypeFlags.setsFPMR())
11310 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_set_fpmr),
11311 Ops.pop_back_val());
11312 // Handle builtins which require their multi-vector operands to be swapped
11313 swapCommutativeSMEOperands(BuiltinID, Ops);
11314
11315 // Should not happen!
11316 if (Builtin->LLVMIntrinsic == 0)
11317 return nullptr;
11318
11319 if (BuiltinID == SME::BI__builtin_sme___arm_in_streaming_mode) {
11320 // If we already know the streaming mode, don't bother with the intrinsic
11321 // and emit a constant instead
11322 const auto *FD = cast<FunctionDecl>(CurFuncDecl);
11323 if (const auto *FPT = FD->getType()->getAs<FunctionProtoType>()) {
11324 unsigned SMEAttrs = FPT->getAArch64SMEAttributes();
11325 if (!(SMEAttrs & FunctionType::SME_PStateSMCompatibleMask)) {
11326 bool IsStreaming = SMEAttrs & FunctionType::SME_PStateSMEnabledMask;
11327 return ConstantInt::getBool(Builder.getContext(), IsStreaming);
11328 }
11329 }
11330 }
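// For illustration: inside a function declared '__arm_streaming' the
// SM-enabled bit is set and the compatible bit is clear, so the builtin folds
// to the constant true; in an '__arm_streaming_compatible' function the mode
// is only known at run time, so the code falls through and the intrinsic call
// is emitted instead.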
11331
11332 // Predicates must match the main datatype.
11333 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
11334 if (auto PredTy = dyn_cast<llvm::VectorType>(Ops[i]->getType()))
11335 if (PredTy->getElementType()->isIntegerTy(1))
11336 Ops[i] = EmitSVEPredicateCast(Ops[i], getSVEType(TypeFlags));
11337
11338 Function *F =
11339 TypeFlags.isOverloadNone()
11340 ? CGM.getIntrinsic(Builtin->LLVMIntrinsic)
11341 : CGM.getIntrinsic(Builtin->LLVMIntrinsic, {getSVEType(TypeFlags)});
11342
11343 return Builder.CreateCall(F, Ops);
11344}
11345
11346 Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
11347 const CallExpr *E,
11348 llvm::Triple::ArchType Arch) {
11349 if (BuiltinID >= clang::AArch64::FirstSVEBuiltin &&
11350 BuiltinID <= clang::AArch64::LastSVEBuiltin)
11351 return EmitAArch64SVEBuiltinExpr(BuiltinID, E);
11352
11353 if (BuiltinID >= clang::AArch64::FirstSMEBuiltin &&
11354 BuiltinID <= clang::AArch64::LastSMEBuiltin)
11355 return EmitAArch64SMEBuiltinExpr(BuiltinID, E);
11356
11357 if (BuiltinID == Builtin::BI__builtin_cpu_supports)
11358 return EmitAArch64CpuSupports(E);
11359
11360 unsigned HintID = static_cast<unsigned>(-1);
11361 switch (BuiltinID) {
11362 default: break;
11363 case clang::AArch64::BI__builtin_arm_nop:
11364 HintID = 0;
11365 break;
11366 case clang::AArch64::BI__builtin_arm_yield:
11367 case clang::AArch64::BI__yield:
11368 HintID = 1;
11369 break;
11370 case clang::AArch64::BI__builtin_arm_wfe:
11371 case clang::AArch64::BI__wfe:
11372 HintID = 2;
11373 break;
11374 case clang::AArch64::BI__builtin_arm_wfi:
11375 case clang::AArch64::BI__wfi:
11376 HintID = 3;
11377 break;
11378 case clang::AArch64::BI__builtin_arm_sev:
11379 case clang::AArch64::BI__sev:
11380 HintID = 4;
11381 break;
11382 case clang::AArch64::BI__builtin_arm_sevl:
11383 case clang::AArch64::BI__sevl:
11384 HintID = 5;
11385 break;
11386 }
11387
11388 if (HintID != static_cast<unsigned>(-1)) {
11389 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint);
11390 return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID));
11391 }
11392
11393 if (BuiltinID == clang::AArch64::BI__builtin_arm_trap) {
11394 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_break);
11395 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11396 return Builder.CreateCall(F, Builder.CreateZExt(Arg, CGM.Int32Ty));
11397 }
11398
11399 if (BuiltinID == clang::AArch64::BI__builtin_arm_get_sme_state) {
11400 // Create call to __arm_sme_state and store the results to the two pointers.
11401 CallInst *CI = EmitRuntimeCall(CGM.CreateRuntimeFunction(
11402 llvm::FunctionType::get(StructType::get(CGM.Int64Ty, CGM.Int64Ty), {},
11403 false),
11404 "__arm_sme_state"));
11405 auto Attrs = AttributeList().addFnAttribute(getLLVMContext(),
11406 "aarch64_pstate_sm_compatible");
11407 CI->setAttributes(Attrs);
11408 CI->setCallingConv(
11409 llvm::CallingConv::
11410 AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2);
11411 Builder.CreateStore(Builder.CreateExtractValue(CI, 0),
11412 EmitPointerWithAlignment(E->getArg(0)));
11413 return Builder.CreateStore(Builder.CreateExtractValue(CI, 1),
11414 EmitPointerWithAlignment(E->getArg(1)));
11415 }
11416
11417 if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit) {
11418 assert((getContext().getTypeSize(E->getType()) == 32) &&
11419 "rbit of unusual size!");
11420 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11421 return Builder.CreateCall(
11422 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
11423 }
11424 if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit64) {
11425 assert((getContext().getTypeSize(E->getType()) == 64) &&
11426 "rbit of unusual size!");
11427 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11428 return Builder.CreateCall(
11429 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
11430 }
11431
11432 if (BuiltinID == clang::AArch64::BI__builtin_arm_clz ||
11433 BuiltinID == clang::AArch64::BI__builtin_arm_clz64) {
11434 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11435 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Arg->getType());
11436 Value *Res = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
11437 if (BuiltinID == clang::AArch64::BI__builtin_arm_clz64)
11438 Res = Builder.CreateTrunc(Res, Builder.getInt32Ty());
11439 return Res;
11440 }
11441
11442 if (BuiltinID == clang::AArch64::BI__builtin_arm_cls) {
11443 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11444 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls), Arg,
11445 "cls");
11446 }
11447 if (BuiltinID == clang::AArch64::BI__builtin_arm_cls64) {
11448 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11449 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls64), Arg,
11450 "cls");
11451 }
11452
11453 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32zf ||
11454 BuiltinID == clang::AArch64::BI__builtin_arm_rint32z) {
11455 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11456 llvm::Type *Ty = Arg->getType();
11457 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32z, Ty),
11458 Arg, "frint32z");
11459 }
11460
11461 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64zf ||
11462 BuiltinID == clang::AArch64::BI__builtin_arm_rint64z) {
11463 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11464 llvm::Type *Ty = Arg->getType();
11465 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64z, Ty),
11466 Arg, "frint64z");
11467 }
11468
11469 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32xf ||
11470 BuiltinID == clang::AArch64::BI__builtin_arm_rint32x) {
11471 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11472 llvm::Type *Ty = Arg->getType();
11473 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32x, Ty),
11474 Arg, "frint32x");
11475 }
11476
11477 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64xf ||
11478 BuiltinID == clang::AArch64::BI__builtin_arm_rint64x) {
11479 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11480 llvm::Type *Ty = Arg->getType();
11481 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64x, Ty),
11482 Arg, "frint64x");
11483 }
11484
11485 if (BuiltinID == clang::AArch64::BI__builtin_arm_jcvt) {
11486 assert((getContext().getTypeSize(E->getType()) == 32) &&
11487 "__jcvt of unusual size!");
11488 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11489 return Builder.CreateCall(
11490 CGM.getIntrinsic(Intrinsic::aarch64_fjcvtzs), Arg);
11491 }
11492
11493 if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b ||
11494 BuiltinID == clang::AArch64::BI__builtin_arm_st64b ||
11495 BuiltinID == clang::AArch64::BI__builtin_arm_st64bv ||
11496 BuiltinID == clang::AArch64::BI__builtin_arm_st64bv0) {
11497 llvm::Value *MemAddr = EmitScalarExpr(E->getArg(0));
11498 llvm::Value *ValPtr = EmitScalarExpr(E->getArg(1));
11499
11500 if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b) {
11501 // Load from the address via an LLVM intrinsic, receiving a
11502 // tuple of 8 i64 words, and store each one to ValPtr.
11503 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_ld64b);
11504 llvm::Value *Val = Builder.CreateCall(F, MemAddr);
11505 llvm::Value *ToRet;
11506 for (size_t i = 0; i < 8; i++) {
11507 llvm::Value *ValOffsetPtr =
11508 Builder.CreateGEP(Int64Ty, ValPtr, Builder.getInt32(i));
11509 Address Addr =
11510 Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(8));
11511 ToRet = Builder.CreateStore(Builder.CreateExtractValue(Val, i), Addr);
11512 }
11513 return ToRet;
11514 } else {
11515 // Load 8 i64 words from ValPtr, and store them to the address
11516 // via an LLVM intrinsic.
11517 SmallVector<llvm::Value *> Args;
11518 Args.push_back(MemAddr);
11519 for (size_t i = 0; i < 8; i++) {
11520 llvm::Value *ValOffsetPtr =
11521 Builder.CreateGEP(Int64Ty, ValPtr, Builder.getInt32(i));
11522 Address Addr =
11523 Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(8));
11524 Args.push_back(Builder.CreateLoad(Addr));
11525 }
11526
11527 auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_st64b
11528 ? Intrinsic::aarch64_st64b
11529 : BuiltinID == clang::AArch64::BI__builtin_arm_st64bv
11530 ? Intrinsic::aarch64_st64bv
11531 : Intrinsic::aarch64_st64bv0);
11532 Function *F = CGM.getIntrinsic(Intr);
11533 return Builder.CreateCall(F, Args);
11534 }
11535 }
11536
11537 if (BuiltinID == clang::AArch64::BI__builtin_arm_rndr ||
11538 BuiltinID == clang::AArch64::BI__builtin_arm_rndrrs) {
11539
11540 auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_rndr
11541 ? Intrinsic::aarch64_rndr
11542 : Intrinsic::aarch64_rndrrs);
11543 Function *F = CGM.getIntrinsic(Intr);
11544 llvm::Value *Val = Builder.CreateCall(F);
11545 Value *RandomValue = Builder.CreateExtractValue(Val, 0);
11546 Value *Status = Builder.CreateExtractValue(Val, 1);
11547
11548 Address MemAddress = EmitPointerWithAlignment(E->getArg(0));
11549 Builder.CreateStore(RandomValue, MemAddress);
11550 Status = Builder.CreateZExt(Status, Int32Ty);
11551 return Status;
11552 }
11553
11554 if (BuiltinID == clang::AArch64::BI__clear_cache) {
11555 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
11556 const FunctionDecl *FD = E->getDirectCallee();
11557 Value *Ops[2];
11558 for (unsigned i = 0; i < 2; i++)
11559 Ops[i] = EmitScalarExpr(E->getArg(i));
11560 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
11561 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
11562 StringRef Name = FD->getName();
11563 return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
11564 }
11565
11566 if ((BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
11567 BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) &&
11568 getContext().getTypeSize(E->getType()) == 128) {
11569 Function *F =
11570 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
11571 ? Intrinsic::aarch64_ldaxp
11572 : Intrinsic::aarch64_ldxp);
11573
11574 Value *LdPtr = EmitScalarExpr(E->getArg(0));
11575 Value *Val = Builder.CreateCall(F, LdPtr, "ldxp");
11576
11577 Value *Val0 = Builder.CreateExtractValue(Val, 1);
11578 Value *Val1 = Builder.CreateExtractValue(Val, 0);
11579 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
11580 Val0 = Builder.CreateZExt(Val0, Int128Ty);
11581 Val1 = Builder.CreateZExt(Val1, Int128Ty);
11582
11583 Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64);
11584 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
11585 Val = Builder.CreateOr(Val, Val1);
11586 return Builder.CreateBitCast(Val, ConvertType(E->getType()));
11587 } else if (BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
11588 BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) {
11589 Value *LoadAddr = EmitScalarExpr(E->getArg(0));
11590
11591 QualType Ty = E->getType();
11592 llvm::Type *RealResTy = ConvertType(Ty);
11593 llvm::Type *IntTy =
11594 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
11595
11596 Function *F =
11597 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
11598 ? Intrinsic::aarch64_ldaxr
11599 : Intrinsic::aarch64_ldxr,
11600 UnqualPtrTy);
11601 CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldxr");
11602 Val->addParamAttr(
11603 0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy));
11604
11605 if (RealResTy->isPointerTy())
11606 return Builder.CreateIntToPtr(Val, RealResTy);
11607
11608 llvm::Type *IntResTy = llvm::IntegerType::get(
11609 getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
11610 return Builder.CreateBitCast(Builder.CreateTruncOrBitCast(Val, IntResTy),
11611 RealResTy);
11612 }
11613
11614 if ((BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
11615 BuiltinID == clang::AArch64::BI__builtin_arm_stlex) &&
11616 getContext().getTypeSize(E->getArg(0)->getType()) == 128) {
11617 Function *F =
11618 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex
11619 ? Intrinsic::aarch64_stlxp
11620 : Intrinsic::aarch64_stxp);
11621 llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty);
11622
11623 Address Tmp = CreateMemTemp(E->getArg(0)->getType());
11624 EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true);
11625
11626 Tmp = Tmp.withElementType(STy);
11627 llvm::Value *Val = Builder.CreateLoad(Tmp);
11628
11629 Value *Arg0 = Builder.CreateExtractValue(Val, 0);
11630 Value *Arg1 = Builder.CreateExtractValue(Val, 1);
11631 Value *StPtr = EmitScalarExpr(E->getArg(1));
11632 return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp");
11633 }
11634
11635 if (BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
11636 BuiltinID == clang::AArch64::BI__builtin_arm_stlex) {
11637 Value *StoreVal = EmitScalarExpr(E->getArg(0));
11638 Value *StoreAddr = EmitScalarExpr(E->getArg(1));
11639
11640 QualType Ty = E->getArg(0)->getType();
11641 llvm::Type *StoreTy =
11642 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
11643
11644 if (StoreVal->getType()->isPointerTy())
11645 StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty);
11646 else {
11647 llvm::Type *IntTy = llvm::IntegerType::get(
11648 getLLVMContext(),
11649 CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
11650 StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
11651 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty);
11652 }
11653
11654 Function *F =
11655 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex
11656 ? Intrinsic::aarch64_stlxr
11657 : Intrinsic::aarch64_stxr,
11658 StoreAddr->getType());
11659 CallInst *CI = Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr");
11660 CI->addParamAttr(
11661 1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy));
11662 return CI;
11663 }
11664
11665 if (BuiltinID == clang::AArch64::BI__getReg) {
11666 Expr::EvalResult Result;
11667 if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
11668 llvm_unreachable("Sema will ensure that the parameter is constant");
11669
11670 llvm::APSInt Value = Result.Val.getInt();
11671 LLVMContext &Context = CGM.getLLVMContext();
11672 std::string Reg = Value == 31 ? "sp" : "x" + toString(Value, 10);
11673
11674 llvm::Metadata *Ops[] = {llvm::MDString::get(Context, Reg)};
11675 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
11676 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
11677
11678 llvm::Function *F =
11679 CGM.getIntrinsic(llvm::Intrinsic::read_register, {Int64Ty});
11680 return Builder.CreateCall(F, Metadata);
11681 }
11682
11683 if (BuiltinID == clang::AArch64::BI__break) {
11684 Expr::EvalResult Result;
11685 if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
11686 llvm_unreachable("Sema will ensure that the parameter is constant");
11687
11688 llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::aarch64_break);
11689 return Builder.CreateCall(F, {EmitScalarExpr(E->getArg(0))});
11690 }
11691
11692 if (BuiltinID == clang::AArch64::BI__builtin_arm_clrex) {
11693 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex);
11694 return Builder.CreateCall(F);
11695 }
11696
11697 if (BuiltinID == clang::AArch64::BI_ReadWriteBarrier)
11698 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
11699 llvm::SyncScope::SingleThread);
11700
11701 // CRC32
11702 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
11703 switch (BuiltinID) {
11704 case clang::AArch64::BI__builtin_arm_crc32b:
11705 CRCIntrinsicID = Intrinsic::aarch64_crc32b; break;
11706 case clang::AArch64::BI__builtin_arm_crc32cb:
11707 CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break;
11708 case clang::AArch64::BI__builtin_arm_crc32h:
11709 CRCIntrinsicID = Intrinsic::aarch64_crc32h; break;
11710 case clang::AArch64::BI__builtin_arm_crc32ch:
11711 CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break;
11712 case clang::AArch64::BI__builtin_arm_crc32w:
11713 CRCIntrinsicID = Intrinsic::aarch64_crc32w; break;
11714 case clang::AArch64::BI__builtin_arm_crc32cw:
11715 CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break;
11716 case clang::AArch64::BI__builtin_arm_crc32d:
11717 CRCIntrinsicID = Intrinsic::aarch64_crc32x; break;
11718 case clang::AArch64::BI__builtin_arm_crc32cd:
11719 CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break;
11720 }
11721
11722 if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
11723 Value *Arg0 = EmitScalarExpr(E->getArg(0));
11724 Value *Arg1 = EmitScalarExpr(E->getArg(1));
11725 Function *F = CGM.getIntrinsic(CRCIntrinsicID);
11726
11727 llvm::Type *DataTy = F->getFunctionType()->getParamType(1);
11728 Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy);
11729
11730 return Builder.CreateCall(F, {Arg0, Arg1});
11731 }
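// For illustration (approximate IR): __builtin_arm_crc32b(crc, data) becomes
//   %ext = zext i8 %data to i32
//   %r   = call i32 @llvm.aarch64.crc32b(i32 %crc, i32 %ext)
// since the data operand is zero-extended (or bitcast) to the intrinsic's
// second parameter type.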
11732
11733 // Memory Operations (MOPS)
11734 if (BuiltinID == AArch64::BI__builtin_arm_mops_memset_tag) {
11735 Value *Dst = EmitScalarExpr(E->getArg(0));
11736 Value *Val = EmitScalarExpr(E->getArg(1));
11737 Value *Size = EmitScalarExpr(E->getArg(2));
11738 Val = Builder.CreateTrunc(Val, Int8Ty);
11739 Size = Builder.CreateIntCast(Size, Int64Ty, false);
11740 return Builder.CreateCall(
11741 CGM.getIntrinsic(Intrinsic::aarch64_mops_memset_tag), {Dst, Val, Size});
11742 }
11743
11744 // Memory Tagging Extensions (MTE) Intrinsics
11745 Intrinsic::ID MTEIntrinsicID = Intrinsic::not_intrinsic;
11746 switch (BuiltinID) {
11747 case clang::AArch64::BI__builtin_arm_irg:
11748 MTEIntrinsicID = Intrinsic::aarch64_irg; break;
11749 case clang::AArch64::BI__builtin_arm_addg:
11750 MTEIntrinsicID = Intrinsic::aarch64_addg; break;
11751 case clang::AArch64::BI__builtin_arm_gmi:
11752 MTEIntrinsicID = Intrinsic::aarch64_gmi; break;
11753 case clang::AArch64::BI__builtin_arm_ldg:
11754 MTEIntrinsicID = Intrinsic::aarch64_ldg; break;
11755 case clang::AArch64::BI__builtin_arm_stg:
11756 MTEIntrinsicID = Intrinsic::aarch64_stg; break;
11757 case clang::AArch64::BI__builtin_arm_subp:
11758 MTEIntrinsicID = Intrinsic::aarch64_subp; break;
11759 }
11760
11761 if (MTEIntrinsicID != Intrinsic::not_intrinsic) {
11762 if (MTEIntrinsicID == Intrinsic::aarch64_irg) {
11763 Value *Pointer = EmitScalarExpr(E->getArg(0));
11764 Value *Mask = EmitScalarExpr(E->getArg(1));
11765
11766 Mask = Builder.CreateZExt(Mask, Int64Ty);
11767 return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
11768 {Pointer, Mask});
11769 }
11770 if (MTEIntrinsicID == Intrinsic::aarch64_addg) {
11771 Value *Pointer = EmitScalarExpr(E->getArg(0));
11772 Value *TagOffset = EmitScalarExpr(E->getArg(1));
11773
11774 TagOffset = Builder.CreateZExt(TagOffset, Int64Ty);
11775 return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
11776 {Pointer, TagOffset});
11777 }
11778 if (MTEIntrinsicID == Intrinsic::aarch64_gmi) {
11779 Value *Pointer = EmitScalarExpr(E->getArg(0));
11780 Value *ExcludedMask = EmitScalarExpr(E->getArg(1));
11781
11782 ExcludedMask = Builder.CreateZExt(ExcludedMask, Int64Ty);
11783 return Builder.CreateCall(
11784 CGM.getIntrinsic(MTEIntrinsicID), {Pointer, ExcludedMask});
11785 }
11786 // Although it is possible to supply a different return
11787 // address (first arg) to this intrinsic, for now we set the
11788 // return address to be the same as the input address.
11789 if (MTEIntrinsicID == Intrinsic::aarch64_ldg) {
11790 Value *TagAddress = EmitScalarExpr(E->getArg(0));
11791 return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
11792 {TagAddress, TagAddress});
11793 }
11794 // Although it is possible to supply a different tag (to set)
11795 // to this intrinsic (as first arg), for now we supply
11796 // the tag that is in the input address arg (the common use case).
11797 if (MTEIntrinsicID == Intrinsic::aarch64_stg) {
11798 Value *TagAddress = EmitScalarExpr(E->getArg(0));
11799 return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
11800 {TagAddress, TagAddress});
11801 }
11802 if (MTEIntrinsicID == Intrinsic::aarch64_subp) {
11803 Value *PointerA = EmitScalarExpr(E->getArg(0));
11804 Value *PointerB = EmitScalarExpr(E->getArg(1));
11805 return Builder.CreateCall(
11806 CGM.getIntrinsic(MTEIntrinsicID), {PointerA, PointerB});
11807 }
11808 }
11809
11810 if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
11811 BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
11812 BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
11813 BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
11814 BuiltinID == clang::AArch64::BI__builtin_arm_wsr ||
11815 BuiltinID == clang::AArch64::BI__builtin_arm_wsr64 ||
11816 BuiltinID == clang::AArch64::BI__builtin_arm_wsr128 ||
11817 BuiltinID == clang::AArch64::BI__builtin_arm_wsrp) {
11818
11819 SpecialRegisterAccessKind AccessKind = Write;
11820 if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
11821 BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
11822 BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
11823 BuiltinID == clang::AArch64::BI__builtin_arm_rsrp)
11824 AccessKind = VolatileRead;
11825
11826 bool IsPointerBuiltin = BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
11827 BuiltinID == clang::AArch64::BI__builtin_arm_wsrp;
11828
11829 bool Is32Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
11830 BuiltinID == clang::AArch64::BI__builtin_arm_wsr;
11831
11832 bool Is128Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
11833 BuiltinID == clang::AArch64::BI__builtin_arm_wsr128;
11834
11835 llvm::Type *ValueType;
11836 llvm::Type *RegisterType = Int64Ty;
11837 if (Is32Bit) {
11838 ValueType = Int32Ty;
11839 } else if (Is128Bit) {
11840 llvm::Type *Int128Ty =
11841 llvm::IntegerType::getInt128Ty(CGM.getLLVMContext());
11842 ValueType = Int128Ty;
11843 RegisterType = Int128Ty;
11844 } else if (IsPointerBuiltin) {
11845 ValueType = VoidPtrTy;
11846 } else {
11847 ValueType = Int64Ty;
11848 }
11849
11850 return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType,
11851 AccessKind);
11852 }
11853
11854 if (BuiltinID == clang::AArch64::BI_ReadStatusReg ||
11855 BuiltinID == clang::AArch64::BI_WriteStatusReg) {
11856 LLVMContext &Context = CGM.getLLVMContext();
11857
11858 unsigned SysReg =
11859 E->getArg(0)->EvaluateKnownConstInt(getContext()).getZExtValue();
11860
11861 std::string SysRegStr;
11862 llvm::raw_string_ostream(SysRegStr) <<
11863 ((1 << 1) | ((SysReg >> 14) & 1)) << ":" <<
11864 ((SysReg >> 11) & 7) << ":" <<
11865 ((SysReg >> 7) & 15) << ":" <<
11866 ((SysReg >> 3) & 15) << ":" <<
11867 ( SysReg & 7);
11868
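// For illustration: MSVC encodes ARM64_SYSREG(3, 3, 13, 0, 2) (TPIDR_EL0) as
// 0x5E82; the fields above decode it back to the string "3:3:13:0:2", i.e.
// the op0:op1:CRn:CRm:op2 form that the read/write_register intrinsics expect.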
11869 llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysRegStr) };
11870 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
11871 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
11872
11873 llvm::Type *RegisterType = Int64Ty;
11874 llvm::Type *Types[] = { RegisterType };
11875
11876 if (BuiltinID == clang::AArch64::BI_ReadStatusReg) {
11877 llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types);
11878
11879 return Builder.CreateCall(F, Metadata);
11880 }
11881
11882 llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
11883 llvm::Value *ArgValue = EmitScalarExpr(E->getArg(1));
11884
11885 return Builder.CreateCall(F, { Metadata, ArgValue });
11886 }
11887
11888 if (BuiltinID == clang::AArch64::BI_AddressOfReturnAddress) {
11889 llvm::Function *F =
11890 CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy);
11891 return Builder.CreateCall(F);
11892 }
11893
11894 if (BuiltinID == clang::AArch64::BI__builtin_sponentry) {
11895 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy);
11896 return Builder.CreateCall(F);
11897 }
11898
11899 if (BuiltinID == clang::AArch64::BI__mulh ||
11900 BuiltinID == clang::AArch64::BI__umulh) {
11901 llvm::Type *ResType = ConvertType(E->getType());
11902 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
11903
11904 bool IsSigned = BuiltinID == clang::AArch64::BI__mulh;
11905 Value *LHS =
11906 Builder.CreateIntCast(EmitScalarExpr(E->getArg(0)), Int128Ty, IsSigned);
11907 Value *RHS =
11908 Builder.CreateIntCast(EmitScalarExpr(E->getArg(1)), Int128Ty, IsSigned);
11909
11910 Value *MulResult, *HigherBits;
11911 if (IsSigned) {
11912 MulResult = Builder.CreateNSWMul(LHS, RHS);
11913 HigherBits = Builder.CreateAShr(MulResult, 64);
11914 } else {
11915 MulResult = Builder.CreateNUWMul(LHS, RHS);
11916 HigherBits = Builder.CreateLShr(MulResult, 64);
11917 }
11918 HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);
11919
11920 return HigherBits;
11921 }
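// For illustration: __umulh(0x8000000000000000ULL, 4) widens both operands to
// i128, forms the 128-bit product 2^65, and returns its high 64 bits, i.e. 2.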
11922
11923 if (BuiltinID == AArch64::BI__writex18byte ||
11924 BuiltinID == AArch64::BI__writex18word ||
11925 BuiltinID == AArch64::BI__writex18dword ||
11926 BuiltinID == AArch64::BI__writex18qword) {
11927 // Process the args first
11928 Value *OffsetArg = EmitScalarExpr(E->getArg(0));
11929 Value *DataArg = EmitScalarExpr(E->getArg(1));
11930
11931 // Read x18 as i8*
11932 llvm::Value *X18 = readX18AsPtr(*this);
11933
11934 // Store val at x18 + offset
11935 Value *Offset = Builder.CreateZExt(OffsetArg, Int64Ty);
11936 Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
11937 StoreInst *Store =
11938 Builder.CreateAlignedStore(DataArg, Ptr, CharUnits::One());
11939 return Store;
11940 }
11941
11942 if (BuiltinID == AArch64::BI__readx18byte ||
11943 BuiltinID == AArch64::BI__readx18word ||
11944 BuiltinID == AArch64::BI__readx18dword ||
11945 BuiltinID == AArch64::BI__readx18qword) {
11946 // Process the args first
11947 Value *OffsetArg = EmitScalarExpr(E->getArg(0));
11948
11949 // Read x18 as i8*
11950 llvm::Value *X18 = readX18AsPtr(*this);
11951
11952 // Load x18 + offset
11953 Value *Offset = Builder.CreateZExt(OffsetArg, Int64Ty);
11954 Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
11955 llvm::Type *IntTy = ConvertType(E->getType());
11956 LoadInst *Load = Builder.CreateAlignedLoad(IntTy, Ptr, CharUnits::One());
11957 return Load;
11958 }
11959
11960 if (BuiltinID == AArch64::BI__addx18byte ||
11961 BuiltinID == AArch64::BI__addx18word ||
11962 BuiltinID == AArch64::BI__addx18dword ||
11963 BuiltinID == AArch64::BI__addx18qword ||
11964 BuiltinID == AArch64::BI__incx18byte ||
11965 BuiltinID == AArch64::BI__incx18word ||
11966 BuiltinID == AArch64::BI__incx18dword ||
11967 BuiltinID == AArch64::BI__incx18qword) {
11968 llvm::Type *IntTy;
11969 bool isIncrement;
11970 switch (BuiltinID) {
11971 case AArch64::BI__incx18byte:
11972 IntTy = Int8Ty;
11973 isIncrement = true;
11974 break;
11975 case AArch64::BI__incx18word:
11976 IntTy = Int16Ty;
11977 isIncrement = true;
11978 break;
11979 case AArch64::BI__incx18dword:
11980 IntTy = Int32Ty;
11981 isIncrement = true;
11982 break;
11983 case AArch64::BI__incx18qword:
11984 IntTy = Int64Ty;
11985 isIncrement = true;
11986 break;
11987 default:
11988 IntTy = ConvertType(E->getArg(1)->getType());
11989 isIncrement = false;
11990 break;
11991 }
11992 // Process the args first
11993 Value *OffsetArg = EmitScalarExpr(E->getArg(0));
11994 Value *ValToAdd =
11995 isIncrement ? ConstantInt::get(IntTy, 1) : EmitScalarExpr(E->getArg(1));
11996
11997 // Read x18 as i8*
11998 llvm::Value *X18 = readX18AsPtr(*this);
11999
12000 // Load x18 + offset
12001 Value *Offset = Builder.CreateZExt(OffsetArg, Int64Ty);
12002 Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
12003 LoadInst *Load = Builder.CreateAlignedLoad(IntTy, Ptr, CharUnits::One());
12004
12005 // Add values
12006 Value *AddResult = Builder.CreateAdd(Load, ValToAdd);
12007
12008 // Store val at x18 + offset
12009 StoreInst *Store =
12010 Builder.CreateAlignedStore(AddResult, Ptr, CharUnits::One());
12011 return Store;
12012 }
12013
12014 if (BuiltinID == AArch64::BI_CopyDoubleFromInt64 ||
12015 BuiltinID == AArch64::BI_CopyFloatFromInt32 ||
12016 BuiltinID == AArch64::BI_CopyInt32FromFloat ||
12017 BuiltinID == AArch64::BI_CopyInt64FromDouble) {
12018 Value *Arg = EmitScalarExpr(E->getArg(0));
12019 llvm::Type *RetTy = ConvertType(E->getType());
12020 return Builder.CreateBitCast(Arg, RetTy);
12021 }
12022
12023 if (BuiltinID == AArch64::BI_CountLeadingOnes ||
12024 BuiltinID == AArch64::BI_CountLeadingOnes64 ||
12025 BuiltinID == AArch64::BI_CountLeadingZeros ||
12026 BuiltinID == AArch64::BI_CountLeadingZeros64) {
12027 Value *Arg = EmitScalarExpr(E->getArg(0));
12028 llvm::Type *ArgType = Arg->getType();
12029
12030 if (BuiltinID == AArch64::BI_CountLeadingOnes ||
12031 BuiltinID == AArch64::BI_CountLeadingOnes64)
12032 Arg = Builder.CreateXor(Arg, Constant::getAllOnesValue(ArgType));
12033
12034 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
12035 Value *Result = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
12036
12037 if (BuiltinID == AArch64::BI_CountLeadingOnes64 ||
12038 BuiltinID == AArch64::BI_CountLeadingZeros64)
12039 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
12040 return Result;
12041 }
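// For illustration: _CountLeadingOnes(0xF0000000) inverts the argument to
// 0x0FFFFFFF and counts leading zeros, giving 4; the *64 variants truncate the
// i64 ctlz result back to the 32-bit return type.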
12042
12043 if (BuiltinID == AArch64::BI_CountLeadingSigns ||
12044 BuiltinID == AArch64::BI_CountLeadingSigns64) {
12045 Value *Arg = EmitScalarExpr(E->getArg(0));
12046
12047 Function *F = (BuiltinID == AArch64::BI_CountLeadingSigns)
12048 ? CGM.getIntrinsic(Intrinsic::aarch64_cls)
12049 : CGM.getIntrinsic(Intrinsic::aarch64_cls64);
12050
12051 Value *Result = Builder.CreateCall(F, Arg, "cls");
12052 if (BuiltinID == AArch64::BI_CountLeadingSigns64)
12053 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
12054 return Result;
12055 }
12056
12057 if (BuiltinID == AArch64::BI_CountOneBits ||
12058 BuiltinID == AArch64::BI_CountOneBits64) {
12059 Value *ArgValue = EmitScalarExpr(E->getArg(0));
12060 llvm::Type *ArgType = ArgValue->getType();
12061 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
12062
12063 Value *Result = Builder.CreateCall(F, ArgValue);
12064 if (BuiltinID == AArch64::BI_CountOneBits64)
12065 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
12066 return Result;
12067 }
12068
12069 if (BuiltinID == AArch64::BI__prefetch) {
12070 Value *Address = EmitScalarExpr(E->getArg(0));
12071 Value *RW = llvm::ConstantInt::get(Int32Ty, 0);
12072 Value *Locality = ConstantInt::get(Int32Ty, 3);
12073 Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
12074 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
12075 return Builder.CreateCall(F, {Address, RW, Locality, Data});
12076 }
12077
12078 if (BuiltinID == AArch64::BI__hlt) {
12079 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hlt);
12080 Builder.CreateCall(F, {EmitScalarExpr(E->getArg(0))});
12081
12082 // Return 0 for convenience, even though MSVC returns some other undefined
12083 // value.
12084 return ConstantInt::get(Builder.getInt32Ty(), 0);
12085 }
12086
12087 if (BuiltinID == NEON::BI__builtin_neon_vcvth_bf16_f32)
12088 return Builder.CreateFPTrunc(
12089 Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)),
12090 Builder.getFloatTy()),
12091 Builder.getBFloatTy());
12092
12093 // Handle MSVC intrinsics before argument evaluation to prevent double
12094 // evaluation.
12095 if (std::optional<MSVCIntrin> MsvcIntId =
12096 translateAarch64ToMsvcIntrin(BuiltinID))
12097 return EmitMSVCBuiltinExpr(*MsvcIntId, E);
12098
12099 // Some intrinsics are equivalent - if they are, use the base intrinsic ID.
12100 auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) {
12101 return P.first == BuiltinID;
12102 });
12103 if (It != end(NEONEquivalentIntrinsicMap))
12104 BuiltinID = It->second;
12105
12106 // Find out if any arguments are required to be integer constant
12107 // expressions.
12108 unsigned ICEArguments = 0;
12109 ASTContext::GetBuiltinTypeError Error;
12110 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
12111 assert(Error == ASTContext::GE_None && "Should not codegen an error");
12112
12113 llvm::SmallVector<Value*, 4> Ops;
12114 Address PtrOp0 = Address::invalid();
12115 for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
12116 if (i == 0) {
12117 switch (BuiltinID) {
12118 case NEON::BI__builtin_neon_vld1_v:
12119 case NEON::BI__builtin_neon_vld1q_v:
12120 case NEON::BI__builtin_neon_vld1_dup_v:
12121 case NEON::BI__builtin_neon_vld1q_dup_v:
12122 case NEON::BI__builtin_neon_vld1_lane_v:
12123 case NEON::BI__builtin_neon_vld1q_lane_v:
12124 case NEON::BI__builtin_neon_vst1_v:
12125 case NEON::BI__builtin_neon_vst1q_v:
12126 case NEON::BI__builtin_neon_vst1_lane_v:
12127 case NEON::BI__builtin_neon_vst1q_lane_v:
12128 case NEON::BI__builtin_neon_vldap1_lane_s64:
12129 case NEON::BI__builtin_neon_vldap1q_lane_s64:
12130 case NEON::BI__builtin_neon_vstl1_lane_s64:
12131 case NEON::BI__builtin_neon_vstl1q_lane_s64:
12132 // Get the alignment for the argument in addition to the value;
12133 // we'll use it later.
12134 PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
12135 Ops.push_back(PtrOp0.emitRawPointer(*this));
12136 continue;
12137 }
12138 }
12139 Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
12140 }
12141
12142 auto SISDMap = ArrayRef(AArch64SISDIntrinsicMap);
12143 const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(
12144 SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted);
12145
12146 if (Builtin) {
12147 Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1)));
12148 Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E);
12149 assert(Result && "SISD intrinsic should have been handled");
12150 return Result;
12151 }
12152
12153 const Expr *Arg = E->getArg(E->getNumArgs()-1);
12154 NeonTypeFlags Type(0);
12155 if (std::optional<llvm::APSInt> Result =
12156 Arg->getIntegerConstantExpr(getContext()))
12157 // Determine the type of this overloaded NEON intrinsic.
12158 Type = NeonTypeFlags(Result->getZExtValue());
12159
12160 bool usgn = Type.isUnsigned();
12161 bool quad = Type.isQuad();
12162
12163 // Handle non-overloaded intrinsics first.
12164 switch (BuiltinID) {
12165 default: break;
12166 case NEON::BI__builtin_neon_vabsh_f16:
12167 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12168 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, HalfTy), Ops, "vabs");
12169 case NEON::BI__builtin_neon_vaddq_p128: {
12170 llvm::Type *Ty = GetNeonType(this, NeonTypeFlags::Poly128);
12171 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12172 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
12173 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
12174 Ops[0] = Builder.CreateXor(Ops[0], Ops[1]);
12175 llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
12176 return Builder.CreateBitCast(Ops[0], Int128Ty);
12177 }
12178 case NEON::BI__builtin_neon_vldrq_p128: {
12179 llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
12180 Value *Ptr = EmitScalarExpr(E->getArg(0));
12181 return Builder.CreateAlignedLoad(Int128Ty, Ptr,
12182 CharUnits::fromQuantity(16));
12183 }
12184 case NEON::BI__builtin_neon_vstrq_p128: {
12185 Value *Ptr = Ops[0];
12186 return Builder.CreateDefaultAlignedStore(EmitScalarExpr(E->getArg(1)), Ptr);
12187 }
12188 case NEON::BI__builtin_neon_vcvts_f32_u32:
12189 case NEON::BI__builtin_neon_vcvtd_f64_u64:
12190 usgn = true;
12191 [[fallthrough]];
12192 case NEON::BI__builtin_neon_vcvts_f32_s32:
12193 case NEON::BI__builtin_neon_vcvtd_f64_s64: {
12194 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12195 bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
12196 llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
12197 llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
12198 Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
12199 if (usgn)
12200 return Builder.CreateUIToFP(Ops[0], FTy);
12201 return Builder.CreateSIToFP(Ops[0], FTy);
12202 }
12203 case NEON::BI__builtin_neon_vcvth_f16_u16:
12204 case NEON::BI__builtin_neon_vcvth_f16_u32:
12205 case NEON::BI__builtin_neon_vcvth_f16_u64:
12206 usgn = true;
12207 [[fallthrough]];
12208 case NEON::BI__builtin_neon_vcvth_f16_s16:
12209 case NEON::BI__builtin_neon_vcvth_f16_s32:
12210 case NEON::BI__builtin_neon_vcvth_f16_s64: {
12211 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12212 llvm::Type *FTy = HalfTy;
12213 llvm::Type *InTy;
12214 if (Ops[0]->getType()->getPrimitiveSizeInBits() == 64)
12215 InTy = Int64Ty;
12216 else if (Ops[0]->getType()->getPrimitiveSizeInBits() == 32)
12217 InTy = Int32Ty;
12218 else
12219 InTy = Int16Ty;
12220 Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
12221 if (usgn)
12222 return Builder.CreateUIToFP(Ops[0], FTy);
12223 return Builder.CreateSIToFP(Ops[0], FTy);
12224 }
12225 case NEON::BI__builtin_neon_vcvtah_u16_f16:
12226 case NEON::BI__builtin_neon_vcvtmh_u16_f16:
12227 case NEON::BI__builtin_neon_vcvtnh_u16_f16:
12228 case NEON::BI__builtin_neon_vcvtph_u16_f16:
12229 case NEON::BI__builtin_neon_vcvth_u16_f16:
12230 case NEON::BI__builtin_neon_vcvtah_s16_f16:
12231 case NEON::BI__builtin_neon_vcvtmh_s16_f16:
12232 case NEON::BI__builtin_neon_vcvtnh_s16_f16:
12233 case NEON::BI__builtin_neon_vcvtph_s16_f16:
12234 case NEON::BI__builtin_neon_vcvth_s16_f16: {
12235 unsigned Int;
12236 llvm::Type* InTy = Int32Ty;
12237 llvm::Type* FTy = HalfTy;
12238 llvm::Type *Tys[2] = {InTy, FTy};
12239 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12240 switch (BuiltinID) {
12241 default: llvm_unreachable("missing builtin ID in switch!");
12242 case NEON::BI__builtin_neon_vcvtah_u16_f16:
12243 Int = Intrinsic::aarch64_neon_fcvtau; break;
12244 case NEON::BI__builtin_neon_vcvtmh_u16_f16:
12245 Int = Intrinsic::aarch64_neon_fcvtmu; break;
12246 case NEON::BI__builtin_neon_vcvtnh_u16_f16:
12247 Int = Intrinsic::aarch64_neon_fcvtnu; break;
12248 case NEON::BI__builtin_neon_vcvtph_u16_f16:
12249 Int = Intrinsic::aarch64_neon_fcvtpu; break;
12250 case NEON::BI__builtin_neon_vcvth_u16_f16:
12251 Int = Intrinsic::aarch64_neon_fcvtzu; break;
12252 case NEON::BI__builtin_neon_vcvtah_s16_f16:
12253 Int = Intrinsic::aarch64_neon_fcvtas; break;
12254 case NEON::BI__builtin_neon_vcvtmh_s16_f16:
12255 Int = Intrinsic::aarch64_neon_fcvtms; break;
12256 case NEON::BI__builtin_neon_vcvtnh_s16_f16:
12257 Int = Intrinsic::aarch64_neon_fcvtns; break;
12258 case NEON::BI__builtin_neon_vcvtph_s16_f16:
12259 Int = Intrinsic::aarch64_neon_fcvtps; break;
12260 case NEON::BI__builtin_neon_vcvth_s16_f16:
12261 Int = Intrinsic::aarch64_neon_fcvtzs; break;
12262 }
12263 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvt");
12264 return Builder.CreateTrunc(Ops[0], Int16Ty);
12265 }
12266 case NEON::BI__builtin_neon_vcaleh_f16:
12267 case NEON::BI__builtin_neon_vcalth_f16:
12268 case NEON::BI__builtin_neon_vcageh_f16:
12269 case NEON::BI__builtin_neon_vcagth_f16: {
12270 unsigned Int;
12271 llvm::Type* InTy = Int32Ty;
12272 llvm::Type* FTy = HalfTy;
12273 llvm::Type *Tys[2] = {InTy, FTy};
12274 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12275 switch (BuiltinID) {
12276 default: llvm_unreachable("missing builtin ID in switch!");
12277 case NEON::BI__builtin_neon_vcageh_f16:
12278 Int = Intrinsic::aarch64_neon_facge; break;
12279 case NEON::BI__builtin_neon_vcagth_f16:
12280 Int = Intrinsic::aarch64_neon_facgt; break;
12281 case NEON::BI__builtin_neon_vcaleh_f16:
12282 Int = Intrinsic::aarch64_neon_facge; std::swap(Ops[0], Ops[1]); break;
12283 case NEON::BI__builtin_neon_vcalth_f16:
12284 Int = Intrinsic::aarch64_neon_facgt; std::swap(Ops[0], Ops[1]); break;
12285 }
12286 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "facg");
12287 return Builder.CreateTrunc(Ops[0], Int16Ty);
12288 }
12289 case NEON::BI__builtin_neon_vcvth_n_s16_f16:
12290 case NEON::BI__builtin_neon_vcvth_n_u16_f16: {
12291 unsigned Int;
12292 llvm::Type* InTy = Int32Ty;
12293 llvm::Type* FTy = HalfTy;
12294 llvm::Type *Tys[2] = {InTy, FTy};
12295 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12296 switch (BuiltinID) {
12297 default: llvm_unreachable("missing builtin ID in switch!");
12298 case NEON::BI__builtin_neon_vcvth_n_s16_f16:
12299 Int = Intrinsic::aarch64_neon_vcvtfp2fxs; break;
12300 case NEON::BI__builtin_neon_vcvth_n_u16_f16:
12301 Int = Intrinsic::aarch64_neon_vcvtfp2fxu; break;
12302 }
12303 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
12304 return Builder.CreateTrunc(Ops[0], Int16Ty);
12305 }
12306 case NEON::BI__builtin_neon_vcvth_n_f16_s16:
12307 case NEON::BI__builtin_neon_vcvth_n_f16_u16: {
12308 unsigned Int;
12309 llvm::Type* FTy = HalfTy;
12310 llvm::Type* InTy = Int32Ty;
12311 llvm::Type *Tys[2] = {FTy, InTy};
12312 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12313 switch (BuiltinID) {
12314 default: llvm_unreachable("missing builtin ID in switch!");
12315 case NEON::BI__builtin_neon_vcvth_n_f16_s16:
12316 Int = Intrinsic::aarch64_neon_vcvtfxs2fp;
12317 Ops[0] = Builder.CreateSExt(Ops[0], InTy, "sext");
12318 break;
12319 case NEON::BI__builtin_neon_vcvth_n_f16_u16:
12320 Int = Intrinsic::aarch64_neon_vcvtfxu2fp;
12321 Ops[0] = Builder.CreateZExt(Ops[0], InTy);
12322 break;
12323 }
12324 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
12325 }
12326 case NEON::BI__builtin_neon_vpaddd_s64: {
12327 auto *Ty = llvm::FixedVectorType::get(Int64Ty, 2);
12328 Value *Vec = EmitScalarExpr(E->getArg(0));
12329 // The vector is v2i64, so make sure it's bitcast to that.
12330 Vec = Builder.CreateBitCast(Vec, Ty, "v2i64");
12331 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
12332 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
12333 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
12334 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
12335 // Pairwise addition of a v2i64 into a scalar i64.
12336 return Builder.CreateAdd(Op0, Op1, "vpaddd");
12337 }
12338 case NEON::BI__builtin_neon_vpaddd_f64: {
12339 auto *Ty = llvm::FixedVectorType::get(DoubleTy, 2);
12340 Value *Vec = EmitScalarExpr(E->getArg(0));
12341 // The vector is v2f64, so make sure it's bitcast to that.
12342 Vec = Builder.CreateBitCast(Vec, Ty, "v2f64");
12343 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
12344 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
12345 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
12346 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
12347 // Pairwise addition of a v2f64 into a scalar f64.
12348 return Builder.CreateFAdd(Op0, Op1, "vpaddd");
12349 }
12350 case NEON::BI__builtin_neon_vpadds_f32: {
12351 auto *Ty = llvm::FixedVectorType::get(FloatTy, 2);
12352 Value *Vec = EmitScalarExpr(E->getArg(0));
12353 // The vector is v2f32, so make sure it's bitcast to that.
12354 Vec = Builder.CreateBitCast(Vec, Ty, "v2f32");
12355 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
12356 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
12357 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
12358 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
12359 // Pairwise addition of a v2f32 into a scalar f32.
12360 return Builder.CreateFAdd(Op0, Op1, "vpaddd");
12361 }
12362 case NEON::BI__builtin_neon_vceqzd_s64:
12363 case NEON::BI__builtin_neon_vceqzd_f64:
12364 case NEON::BI__builtin_neon_vceqzs_f32:
12365 case NEON::BI__builtin_neon_vceqzh_f16:
12366 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12367 return EmitAArch64CompareBuiltinExpr(
12368 Ops[0], ConvertType(E->getCallReturnType(getContext())),
12369 ICmpInst::FCMP_OEQ, ICmpInst::ICMP_EQ, "vceqz");
12370 case NEON::BI__builtin_neon_vcgezd_s64:
12371 case NEON::BI__builtin_neon_vcgezd_f64:
12372 case NEON::BI__builtin_neon_vcgezs_f32:
12373 case NEON::BI__builtin_neon_vcgezh_f16:
12374 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12375 return EmitAArch64CompareBuiltinExpr(
12376 Ops[0], ConvertType(E->getCallReturnType(getContext())),
12377 ICmpInst::FCMP_OGE, ICmpInst::ICMP_SGE, "vcgez");
12378 case NEON::BI__builtin_neon_vclezd_s64:
12379 case NEON::BI__builtin_neon_vclezd_f64:
12380 case NEON::BI__builtin_neon_vclezs_f32:
12381 case NEON::BI__builtin_neon_vclezh_f16:
12382 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12383 return EmitAArch64CompareBuiltinExpr(
12384 Ops[0], ConvertType(E->getCallReturnType(getContext())),
12385 ICmpInst::FCMP_OLE, ICmpInst::ICMP_SLE, "vclez");
12386 case NEON::BI__builtin_neon_vcgtzd_s64:
12387 case NEON::BI__builtin_neon_vcgtzd_f64:
12388 case NEON::BI__builtin_neon_vcgtzs_f32:
12389 case NEON::BI__builtin_neon_vcgtzh_f16:
12390 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12391 return EmitAArch64CompareBuiltinExpr(
12392 Ops[0], ConvertType(E->getCallReturnType(getContext())),
12393 ICmpInst::FCMP_OGT, ICmpInst::ICMP_SGT, "vcgtz");
12394 case NEON::BI__builtin_neon_vcltzd_s64:
12395 case NEON::BI__builtin_neon_vcltzd_f64:
12396 case NEON::BI__builtin_neon_vcltzs_f32:
12397 case NEON::BI__builtin_neon_vcltzh_f16:
12398 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12399 return EmitAArch64CompareBuiltinExpr(
12400 Ops[0], ConvertType(E->getCallReturnType(getContext())),
12401 ICmpInst::FCMP_OLT, ICmpInst::ICMP_SLT, "vcltz");
12402
12403 case NEON::BI__builtin_neon_vceqzd_u64: {
12404 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12405 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
12406 Ops[0] =
12407 Builder.CreateICmpEQ(Ops[0], llvm::Constant::getNullValue(Int64Ty));
12408 return Builder.CreateSExt(Ops[0], Int64Ty, "vceqzd");
12409 }
12410 case NEON::BI__builtin_neon_vceqd_f64:
12411 case NEON::BI__builtin_neon_vcled_f64:
12412 case NEON::BI__builtin_neon_vcltd_f64:
12413 case NEON::BI__builtin_neon_vcged_f64:
12414 case NEON::BI__builtin_neon_vcgtd_f64: {
12415 llvm::CmpInst::Predicate P;
12416 switch (BuiltinID) {
12417 default: llvm_unreachable("missing builtin ID in switch!");
12418 case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break;
12419 case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break;
12420 case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break;
12421 case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break;
12422 case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break;
12423 }
12424 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12425 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
12426 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
12427 if (P == llvm::FCmpInst::FCMP_OEQ)
12428 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
12429 else
12430 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
12431 return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd");
12432 }
12433 case NEON::BI__builtin_neon_vceqs_f32:
12434 case NEON::BI__builtin_neon_vcles_f32:
12435 case NEON::BI__builtin_neon_vclts_f32:
12436 case NEON::BI__builtin_neon_vcges_f32:
12437 case NEON::BI__builtin_neon_vcgts_f32: {
12438 llvm::CmpInst::Predicate P;
12439 switch (BuiltinID) {
12440 default: llvm_unreachable("missing builtin ID in switch!");
12441 case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break;
12442 case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break;
12443 case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break;
12444 case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break;
12445 case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break;
12446 }
12447 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12448 Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy);
12449 Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy);
12450 if (P == llvm::FCmpInst::FCMP_OEQ)
12451 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
12452 else
12453 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
12454 return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd");
12455 }
12456 case NEON::BI__builtin_neon_vceqh_f16:
12457 case NEON::BI__builtin_neon_vcleh_f16:
12458 case NEON::BI__builtin_neon_vclth_f16:
12459 case NEON::BI__builtin_neon_vcgeh_f16:
12460 case NEON::BI__builtin_neon_vcgth_f16: {
12461 llvm::CmpInst::Predicate P;
12462 switch (BuiltinID) {
12463 default: llvm_unreachable("missing builtin ID in switch!");
12464 case NEON::BI__builtin_neon_vceqh_f16: P = llvm::FCmpInst::FCMP_OEQ; break;
12465 case NEON::BI__builtin_neon_vcleh_f16: P = llvm::FCmpInst::FCMP_OLE; break;
12466 case NEON::BI__builtin_neon_vclth_f16: P = llvm::FCmpInst::FCMP_OLT; break;
12467 case NEON::BI__builtin_neon_vcgeh_f16: P = llvm::FCmpInst::FCMP_OGE; break;
12468 case NEON::BI__builtin_neon_vcgth_f16: P = llvm::FCmpInst::FCMP_OGT; break;
12469 }
12470 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12471 Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
12472 Ops[1] = Builder.CreateBitCast(Ops[1], HalfTy);
12473 if (P == llvm::FCmpInst::FCMP_OEQ)
12474 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
12475 else
12476 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
12477 return Builder.CreateSExt(Ops[0], Int16Ty, "vcmpd");
12478 }
12479 case NEON::BI__builtin_neon_vceqd_s64:
12480 case NEON::BI__builtin_neon_vceqd_u64:
12481 case NEON::BI__builtin_neon_vcgtd_s64:
12482 case NEON::BI__builtin_neon_vcgtd_u64:
12483 case NEON::BI__builtin_neon_vcltd_s64:
12484 case NEON::BI__builtin_neon_vcltd_u64:
12485 case NEON::BI__builtin_neon_vcged_u64:
12486 case NEON::BI__builtin_neon_vcged_s64:
12487 case NEON::BI__builtin_neon_vcled_u64:
12488 case NEON::BI__builtin_neon_vcled_s64: {
12489 llvm::CmpInst::Predicate P;
12490 switch (BuiltinID) {
12491 default: llvm_unreachable("missing builtin ID in switch!");
12492 case NEON::BI__builtin_neon_vceqd_s64:
12493 case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ;break;
12494 case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;break;
12495 case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;break;
12496 case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;break;
12497 case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;break;
12498 case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;break;
12499 case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;break;
12500 case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;break;
12501 case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;break;
12502 }
12503 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12504 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
12505 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
12506 Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]);
12507 return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd");
12508 }
12509 case NEON::BI__builtin_neon_vtstd_s64:
12510 case NEON::BI__builtin_neon_vtstd_u64: {
12511 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12512 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
12513 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
12514 Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
12515 Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
12516 llvm::Constant::getNullValue(Int64Ty));
12517 return Builder.CreateSExt(Ops[0], Int64Ty, "vtstd");
12518 }
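// Illustrative sketch, not part of the source: vtstd_u64(a, b) tests for any
// common set bits, roughly
//   %and = and i64 %a, %b
//   %cmp = icmp ne i64 %and, 0
//   %r   = sext i1 %cmp to i64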
12519 case NEON::BI__builtin_neon_vset_lane_i8:
12520 case NEON::BI__builtin_neon_vset_lane_i16:
12521 case NEON::BI__builtin_neon_vset_lane_i32:
12522 case NEON::BI__builtin_neon_vset_lane_i64:
12523 case NEON::BI__builtin_neon_vset_lane_bf16:
12524 case NEON::BI__builtin_neon_vset_lane_f32:
12525 case NEON::BI__builtin_neon_vsetq_lane_i8:
12526 case NEON::BI__builtin_neon_vsetq_lane_i16:
12527 case NEON::BI__builtin_neon_vsetq_lane_i32:
12528 case NEON::BI__builtin_neon_vsetq_lane_i64:
12529 case NEON::BI__builtin_neon_vsetq_lane_bf16:
12530 case NEON::BI__builtin_neon_vsetq_lane_f32:
12531 Ops.push_back(EmitScalarExpr(E->getArg(2)));
12532 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
12533 case NEON::BI__builtin_neon_vset_lane_f64:
12534 // The vector type needs a cast for the v1f64 variant.
12535 Ops[1] =
12536 Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 1));
12537 Ops.push_back(EmitScalarExpr(E->getArg(2)));
12538 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
12539 case NEON::BI__builtin_neon_vsetq_lane_f64:
12540 // The vector type needs a cast for the v2f64 variant.
12541 Ops[1] =
12542 Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 2));
12543 Ops.push_back(EmitScalarExpr(E->getArg(2)));
12544 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
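// Illustrative sketch, not part of the source: vsetq_lane_f64(x, v, 1) becomes
// a single IR lane insert after the bitcast to <2 x double>:
//   %r = insertelement <2 x double> %v, double %x, i32 1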
12545
12546 case NEON::BI__builtin_neon_vget_lane_i8:
12547 case NEON::BI__builtin_neon_vdupb_lane_i8:
12548 Ops[0] =
12549 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 8));
12550 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12551 "vget_lane");
12552 case NEON::BI__builtin_neon_vgetq_lane_i8:
12553 case NEON::BI__builtin_neon_vdupb_laneq_i8:
12554 Ops[0] =
12555 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 16));
12556 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12557 "vgetq_lane");
12558 case NEON::BI__builtin_neon_vget_lane_i16:
12559 case NEON::BI__builtin_neon_vduph_lane_i16:
12560 Ops[0] =
12561 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int16Ty, 4));
12562 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12563 "vget_lane");
12564 case NEON::BI__builtin_neon_vgetq_lane_i16:
12565 case NEON::BI__builtin_neon_vduph_laneq_i16:
12566 Ops[0] =
12567 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int16Ty, 8));
12568 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12569 "vgetq_lane");
12570 case NEON::BI__builtin_neon_vget_lane_i32:
12571 case NEON::BI__builtin_neon_vdups_lane_i32:
12572 Ops[0] =
12573 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 2));
12574 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12575 "vget_lane");
12576 case NEON::BI__builtin_neon_vdups_lane_f32:
12577 Ops[0] =
12578 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 2));
12579 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12580 "vdups_lane");
12581 case NEON::BI__builtin_neon_vgetq_lane_i32:
12582 case NEON::BI__builtin_neon_vdups_laneq_i32:
12583 Ops[0] =
12584 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 4));
12585 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12586 "vgetq_lane");
12587 case NEON::BI__builtin_neon_vget_lane_i64:
12588 case NEON::BI__builtin_neon_vdupd_lane_i64:
12589 Ops[0] =
12590 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 1));
12591 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12592 "vget_lane");
12593 case NEON::BI__builtin_neon_vdupd_lane_f64:
12594 Ops[0] =
12595 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 1));
12596 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12597 "vdupd_lane");
12598 case NEON::BI__builtin_neon_vgetq_lane_i64:
12599 case NEON::BI__builtin_neon_vdupd_laneq_i64:
12600 Ops[0] =
12601 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 2));
12602 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12603 "vgetq_lane");
12604 case NEON::BI__builtin_neon_vget_lane_f32:
12605 Ops[0] =
12606 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 2));
12607 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12608 "vget_lane");
12609 case NEON::BI__builtin_neon_vget_lane_f64:
12610 Ops[0] =
12611 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 1));
12612 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12613 "vget_lane");
12614 case NEON::BI__builtin_neon_vgetq_lane_f32:
12615 case NEON::BI__builtin_neon_vdups_laneq_f32:
12616 Ops[0] =
12617 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 4));
12618 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12619 "vgetq_lane");
12620 case NEON::BI__builtin_neon_vgetq_lane_f64:
12621 case NEON::BI__builtin_neon_vdupd_laneq_f64:
12622 Ops[0] =
12623 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 2));
12624 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12625 "vgetq_lane");
12626 case NEON::BI__builtin_neon_vaddh_f16:
12627 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12628 return Builder.CreateFAdd(Ops[0], Ops[1], "vaddh");
12629 case NEON::BI__builtin_neon_vsubh_f16:
12630 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12631 return Builder.CreateFSub(Ops[0], Ops[1], "vsubh");
12632 case NEON::BI__builtin_neon_vmulh_f16:
12633 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12634 return Builder.CreateFMul(Ops[0], Ops[1], "vmulh");
12635 case NEON::BI__builtin_neon_vdivh_f16:
12636 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12637 return Builder.CreateFDiv(Ops[0], Ops[1], "vdivh");
12638 case NEON::BI__builtin_neon_vfmah_f16:
12639 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
12640 return emitCallMaybeConstrainedFPBuiltin(
12641 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
12642 {EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)), Ops[0]});
12643 case NEON::BI__builtin_neon_vfmsh_f16: {
12644 Value* Neg = Builder.CreateFNeg(EmitScalarExpr(E->getArg(1)), "vsubh");
12645
12646 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
12647 return emitCallMaybeConstrainedFPBuiltin(
12648 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
12649 {Neg, EmitScalarExpr(E->getArg(2)), Ops[0]});
12650 }
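// Illustrative sketch, not part of the source: vfmah_f16(a, b, c) computes
// a + b * c, so the accumulator Ops[0] is passed last to llvm.fma:
//   %r = call half @llvm.fma.f16(half %b, half %c, half %a)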
12651 case NEON::BI__builtin_neon_vaddd_s64:
12652 case NEON::BI__builtin_neon_vaddd_u64:
12653 return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd");
12654 case NEON::BI__builtin_neon_vsubd_s64:
12655 case NEON::BI__builtin_neon_vsubd_u64:
12656 return Builder.CreateSub(Ops[0], EmitScalarExpr(E->getArg(1)), "vsubd");
12657 case NEON::BI__builtin_neon_vqdmlalh_s16:
12658 case NEON::BI__builtin_neon_vqdmlslh_s16: {
12659 SmallVector<Value *, 2> ProductOps;
12660 ProductOps.push_back(vectorWrapScalar16(Ops[1]));
12661 ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2))));
12662 auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4);
12663 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
12664 ProductOps, "vqdmlXl");
12665 Constant *CI = ConstantInt::get(SizeTy, 0);
12666 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
12667
12668 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16
12669 ? Intrinsic::aarch64_neon_sqadd
12670 : Intrinsic::aarch64_neon_sqsub;
12671 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl");
12672 }
12673 case NEON::BI__builtin_neon_vqshlud_n_s64: {
12674 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12675 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
12676 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty),
12677 Ops, "vqshlu_n");
12678 }
12679 case NEON::BI__builtin_neon_vqshld_n_u64:
12680 case NEON::BI__builtin_neon_vqshld_n_s64: {
12681 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64
12682 ? Intrinsic::aarch64_neon_uqshl
12683 : Intrinsic::aarch64_neon_sqshl;
12684 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12685 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
12686 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n");
12687 }
12688 case NEON::BI__builtin_neon_vrshrd_n_u64:
12689 case NEON::BI__builtin_neon_vrshrd_n_s64: {
12690 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64
12691 ? Intrinsic::aarch64_neon_urshl
12692 : Intrinsic::aarch64_neon_srshl;
12693 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12694 int SV = cast<ConstantInt>(Ops[1])->getSExtValue();
12695 Ops[1] = ConstantInt::get(Int64Ty, -SV);
12696 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n");
12697 }
12698 case NEON::BI__builtin_neon_vrsrad_n_u64:
12699 case NEON::BI__builtin_neon_vrsrad_n_s64: {
12700 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64
12701 ? Intrinsic::aarch64_neon_urshl
12702 : Intrinsic::aarch64_neon_srshl;
12703 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
12704 Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2))));
12705 Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Int64Ty),
12706 {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)});
12707 return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty));
12708 }
12709 case NEON::BI__builtin_neon_vshld_n_s64:
12710 case NEON::BI__builtin_neon_vshld_n_u64: {
12711 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
12712 return Builder.CreateShl(
12713 Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n");
12714 }
12715 case NEON::BI__builtin_neon_vshrd_n_s64: {
12716 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
12717 return Builder.CreateAShr(
12718 Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
12719 Amt->getZExtValue())),
12720 "shrd_n");
12721 }
12722 case NEON::BI__builtin_neon_vshrd_n_u64: {
12723 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
12724 uint64_t ShiftAmt = Amt->getZExtValue();
12725 // Right-shifting an unsigned value by its size yields 0.
12726 if (ShiftAmt == 64)
12727 return ConstantInt::get(Int64Ty, 0);
12728 return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt),
12729 "shrd_n");
12730 }
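// Illustrative sketch, not part of the source: the scalar shift-right cases
// clamp or fold the immediate, e.g. vshrd_n_s64(a, 64) is emitted as
//   %r = ashr i64 %a, 63
// while vshrd_n_u64(a, 64) folds directly to the constant 0.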
12731 case NEON::BI__builtin_neon_vsrad_n_s64: {
12732 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
12733 Ops[1] = Builder.CreateAShr(
12734 Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
12735 Amt->getZExtValue())),
12736 "shrd_n");
12737 return Builder.CreateAdd(Ops[0], Ops[1]);
12738 }
12739 case NEON::BI__builtin_neon_vsrad_n_u64: {
12740 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
12741 uint64_t ShiftAmt = Amt->getZExtValue();
12742 // Right-shifting an unsigned value by its size yields 0.
12743 // As Op + 0 = Op, return Ops[0] directly.
12744 if (ShiftAmt == 64)
12745 return Ops[0];
12746 Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt),
12747 "shrd_n");
12748 return Builder.CreateAdd(Ops[0], Ops[1]);
12749 }
12750 case NEON::BI__builtin_neon_vqdmlalh_lane_s16:
12751 case NEON::BI__builtin_neon_vqdmlalh_laneq_s16:
12752 case NEON::BI__builtin_neon_vqdmlslh_lane_s16:
12753 case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: {
12754 Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
12755 "lane");
12756 SmallVector<Value *, 2> ProductOps;
12757 ProductOps.push_back(vectorWrapScalar16(Ops[1]));
12758 ProductOps.push_back(vectorWrapScalar16(Ops[2]));
12759 auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4);
12760 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
12761 ProductOps, "vqdmlXl");
12762 Constant *CI = ConstantInt::get(SizeTy, 0);
12763 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
12764 Ops.pop_back();
12765
12766 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 ||
12767 BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16)
12768 ? Intrinsic::aarch64_neon_sqadd
12769 : Intrinsic::aarch64_neon_sqsub;
12770 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl");
12771 }
12772 case NEON::BI__builtin_neon_vqdmlals_s32:
12773 case NEON::BI__builtin_neon_vqdmlsls_s32: {
12774 SmallVector<Value *, 2> ProductOps;
12775 ProductOps.push_back(Ops[1]);
12776 ProductOps.push_back(EmitScalarExpr(E->getArg(2)));
12777 Ops[1] =
12778 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
12779 ProductOps, "vqdmlXl");
12780
12781 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32
12782 ? Intrinsic::aarch64_neon_sqadd
12783 : Intrinsic::aarch64_neon_sqsub;
12784 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl");
12785 }
12786 case NEON::BI__builtin_neon_vqdmlals_lane_s32:
12787 case NEON::BI__builtin_neon_vqdmlals_laneq_s32:
12788 case NEON::BI__builtin_neon_vqdmlsls_lane_s32:
12789 case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: {
12790 Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
12791 "lane");
12792 SmallVector<Value *, 2> ProductOps;
12793 ProductOps.push_back(Ops[1]);
12794 ProductOps.push_back(Ops[2]);
12795 Ops[1] =
12796 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
12797 ProductOps, "vqdmlXl");
12798 Ops.pop_back();
12799
12800 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 ||
12801 BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32)
12802 ? Intrinsic::aarch64_neon_sqadd
12803 : Intrinsic::aarch64_neon_sqsub;
12804 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl");
12805 }
12806 case NEON::BI__builtin_neon_vget_lane_bf16:
12807 case NEON::BI__builtin_neon_vduph_lane_bf16:
12808 case NEON::BI__builtin_neon_vduph_lane_f16: {
12809 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12810 "vget_lane");
12811 }
12812 case NEON::BI__builtin_neon_vgetq_lane_bf16:
12813 case NEON::BI__builtin_neon_vduph_laneq_bf16:
12814 case NEON::BI__builtin_neon_vduph_laneq_f16: {
12815 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12816 "vgetq_lane");
12817 }
12818 case NEON::BI__builtin_neon_vcvt_bf16_f32: {
12819 llvm::Type *V4F32 = FixedVectorType::get(Builder.getFloatTy(), 4);
12820 llvm::Type *V4BF16 = FixedVectorType::get(Builder.getBFloatTy(), 4);
12821 return Builder.CreateFPTrunc(Builder.CreateBitCast(Ops[0], V4F32), V4BF16);
12822 }
12823 case NEON::BI__builtin_neon_vcvtq_low_bf16_f32: {
12824 SmallVector<int, 16> ConcatMask(8);
12825 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
12826 llvm::Type *V4F32 = FixedVectorType::get(Builder.getFloatTy(), 4);
12827 llvm::Type *V4BF16 = FixedVectorType::get(Builder.getBFloatTy(), 4);
12828 llvm::Value *Trunc =
12829 Builder.CreateFPTrunc(Builder.CreateBitCast(Ops[0], V4F32), V4BF16);
12830 return Builder.CreateShuffleVector(
12831 Trunc, ConstantAggregateZero::get(V4BF16), ConcatMask);
12832 }
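// Illustrative sketch, not part of the source: vcvtq_low_bf16_f32(v) truncates
// the four f32 lanes and zero-fills the upper half of the result:
//   %t = fptrunc <4 x float> %v to <4 x bfloat>
//   %r = shufflevector <4 x bfloat> %t, <4 x bfloat> zeroinitializer,
//        <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>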
12833 case NEON::BI__builtin_neon_vcvtq_high_bf16_f32: {
12834 SmallVector<int, 16> ConcatMask(8);
12835 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
12836 SmallVector<int, 16> LoMask(4);
12837 std::iota(LoMask.begin(), LoMask.end(), 0);
12838 llvm::Type *V4F32 = FixedVectorType::get(Builder.getFloatTy(), 4);
12839 llvm::Type *V4BF16 = FixedVectorType::get(Builder.getBFloatTy(), 4);
12840 llvm::Type *V8BF16 = FixedVectorType::get(Builder.getBFloatTy(), 8);
12841 llvm::Value *Inactive = Builder.CreateShuffleVector(
12842 Builder.CreateBitCast(Ops[0], V8BF16), LoMask);
12843 llvm::Value *Trunc =
12844 Builder.CreateFPTrunc(Builder.CreateBitCast(Ops[1], V4F32), V4BF16);
12845 return Builder.CreateShuffleVector(Inactive, Trunc, ConcatMask);
12846 }
12847
12848 case clang::AArch64::BI_InterlockedAdd:
12849 case clang::AArch64::BI_InterlockedAdd64: {
12850 Address DestAddr = CheckAtomicAlignment(*this, E);
12851 Value *Val = EmitScalarExpr(E->getArg(1));
12852 AtomicRMWInst *RMWI =
12853 Builder.CreateAtomicRMW(AtomicRMWInst::Add, DestAddr, Val,
12854 llvm::AtomicOrdering::SequentiallyConsistent);
12855 return Builder.CreateAdd(RMWI, Val);
12856 }
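// Illustrative sketch, not part of the source: _InterlockedAdd returns the
// value *after* the addition, hence the extra add on the atomicrmw result:
//   %old = atomicrmw add ptr %p, i32 %v seq_cst
//   %new = add i32 %old, %v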
12857 }
12858
12859 llvm::FixedVectorType *VTy = GetNeonType(this, Type);
12860 llvm::Type *Ty = VTy;
12861 if (!Ty)
12862 return nullptr;
12863
12864 // Not all intrinsics handled by the common case work for AArch64 yet, so only
12865 // defer to common code if it's been added to our special map.
12866 Builtin = findARMVectorIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID,
12867 AArch64SIMDIntrinsicsProvenSorted);
12868
12869 if (Builtin)
12870 return EmitCommonNeonBuiltinExpr(
12871 Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
12872 Builtin->NameHint, Builtin->TypeModifier, E, Ops,
12873 /*never use addresses*/ Address::invalid(), Address::invalid(), Arch);
12874
12875 if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops, Arch))
12876 return V;
12877
12878 unsigned Int;
12879 switch (BuiltinID) {
12880 default: return nullptr;
12881 case NEON::BI__builtin_neon_vbsl_v:
12882 case NEON::BI__builtin_neon_vbslq_v: {
12883 llvm::Type *BitTy = llvm::VectorType::getInteger(VTy);
12884 Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl");
12885 Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl");
12886 Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl");
12887
12888 Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl");
12889 Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl");
12890 Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl");
12891 return Builder.CreateBitCast(Ops[0], Ty);
12892 }
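// Illustrative sketch, not part of the source: vbslq_u32(m, a, b) is
// open-coded as a bitwise select on the integer vector type:
//   %notm = xor <4 x i32> %m, <i32 -1, i32 -1, i32 -1, i32 -1>
//   %t0   = and <4 x i32> %m, %a
//   %t1   = and <4 x i32> %notm, %b
//   %r    = or  <4 x i32> %t0, %t1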
12893 case NEON::BI__builtin_neon_vfma_lane_v:
12894 case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types
12895 // The ARM builtins (and instructions) have the addend as the first
12896 // operand, but the 'fma' intrinsics have it last. Swap it around here.
12897 Value *Addend = Ops[0];
12898 Value *Multiplicand = Ops[1];
12899 Value *LaneSource = Ops[2];
12900 Ops[0] = Multiplicand;
12901 Ops[1] = LaneSource;
12902 Ops[2] = Addend;
12903
12904 // Now adjust things to handle the lane access.
12905 auto *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v
12906 ? llvm::FixedVectorType::get(VTy->getElementType(),
12907 VTy->getNumElements() / 2)
12908 : VTy;
12909 llvm::Constant *cst = cast<Constant>(Ops[3]);
12910 Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(), cst);
12911 Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy);
12912 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane");
12913
12914 Ops.pop_back();
12915 Int = Builder.getIsFPConstrained() ? Intrinsic::experimental_constrained_fma
12916 : Intrinsic::fma;
12917 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla");
12918 }
12919 case NEON::BI__builtin_neon_vfma_laneq_v: {
12920 auto *VTy = cast<llvm::FixedVectorType>(Ty);
12921 // v1f64 fma should be mapped to Neon scalar f64 fma
12922 if (VTy && VTy->getElementType() == DoubleTy) {
12923 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
12924 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
12925 llvm::FixedVectorType *VTy =
12926 llvm::FixedVectorType::get(DoubleTy, 2);
12927 Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
12928 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
12929 Value *Result;
12930 Result = emitCallMaybeConstrainedFPBuiltin(
12931 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma,
12932 DoubleTy, {Ops[1], Ops[2], Ops[0]});
12933 return Builder.CreateBitCast(Result, Ty);
12934 }
12935 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
12936 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
12937
12938 auto *STy = llvm::FixedVectorType::get(VTy->getElementType(),
12939 VTy->getNumElements() * 2);
12940 Ops[2] = Builder.CreateBitCast(Ops[2], STy);
12941 Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(),
12942 cast<ConstantInt>(Ops[3]));
12943 Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane");
12944
12945 return emitCallMaybeConstrainedFPBuiltin(
12946 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
12947 {Ops[2], Ops[1], Ops[0]});
12948 }
12949 case NEON::BI__builtin_neon_vfmaq_laneq_v: {
12950 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
12951 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
12952
12953 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
12954 Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
12955 return emitCallMaybeConstrainedFPBuiltin(
12956 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
12957 {Ops[2], Ops[1], Ops[0]});
12958 }
12959 case NEON::BI__builtin_neon_vfmah_lane_f16:
12960 case NEON::BI__builtin_neon_vfmas_lane_f32:
12961 case NEON::BI__builtin_neon_vfmah_laneq_f16:
12962 case NEON::BI__builtin_neon_vfmas_laneq_f32:
12963 case NEON::BI__builtin_neon_vfmad_lane_f64:
12964 case NEON::BI__builtin_neon_vfmad_laneq_f64: {
12965 Ops.push_back(EmitScalarExpr(E->getArg(3)));
12966 llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
12967 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
12968 return emitCallMaybeConstrainedFPBuiltin(
12969 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
12970 {Ops[1], Ops[2], Ops[0]});
12971 }
12972 case NEON::BI__builtin_neon_vmull_v:
12973 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12974 Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull;
12975 if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull;
12976 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
12977 case NEON::BI__builtin_neon_vmax_v:
12978 case NEON::BI__builtin_neon_vmaxq_v:
12979 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12980 Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;
12981 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax;
12982 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
12983 case NEON::BI__builtin_neon_vmaxh_f16: {
12984 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12985 Int = Intrinsic::aarch64_neon_fmax;
12986 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmax");
12987 }
12988 case NEON::BI__builtin_neon_vmin_v:
12989 case NEON::BI__builtin_neon_vminq_v:
12990 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12991 Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;
12992 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin;
12993 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
12994 case NEON::BI__builtin_neon_vminh_f16: {
12995 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12996 Int = Intrinsic::aarch64_neon_fmin;
12997 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmin");
12998 }
12999 case NEON::BI__builtin_neon_vabd_v:
13000 case NEON::BI__builtin_neon_vabdq_v:
13001 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
13002 Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd;
13003 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd;
13004 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd");
13005 case NEON::BI__builtin_neon_vpadal_v:
13006 case NEON::BI__builtin_neon_vpadalq_v: {
13007 unsigned ArgElts = VTy->getNumElements();
13008 llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType());
13009 unsigned BitWidth = EltTy->getBitWidth();
13010 auto *ArgTy = llvm::FixedVectorType::get(
13011 llvm::IntegerType::get(getLLVMContext(), BitWidth / 2), 2 * ArgElts);
13012 llvm::Type* Tys[2] = { VTy, ArgTy };
13013 Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp;
13014 SmallVector<llvm::Value*, 1> TmpOps;
13015 TmpOps.push_back(Ops[1]);
13016 Function *F = CGM.getIntrinsic(Int, Tys);
13017 llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal");
13018 llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType());
13019 return Builder.CreateAdd(tmp, addend);
13020 }
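// Illustrative sketch, not part of the source: vpadal is modelled as a
// pairwise widening add followed by an ordinary vector add with the
// accumulator, e.g. vpadal_u8(acc, v):
//   %lp = call <4 x i16> @llvm.aarch64.neon.uaddlp.v4i16.v8i8(<8 x i8> %v)
//   %r  = add <4 x i16> %lp, %acc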
13021 case NEON::BI__builtin_neon_vpmin_v:
13022 case NEON::BI__builtin_neon_vpminq_v:
13023 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
13024 Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp;
13025 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp;
13026 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
13027 case NEON::BI__builtin_neon_vpmax_v:
13028 case NEON::BI__builtin_neon_vpmaxq_v:
13029 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
13030 Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp;
13031 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp;
13032 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
13033 case NEON::BI__builtin_neon_vminnm_v:
13034 case NEON::BI__builtin_neon_vminnmq_v:
13035 Int = Intrinsic::aarch64_neon_fminnm;
13036 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm");
13037 case NEON::BI__builtin_neon_vminnmh_f16:
13038 Ops.push_back(EmitScalarExpr(E->getArg(1)));
13039 Int = Intrinsic::aarch64_neon_fminnm;
13040 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vminnm");
13041 case NEON::BI__builtin_neon_vmaxnm_v:
13042 case NEON::BI__builtin_neon_vmaxnmq_v:
13043 Int = Intrinsic::aarch64_neon_fmaxnm;
13044 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm");
13045 case NEON::BI__builtin_neon_vmaxnmh_f16:
13046 Ops.push_back(EmitScalarExpr(E->getArg(1)));
13047 Int = Intrinsic::aarch64_neon_fmaxnm;
13048 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmaxnm");
13049 case NEON::BI__builtin_neon_vrecpss_f32: {
13050 Ops.push_back(EmitScalarExpr(E->getArg(1)));
13051 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy),
13052 Ops, "vrecps");
13053 }
13054 case NEON::BI__builtin_neon_vrecpsd_f64:
13055 Ops.push_back(EmitScalarExpr(E->getArg(1)));
13056 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy),
13057 Ops, "vrecps");
13058 case NEON::BI__builtin_neon_vrecpsh_f16:
13059 Ops.push_back(EmitScalarExpr(E->getArg(1)));
13060 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, HalfTy),
13061 Ops, "vrecps");
13062 case NEON::BI__builtin_neon_vqshrun_n_v:
13063 Int = Intrinsic::aarch64_neon_sqshrun;
13064 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n");
13065 case NEON::BI__builtin_neon_vqrshrun_n_v:
13066 Int = Intrinsic::aarch64_neon_sqrshrun;
13067 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n");
13068 case NEON::BI__builtin_neon_vqshrn_n_v:
13069 Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn;
13070 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n");
13071 case NEON::BI__builtin_neon_vrshrn_n_v:
13072 Int = Intrinsic::aarch64_neon_rshrn;
13073 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n");
13074 case NEON::BI__builtin_neon_vqrshrn_n_v:
13075 Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn;
13076 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n");
13077 case NEON::BI__builtin_neon_vrndah_f16: {
13078 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13079 Int = Builder.getIsFPConstrained()
13080 ? Intrinsic::experimental_constrained_round
13081 : Intrinsic::round;
13082 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrnda");
13083 }
13084 case NEON::BI__builtin_neon_vrnda_v:
13085 case NEON::BI__builtin_neon_vrndaq_v: {
13086 Int = Builder.getIsFPConstrained()
13087 ? Intrinsic::experimental_constrained_round
13088 : Intrinsic::round;
13089 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda");
13090 }
13091 case NEON::BI__builtin_neon_vrndih_f16: {
13092 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13093 Int = Builder.getIsFPConstrained()
13094 ? Intrinsic::experimental_constrained_nearbyint
13095 : Intrinsic::nearbyint;
13096 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndi");
13097 }
13098 case NEON::BI__builtin_neon_vrndmh_f16: {
13099 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13100 Int = Builder.getIsFPConstrained()
13101 ? Intrinsic::experimental_constrained_floor
13102 : Intrinsic::floor;
13103 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndm");
13104 }
13105 case NEON::BI__builtin_neon_vrndm_v:
13106 case NEON::BI__builtin_neon_vrndmq_v: {
13107 Int = Builder.getIsFPConstrained()
13108 ? Intrinsic::experimental_constrained_floor
13109 : Intrinsic::floor;
13110 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm");
13111 }
13112 case NEON::BI__builtin_neon_vrndnh_f16: {
13113 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13114 Int = Builder.getIsFPConstrained()
13115 ? Intrinsic::experimental_constrained_roundeven
13116 : Intrinsic::roundeven;
13117 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndn");
13118 }
13119 case NEON::BI__builtin_neon_vrndn_v:
13120 case NEON::BI__builtin_neon_vrndnq_v: {
13121 Int = Builder.getIsFPConstrained()
13122 ? Intrinsic::experimental_constrained_roundeven
13123 : Intrinsic::roundeven;
13124 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn");
13125 }
13126 case NEON::BI__builtin_neon_vrndns_f32: {
13127 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13128 Int = Builder.getIsFPConstrained()
13129 ? Intrinsic::experimental_constrained_roundeven
13130 : Intrinsic::roundeven;
13131 return EmitNeonCall(CGM.getIntrinsic(Int, FloatTy), Ops, "vrndn");
13132 }
13133 case NEON::BI__builtin_neon_vrndph_f16: {
13134 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13135 Int = Builder.getIsFPConstrained()
13136 ? Intrinsic::experimental_constrained_ceil
13137 : Intrinsic::ceil;
13138 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndp");
13139 }
13140 case NEON::BI__builtin_neon_vrndp_v:
13141 case NEON::BI__builtin_neon_vrndpq_v: {
13142 Int = Builder.getIsFPConstrained()
13143 ? Intrinsic::experimental_constrained_ceil
13144 : Intrinsic::ceil;
13145 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp");
13146 }
13147 case NEON::BI__builtin_neon_vrndxh_f16: {
13148 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13149 Int = Builder.getIsFPConstrained()
13150 ? Intrinsic::experimental_constrained_rint
13151 : Intrinsic::rint;
13152 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndx");
13153 }
13154 case NEON::BI__builtin_neon_vrndx_v:
13155 case NEON::BI__builtin_neon_vrndxq_v: {
13156 Int = Builder.getIsFPConstrained()
13157 ? Intrinsic::experimental_constrained_rint
13158 : Intrinsic::rint;
13159 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx");
13160 }
13161 case NEON::BI__builtin_neon_vrndh_f16: {
13162 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13163 Int = Builder.getIsFPConstrained()
13164 ? Intrinsic::experimental_constrained_trunc
13165 : Intrinsic::trunc;
13166 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndz");
13167 }
13168 case NEON::BI__builtin_neon_vrnd32x_f32:
13169 case NEON::BI__builtin_neon_vrnd32xq_f32:
13170 case NEON::BI__builtin_neon_vrnd32x_f64:
13171 case NEON::BI__builtin_neon_vrnd32xq_f64: {
13172 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13173 Int = Intrinsic::aarch64_neon_frint32x;
13174 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32x");
13175 }
13176 case NEON::BI__builtin_neon_vrnd32z_f32:
13177 case NEON::BI__builtin_neon_vrnd32zq_f32:
13178 case NEON::BI__builtin_neon_vrnd32z_f64:
13179 case NEON::BI__builtin_neon_vrnd32zq_f64: {
13180 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13181 Int = Intrinsic::aarch64_neon_frint32z;
13182 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32z");
13183 }
13184 case NEON::BI__builtin_neon_vrnd64x_f32:
13185 case NEON::BI__builtin_neon_vrnd64xq_f32:
13186 case NEON::BI__builtin_neon_vrnd64x_f64:
13187 case NEON::BI__builtin_neon_vrnd64xq_f64: {
13188 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13189 Int = Intrinsic::aarch64_neon_frint64x;
13190 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64x");
13191 }
13192 case NEON::BI__builtin_neon_vrnd64z_f32:
13193 case NEON::BI__builtin_neon_vrnd64zq_f32:
13194 case NEON::BI__builtin_neon_vrnd64z_f64:
13195 case NEON::BI__builtin_neon_vrnd64zq_f64: {
13196 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13197 Int = Intrinsic::aarch64_neon_frint64z;
13198 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64z");
13199 }
13200 case NEON::BI__builtin_neon_vrnd_v:
13201 case NEON::BI__builtin_neon_vrndq_v: {
13202 Int = Builder.getIsFPConstrained()
13203 ? Intrinsic::experimental_constrained_trunc
13204 : Intrinsic::trunc;
13205 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz");
13206 }
13207 case NEON::BI__builtin_neon_vcvt_f64_v:
13208 case NEON::BI__builtin_neon_vcvtq_f64_v:
13209 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
13210 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
13211 return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
13212 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
13213 case NEON::BI__builtin_neon_vcvt_f64_f32: {
13214 assert(Type.getEltType() == NeonTypeFlags::Float64 && quad &&
13215 "unexpected vcvt_f64_f32 builtin");
13216 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false);
13217 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
13218
13219 return Builder.CreateFPExt(Ops[0], Ty, "vcvt");
13220 }
13221 case NEON::BI__builtin_neon_vcvt_f32_f64: {
13222 assert(Type.getEltType() == NeonTypeFlags::Float32 &&
13223 "unexpected vcvt_f32_f64 builtin");
13224 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true);
13225 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
13226
13227 return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt");
13228 }
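// Illustrative sketch, not part of the source: the f32<->f64 conversions above
// are plain IR casts, e.g. vcvt_f32_f64(v):
//   %r = fptrunc <2 x double> %v to <2 x float>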
13229 case NEON::BI__builtin_neon_vcvt_s32_v:
13230 case NEON::BI__builtin_neon_vcvt_u32_v:
13231 case NEON::BI__builtin_neon_vcvt_s64_v:
13232 case NEON::BI__builtin_neon_vcvt_u64_v:
13233 case NEON::BI__builtin_neon_vcvt_s16_f16:
13234 case NEON::BI__builtin_neon_vcvt_u16_f16:
13235 case NEON::BI__builtin_neon_vcvtq_s32_v:
13236 case NEON::BI__builtin_neon_vcvtq_u32_v:
13237 case NEON::BI__builtin_neon_vcvtq_s64_v:
13238 case NEON::BI__builtin_neon_vcvtq_u64_v:
13239 case NEON::BI__builtin_neon_vcvtq_s16_f16:
13240 case NEON::BI__builtin_neon_vcvtq_u16_f16: {
13241 Int =
13242 usgn ? Intrinsic::aarch64_neon_fcvtzu : Intrinsic::aarch64_neon_fcvtzs;
13243 llvm::Type *Tys[2] = {Ty, GetFloatNeonType(this, Type)};
13244 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtz");
13245 }
13246 case NEON::BI__builtin_neon_vcvta_s16_f16:
13247 case NEON::BI__builtin_neon_vcvta_u16_f16:
13248 case NEON::BI__builtin_neon_vcvta_s32_v:
13249 case NEON::BI__builtin_neon_vcvtaq_s16_f16:
13250 case NEON::BI__builtin_neon_vcvtaq_s32_v:
13251 case NEON::BI__builtin_neon_vcvta_u32_v:
13252 case NEON::BI__builtin_neon_vcvtaq_u16_f16:
13253 case NEON::BI__builtin_neon_vcvtaq_u32_v:
13254 case NEON::BI__builtin_neon_vcvta_s64_v:
13255 case NEON::BI__builtin_neon_vcvtaq_s64_v:
13256 case NEON::BI__builtin_neon_vcvta_u64_v:
13257 case NEON::BI__builtin_neon_vcvtaq_u64_v: {
13258 Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas;
13259 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
13260 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta");
13261 }
13262 case NEON::BI__builtin_neon_vcvtm_s16_f16:
13263 case NEON::BI__builtin_neon_vcvtm_s32_v:
13264 case NEON::BI__builtin_neon_vcvtmq_s16_f16:
13265 case NEON::BI__builtin_neon_vcvtmq_s32_v:
13266 case NEON::BI__builtin_neon_vcvtm_u16_f16:
13267 case NEON::BI__builtin_neon_vcvtm_u32_v:
13268 case NEON::BI__builtin_neon_vcvtmq_u16_f16:
13269 case NEON::BI__builtin_neon_vcvtmq_u32_v:
13270 case NEON::BI__builtin_neon_vcvtm_s64_v:
13271 case NEON::BI__builtin_neon_vcvtmq_s64_v:
13272 case NEON::BI__builtin_neon_vcvtm_u64_v:
13273 case NEON::BI__builtin_neon_vcvtmq_u64_v: {
13274 Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms;
13275 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
13276 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm");
13277 }
13278 case NEON::BI__builtin_neon_vcvtn_s16_f16:
13279 case NEON::BI__builtin_neon_vcvtn_s32_v:
13280 case NEON::BI__builtin_neon_vcvtnq_s16_f16:
13281 case NEON::BI__builtin_neon_vcvtnq_s32_v:
13282 case NEON::BI__builtin_neon_vcvtn_u16_f16:
13283 case NEON::BI__builtin_neon_vcvtn_u32_v:
13284 case NEON::BI__builtin_neon_vcvtnq_u16_f16:
13285 case NEON::BI__builtin_neon_vcvtnq_u32_v:
13286 case NEON::BI__builtin_neon_vcvtn_s64_v:
13287 case NEON::BI__builtin_neon_vcvtnq_s64_v:
13288 case NEON::BI__builtin_neon_vcvtn_u64_v:
13289 case NEON::BI__builtin_neon_vcvtnq_u64_v: {
13290 Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns;
13291 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
13292 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn");
13293 }
13294 case NEON::BI__builtin_neon_vcvtp_s16_f16:
13295 case NEON::BI__builtin_neon_vcvtp_s32_v:
13296 case NEON::BI__builtin_neon_vcvtpq_s16_f16:
13297 case NEON::BI__builtin_neon_vcvtpq_s32_v:
13298 case NEON::BI__builtin_neon_vcvtp_u16_f16:
13299 case NEON::BI__builtin_neon_vcvtp_u32_v:
13300 case NEON::BI__builtin_neon_vcvtpq_u16_f16:
13301 case NEON::BI__builtin_neon_vcvtpq_u32_v:
13302 case NEON::BI__builtin_neon_vcvtp_s64_v:
13303 case NEON::BI__builtin_neon_vcvtpq_s64_v:
13304 case NEON::BI__builtin_neon_vcvtp_u64_v:
13305 case NEON::BI__builtin_neon_vcvtpq_u64_v: {
13306 Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps;
13307 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
13308 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp");
13309 }
13310 case NEON::BI__builtin_neon_vmulx_v:
13311 case NEON::BI__builtin_neon_vmulxq_v: {
13312 Int = Intrinsic::aarch64_neon_fmulx;
13313 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
13314 }
13315 case NEON::BI__builtin_neon_vmulxh_lane_f16:
13316 case NEON::BI__builtin_neon_vmulxh_laneq_f16: {
13317 // vmulx_lane should be mapped to Neon scalar mulx after
13318 // extracting the scalar element
13319 Ops.push_back(EmitScalarExpr(E->getArg(2)));
13320 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
13321 Ops.pop_back();
13322 Int = Intrinsic::aarch64_neon_fmulx;
13323 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmulx");
13324 }
13325 case NEON::BI__builtin_neon_vmul_lane_v:
13326 case NEON::BI__builtin_neon_vmul_laneq_v: {
13327 // v1f64 vmul_lane should be mapped to Neon scalar mul lane
13328 bool Quad = false;
13329 if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v)
13330 Quad = true;
13331 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
13332 llvm::FixedVectorType *VTy =
13333 GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, Quad));
13334 Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
13335 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
13336 Value *Result = Builder.CreateFMul(Ops[0], Ops[1]);
13337 return Builder.CreateBitCast(Result, Ty);
13338 }
13339 case NEON::BI__builtin_neon_vnegd_s64:
13340 return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd");
13341 case NEON::BI__builtin_neon_vnegh_f16:
13342 return Builder.CreateFNeg(EmitScalarExpr(E->getArg(0)), "vnegh");
13343 case NEON::BI__builtin_neon_vpmaxnm_v:
13344 case NEON::BI__builtin_neon_vpmaxnmq_v: {
13345 Int = Intrinsic::aarch64_neon_fmaxnmp;
13346 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm");
13347 }
13348 case NEON::BI__builtin_neon_vpminnm_v:
13349 case NEON::BI__builtin_neon_vpminnmq_v: {
13350 Int = Intrinsic::aarch64_neon_fminnmp;
13351 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm");
13352 }
13353 case NEON::BI__builtin_neon_vsqrth_f16: {
13354 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13355 Int = Builder.getIsFPConstrained()
13356 ? Intrinsic::experimental_constrained_sqrt
13357 : Intrinsic::sqrt;
13358 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vsqrt");
13359 }
13360 case NEON::BI__builtin_neon_vsqrt_v:
13361 case NEON::BI__builtin_neon_vsqrtq_v: {
13362 Int = Builder.getIsFPConstrained()
13363 ? Intrinsic::experimental_constrained_sqrt
13364 : Intrinsic::sqrt;
13365 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
13366 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt");
13367 }
13368 case NEON::BI__builtin_neon_vrbit_v:
13369 case NEON::BI__builtin_neon_vrbitq_v: {
13370 Int = Intrinsic::bitreverse;
13371 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit");
13372 }
13373 case NEON::BI__builtin_neon_vaddv_u8:
13374 // FIXME: These are handled by the AArch64 scalar code.
13375 usgn = true;
13376 [[fallthrough]];
13377 case NEON::BI__builtin_neon_vaddv_s8: {
13378 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
13379 Ty = Int32Ty;
13380 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
13381 llvm::Type *Tys[2] = { Ty, VTy };
13382 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13383 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
13384 return Builder.CreateTrunc(Ops[0], Int8Ty);
13385 }
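// Illustrative sketch, not part of the source: the across-vector reductions
// return i32 from the underlying intrinsic and are truncated back to the
// element width, e.g. vaddv_u8(v):
//   %sum = call i32 @llvm.aarch64.neon.uaddv.i32.v8i8(<8 x i8> %v)
//   %r   = trunc i32 %sum to i8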
13386 case NEON::BI__builtin_neon_vaddv_u16:
13387 usgn = true;
13388 [[fallthrough]];
13389 case NEON::BI__builtin_neon_vaddv_s16: {
13390 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
13391 Ty = Int32Ty;
13392 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
13393 llvm::Type *Tys[2] = { Ty, VTy };
13394 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13395 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
13396 return Builder.CreateTrunc(Ops[0], Int16Ty);
13397 }
13398 case NEON::BI__builtin_neon_vaddvq_u8:
13399 usgn = true;
13400 [[fallthrough]];
13401 case NEON::BI__builtin_neon_vaddvq_s8: {
13402 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
13403 Ty = Int32Ty;
13404 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
13405 llvm::Type *Tys[2] = { Ty, VTy };
13406 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13407 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
13408 return Builder.CreateTrunc(Ops[0], Int8Ty);
13409 }
13410 case NEON::BI__builtin_neon_vaddvq_u16:
13411 usgn = true;
13412 [[fallthrough]];
13413 case NEON::BI__builtin_neon_vaddvq_s16: {
13414 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
13415 Ty = Int32Ty;
13416 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
13417 llvm::Type *Tys[2] = { Ty, VTy };
13418 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13419 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
13420 return Builder.CreateTrunc(Ops[0], Int16Ty);
13421 }
13422 case NEON::BI__builtin_neon_vmaxv_u8: {
13423 Int = Intrinsic::aarch64_neon_umaxv;
13424 Ty = Int32Ty;
13425 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
13426 llvm::Type *Tys[2] = { Ty, VTy };
13427 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13428 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
13429 return Builder.CreateTrunc(Ops[0], Int8Ty);
13430 }
13431 case NEON::BI__builtin_neon_vmaxv_u16: {
13432 Int = Intrinsic::aarch64_neon_umaxv;
13433 Ty = Int32Ty;
13434 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
13435 llvm::Type *Tys[2] = { Ty, VTy };
13436 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13437 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
13438 return Builder.CreateTrunc(Ops[0], Int16Ty);
13439 }
13440 case NEON::BI__builtin_neon_vmaxvq_u8: {
13441 Int = Intrinsic::aarch64_neon_umaxv;
13442 Ty = Int32Ty;
13443 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
13444 llvm::Type *Tys[2] = { Ty, VTy };
13445 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13446 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
13447 return Builder.CreateTrunc(Ops[0], Int8Ty);
13448 }
13449 case NEON::BI__builtin_neon_vmaxvq_u16: {
13450 Int = Intrinsic::aarch64_neon_umaxv;
13451 Ty = Int32Ty;
13452 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
13453 llvm::Type *Tys[2] = { Ty, VTy };
13454 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13455 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
13456 return Builder.CreateTrunc(Ops[0], Int16Ty);
13457 }
13458 case NEON::BI__builtin_neon_vmaxv_s8: {
13459 Int = Intrinsic::aarch64_neon_smaxv;
13460 Ty = Int32Ty;
13461 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
13462 llvm::Type *Tys[2] = { Ty, VTy };
13463 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13464 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
13465 return Builder.CreateTrunc(Ops[0], Int8Ty);
13466 }
13467 case NEON::BI__builtin_neon_vmaxv_s16: {
13468 Int = Intrinsic::aarch64_neon_smaxv;
13469 Ty = Int32Ty;
13470 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
13471 llvm::Type *Tys[2] = { Ty, VTy };
13472 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13473 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
13474 return Builder.CreateTrunc(Ops[0], Int16Ty);
13475 }
13476 case NEON::BI__builtin_neon_vmaxvq_s8: {
13477 Int = Intrinsic::aarch64_neon_smaxv;
13478 Ty = Int32Ty;
13479 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
13480 llvm::Type *Tys[2] = { Ty, VTy };
13481 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13482 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
13483 return Builder.CreateTrunc(Ops[0], Int8Ty);
13484 }
13485 case NEON::BI__builtin_neon_vmaxvq_s16: {
13486 Int = Intrinsic::aarch64_neon_smaxv;
13487 Ty = Int32Ty;
13488 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
13489 llvm::Type *Tys[2] = { Ty, VTy };
13490 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13491 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
13492 return Builder.CreateTrunc(Ops[0], Int16Ty);
13493 }
13494 case NEON::BI__builtin_neon_vmaxv_f16: {
13495 Int = Intrinsic::aarch64_neon_fmaxv;
13496 Ty = HalfTy;
13497 VTy = llvm::FixedVectorType::get(HalfTy, 4);
13498 llvm::Type *Tys[2] = { Ty, VTy };
13499 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13500 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
13501 return Builder.CreateTrunc(Ops[0], HalfTy);
13502 }
13503 case NEON::BI__builtin_neon_vmaxvq_f16: {
13504 Int = Intrinsic::aarch64_neon_fmaxv;
13505 Ty = HalfTy;
13506 VTy = llvm::FixedVectorType::get(HalfTy, 8);
13507 llvm::Type *Tys[2] = { Ty, VTy };
13508 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13509 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
13510 return Builder.CreateTrunc(Ops[0], HalfTy);
13511 }
13512 case NEON::BI__builtin_neon_vminv_u8: {
13513 Int = Intrinsic::aarch64_neon_uminv;
13514 Ty = Int32Ty;
13515 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
13516 llvm::Type *Tys[2] = { Ty, VTy };
13517 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13518 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13519 return Builder.CreateTrunc(Ops[0], Int8Ty);
13520 }
13521 case NEON::BI__builtin_neon_vminv_u16: {
13522 Int = Intrinsic::aarch64_neon_uminv;
13523 Ty = Int32Ty;
13524 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
13525 llvm::Type *Tys[2] = { Ty, VTy };
13526 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13527 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13528 return Builder.CreateTrunc(Ops[0], Int16Ty);
13529 }
13530 case NEON::BI__builtin_neon_vminvq_u8: {
13531 Int = Intrinsic::aarch64_neon_uminv;
13532 Ty = Int32Ty;
13533 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
13534 llvm::Type *Tys[2] = { Ty, VTy };
13535 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13536 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13537 return Builder.CreateTrunc(Ops[0], Int8Ty);
13538 }
13539 case NEON::BI__builtin_neon_vminvq_u16: {
13540 Int = Intrinsic::aarch64_neon_uminv;
13541 Ty = Int32Ty;
13542 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
13543 llvm::Type *Tys[2] = { Ty, VTy };
13544 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13545 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13546 return Builder.CreateTrunc(Ops[0], Int16Ty);
13547 }
13548 case NEON::BI__builtin_neon_vminv_s8: {
13549 Int = Intrinsic::aarch64_neon_sminv;
13550 Ty = Int32Ty;
13551 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
13552 llvm::Type *Tys[2] = { Ty, VTy };
13553 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13554 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13555 return Builder.CreateTrunc(Ops[0], Int8Ty);
13556 }
13557 case NEON::BI__builtin_neon_vminv_s16: {
13558 Int = Intrinsic::aarch64_neon_sminv;
13559 Ty = Int32Ty;
13560 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
13561 llvm::Type *Tys[2] = { Ty, VTy };
13562 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13563 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13564 return Builder.CreateTrunc(Ops[0], Int16Ty);
13565 }
13566 case NEON::BI__builtin_neon_vminvq_s8: {
13567 Int = Intrinsic::aarch64_neon_sminv;
13568 Ty = Int32Ty;
13569 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
13570 llvm::Type *Tys[2] = { Ty, VTy };
13571 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13572 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13573 return Builder.CreateTrunc(Ops[0], Int8Ty);
13574 }
13575 case NEON::BI__builtin_neon_vminvq_s16: {
13576 Int = Intrinsic::aarch64_neon_sminv;
13577 Ty = Int32Ty;
13578 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
13579 llvm::Type *Tys[2] = { Ty, VTy };
13580 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13581 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13582 return Builder.CreateTrunc(Ops[0], Int16Ty);
13583 }
13584 case NEON::BI__builtin_neon_vminv_f16: {
13585 Int = Intrinsic::aarch64_neon_fminv;
13586 Ty = HalfTy;
13587 VTy = llvm::FixedVectorType::get(HalfTy, 4);
13588 llvm::Type *Tys[2] = { Ty, VTy };
13589 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13590 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13591 return Builder.CreateTrunc(Ops[0], HalfTy);
13592 }
13593 case NEON::BI__builtin_neon_vminvq_f16: {
13594 Int = Intrinsic::aarch64_neon_fminv;
13595 Ty = HalfTy;
13596 VTy = llvm::FixedVectorType::get(HalfTy, 8);
13597 llvm::Type *Tys[2] = { Ty, VTy };
13598 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13599 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13600 return Builder.CreateTrunc(Ops[0], HalfTy);
13601 }
13602 case NEON::BI__builtin_neon_vmaxnmv_f16: {
13603 Int = Intrinsic::aarch64_neon_fmaxnmv;
13604 Ty = HalfTy;
13605 VTy = llvm::FixedVectorType::get(HalfTy, 4);
13606 llvm::Type *Tys[2] = { Ty, VTy };
13607 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13608 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
13609 return Builder.CreateTrunc(Ops[0], HalfTy);
13610 }
13611 case NEON::BI__builtin_neon_vmaxnmvq_f16: {
13612 Int = Intrinsic::aarch64_neon_fmaxnmv;
13613 Ty = HalfTy;
13614 VTy = llvm::FixedVectorType::get(HalfTy, 8);
13615 llvm::Type *Tys[2] = { Ty, VTy };
13616 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13617 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
13618 return Builder.CreateTrunc(Ops[0], HalfTy);
13619 }
13620 case NEON::BI__builtin_neon_vminnmv_f16: {
13621 Int = Intrinsic::aarch64_neon_fminnmv;
13622 Ty = HalfTy;
13623 VTy = llvm::FixedVectorType::get(HalfTy, 4);
13624 llvm::Type *Tys[2] = { Ty, VTy };
13625 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13626 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
13627 return Builder.CreateTrunc(Ops[0], HalfTy);
13628 }
13629 case NEON::BI__builtin_neon_vminnmvq_f16: {
13630 Int = Intrinsic::aarch64_neon_fminnmv;
13631 Ty = HalfTy;
13632 VTy = llvm::FixedVectorType::get(HalfTy, 8);
13633 llvm::Type *Tys[2] = { Ty, VTy };
13634 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13635 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
13636 return Builder.CreateTrunc(Ops[0], HalfTy);
13637 }
13638 case NEON::BI__builtin_neon_vmul_n_f64: {
13639 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
13640 Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy);
13641 return Builder.CreateFMul(Ops[0], RHS);
13642 }
13643 case NEON::BI__builtin_neon_vaddlv_u8: {
13644 Int = Intrinsic::aarch64_neon_uaddlv;
13645 Ty = Int32Ty;
13646 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
13647 llvm::Type *Tys[2] = { Ty, VTy };
13648 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13649 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13650 return Builder.CreateTrunc(Ops[0], Int16Ty);
13651 }
13652 case NEON::BI__builtin_neon_vaddlv_u16: {
13653 Int = Intrinsic::aarch64_neon_uaddlv;
13654 Ty = Int32Ty;
13655 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
13656 llvm::Type *Tys[2] = { Ty, VTy };
13657 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13658 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13659 }
13660 case NEON::BI__builtin_neon_vaddlvq_u8: {
13661 Int = Intrinsic::aarch64_neon_uaddlv;
13662 Ty = Int32Ty;
13663 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
13664 llvm::Type *Tys[2] = { Ty, VTy };
13665 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13666 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13667 return Builder.CreateTrunc(Ops[0], Int16Ty);
13668 }
13669 case NEON::BI__builtin_neon_vaddlvq_u16: {
13670 Int = Intrinsic::aarch64_neon_uaddlv;
13671 Ty = Int32Ty;
13672 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
13673 llvm::Type *Tys[2] = { Ty, VTy };
13674 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13675 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13676 }
13677 case NEON::BI__builtin_neon_vaddlv_s8: {
13678 Int = Intrinsic::aarch64_neon_saddlv;
13679 Ty = Int32Ty;
13680 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
13681 llvm::Type *Tys[2] = { Ty, VTy };
13682 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13683 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13684 return Builder.CreateTrunc(Ops[0], Int16Ty);
13685 }
13686 case NEON::BI__builtin_neon_vaddlv_s16: {
13687 Int = Intrinsic::aarch64_neon_saddlv;
13688 Ty = Int32Ty;
13689 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
13690 llvm::Type *Tys[2] = { Ty, VTy };
13691 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13692 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13693 }
13694 case NEON::BI__builtin_neon_vaddlvq_s8: {
13695 Int = Intrinsic::aarch64_neon_saddlv;
13696 Ty = Int32Ty;
13697 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
13698 llvm::Type *Tys[2] = { Ty, VTy };
13699 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13700 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13701 return Builder.CreateTrunc(Ops[0], Int16Ty);
13702 }
13703 case NEON::BI__builtin_neon_vaddlvq_s16: {
13704 Int = Intrinsic::aarch64_neon_saddlv;
13705 Ty = Int32Ty;
13706 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
13707 llvm::Type *Tys[2] = { Ty, VTy };
13708 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13709 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13710 }
13711 case NEON::BI__builtin_neon_vsri_n_v:
13712 case NEON::BI__builtin_neon_vsriq_n_v: {
13713 Int = Intrinsic::aarch64_neon_vsri;
13714 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
13715 return EmitNeonCall(Intrin, Ops, "vsri_n");
13716 }
13717 case NEON::BI__builtin_neon_vsli_n_v:
13718 case NEON::BI__builtin_neon_vsliq_n_v: {
13719 Int = Intrinsic::aarch64_neon_vsli;
13720 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
13721 return EmitNeonCall(Intrin, Ops, "vsli_n");
13722 }
13723 case NEON::BI__builtin_neon_vsra_n_v:
13724 case NEON::BI__builtin_neon_vsraq_n_v:
13725 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
13726 Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
13727 return Builder.CreateAdd(Ops[0], Ops[1]);
13728 case NEON::BI__builtin_neon_vrsra_n_v:
13729 case NEON::BI__builtin_neon_vrsraq_n_v: {
13730 Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl;
13731 SmallVector<llvm::Value*,2> TmpOps;
13732 TmpOps.push_back(Ops[1]);
13733 TmpOps.push_back(Ops[2]);
13734 Function* F = CGM.getIntrinsic(Int, Ty);
13735 llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true);
13736 Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
13737 return Builder.CreateAdd(Ops[0], tmp);
13738 }
13739 case NEON::BI__builtin_neon_vld1_v:
13740 case NEON::BI__builtin_neon_vld1q_v: {
13741 return Builder.CreateAlignedLoad(VTy, Ops[0], PtrOp0.getAlignment());
13742 }
13743 case NEON::BI__builtin_neon_vst1_v:
13744 case NEON::BI__builtin_neon_vst1q_v:
13745 Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
13746 return Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
13747 case NEON::BI__builtin_neon_vld1_lane_v:
13748 case NEON::BI__builtin_neon_vld1q_lane_v: {
13749 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13750 Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
13751 PtrOp0.getAlignment());
13752 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane");
13753 }
13754 case NEON::BI__builtin_neon_vldap1_lane_s64:
13755 case NEON::BI__builtin_neon_vldap1q_lane_s64: {
13756 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13757 llvm::LoadInst *LI = Builder.CreateAlignedLoad(
13758 VTy->getElementType(), Ops[0], PtrOp0.getAlignment());
13759 LI->setAtomic(llvm::AtomicOrdering::Acquire);
13760 Ops[0] = LI;
13761 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vldap1_lane");
13762 }
13763 case NEON::BI__builtin_neon_vld1_dup_v:
13764 case NEON::BI__builtin_neon_vld1q_dup_v: {
13765 Value *V = PoisonValue::get(Ty);
13766 Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
13767 PtrOp0.getAlignment());
13768 llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
13769 Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI);
13770 return EmitNeonSplat(Ops[0], CI);
13771 }
13772 case NEON::BI__builtin_neon_vst1_lane_v:
13773 case NEON::BI__builtin_neon_vst1q_lane_v:
13774 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13775 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
13776 return Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
13777 case NEON::BI__builtin_neon_vstl1_lane_s64:
13778 case NEON::BI__builtin_neon_vstl1q_lane_s64: {
13779 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13780 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
13781 llvm::StoreInst *SI =
13782 Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
13783 SI->setAtomic(llvm::AtomicOrdering::Release);
13784 return SI;
13785 }
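// Note: the vldap1(q)_lane / vstl1(q)_lane cases above are emitted exactly
// like the plain vld1(q)_lane / vst1(q)_lane cases, except that the element
// access is an atomic load with acquire ordering, respectively an atomic
// store with release ordering.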
13786 case NEON::BI__builtin_neon_vld2_v:
13787 case NEON::BI__builtin_neon_vld2q_v: {
13788 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13789 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys);
13790 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
13791 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13792 }
13793 case NEON::BI__builtin_neon_vld3_v:
13794 case NEON::BI__builtin_neon_vld3q_v: {
13795 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13796 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys);
13797 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
13798 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13799 }
13800 case NEON::BI__builtin_neon_vld4_v:
13801 case NEON::BI__builtin_neon_vld4q_v: {
13802 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13803 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys);
13804 Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
13805 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13806 }
13807 case NEON::BI__builtin_neon_vld2_dup_v:
13808 case NEON::BI__builtin_neon_vld2q_dup_v: {
13809 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13810 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys);
13811 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
13812 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13813 }
13814 case NEON::BI__builtin_neon_vld3_dup_v:
13815 case NEON::BI__builtin_neon_vld3q_dup_v: {
13816 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13817 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys);
13818 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
13819 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13820 }
13821 case NEON::BI__builtin_neon_vld4_dup_v:
13822 case NEON::BI__builtin_neon_vld4q_dup_v: {
13823 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13824 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys);
13825 Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
13826 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13827 }
13828 case NEON::BI__builtin_neon_vld2_lane_v:
13829 case NEON::BI__builtin_neon_vld2q_lane_v: {
13830 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
13831 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys);
13832 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
13833 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13834 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13835 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
13836 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld2_lane");
13837 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13838 }
13839 case NEON::BI__builtin_neon_vld3_lane_v:
13840 case NEON::BI__builtin_neon_vld3q_lane_v: {
13841 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
13842 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys);
13843 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
13844 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13845 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13846 Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
13847 Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
13848 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld3_lane");
13849 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13850 }
13851 case NEON::BI__builtin_neon_vld4_lane_v:
13852 case NEON::BI__builtin_neon_vld4q_lane_v: {
13853 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
13854 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys);
13855 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
13856 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13857 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13858 Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
13859 Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
13860 Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty);
13861 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld4_lane");
13862 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13863 }
13864 case NEON::BI__builtin_neon_vst2_v:
13865 case NEON::BI__builtin_neon_vst2q_v: {
13866 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13867 llvm::Type *Tys[2] = { VTy, Ops[2]->getType() };
13868 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys),
13869 Ops, "");
13870 }
13871 case NEON::BI__builtin_neon_vst2_lane_v:
13872 case NEON::BI__builtin_neon_vst2q_lane_v: {
13873 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13874 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
13875 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
13876 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys),
13877 Ops, "");
13878 }
13879 case NEON::BI__builtin_neon_vst3_v:
13880 case NEON::BI__builtin_neon_vst3q_v: {
13881 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13882 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
13883 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys),
13884 Ops, "");
13885 }
13886 case NEON::BI__builtin_neon_vst3_lane_v:
13887 case NEON::BI__builtin_neon_vst3q_lane_v: {
13888 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13889 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
13890 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
13891 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys),
13892 Ops, "");
13893 }
13894 case NEON::BI__builtin_neon_vst4_v:
13895 case NEON::BI__builtin_neon_vst4q_v: {
13896 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13897 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
13898 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys),
13899 Ops, "");
13900 }
13901 case NEON::BI__builtin_neon_vst4_lane_v:
13902 case NEON::BI__builtin_neon_vst4q_lane_v: {
13903 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13904 Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
13905 llvm::Type *Tys[2] = { VTy, Ops[5]->getType() };
13906 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys),
13907 Ops, "");
13908 }
13909 case NEON::BI__builtin_neon_vtrn_v:
13910 case NEON::BI__builtin_neon_vtrnq_v: {
13911 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13912 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13913 Value *SV = nullptr;
13914
13915 for (unsigned vi = 0; vi != 2; ++vi) {
13916 SmallVector<int, 16> Indices;
13917 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
13918 Indices.push_back(i+vi);
13919 Indices.push_back(i+e+vi);
13920 }
13921 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
13922 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
13923 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
13924 }
13925 return SV;
13926 }
13927 case NEON::BI__builtin_neon_vuzp_v:
13928 case NEON::BI__builtin_neon_vuzpq_v: {
13929 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13930 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13931 Value *SV = nullptr;
13932
13933 for (unsigned vi = 0; vi != 2; ++vi) {
13934 SmallVector<int, 16> Indices;
13935 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
13936 Indices.push_back(2*i+vi);
13937
13938 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
13939 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
13940 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
13941 }
13942 return SV;
13943 }
13944 case NEON::BI__builtin_neon_vzip_v:
13945 case NEON::BI__builtin_neon_vzipq_v: {
13946 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13947 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13948 Value *SV = nullptr;
13949
13950 for (unsigned vi = 0; vi != 2; ++vi) {
13951 SmallVector<int, 16> Indices;
13952 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
13953 Indices.push_back((i + vi*e) >> 1);
13954 Indices.push_back(((i + vi*e) >> 1)+e);
13955 }
13956 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
13957 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
13958 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
13959 }
13960 return SV;
13961 }
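// Worked example (illustrative): for a 4-element vector, the vzip loop above
// produces the shuffle masks {0,4,1,5} (vi == 0) and {2,6,3,7} (vi == 1), the
// classic interleave-low / interleave-high pair; vuzp and vtrn build their
// de-interleave and transpose masks analogously before storing each half to
// the vi-th result slot.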
13962 case NEON::BI__builtin_neon_vqtbl1q_v: {
13963 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty),
13964 Ops, "vtbl1");
13965 }
13966 case NEON::BI__builtin_neon_vqtbl2q_v: {
13967 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty),
13968 Ops, "vtbl2");
13969 }
13970 case NEON::BI__builtin_neon_vqtbl3q_v: {
13971 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty),
13972 Ops, "vtbl3");
13973 }
13974 case NEON::BI__builtin_neon_vqtbl4q_v: {
13975 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty),
13976 Ops, "vtbl4");
13977 }
13978 case NEON::BI__builtin_neon_vqtbx1q_v: {
13979 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty),
13980 Ops, "vtbx1");
13981 }
13982 case NEON::BI__builtin_neon_vqtbx2q_v: {
13983 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty),
13984 Ops, "vtbx2");
13985 }
13986 case NEON::BI__builtin_neon_vqtbx3q_v: {
13987 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty),
13988 Ops, "vtbx3");
13989 }
13990 case NEON::BI__builtin_neon_vqtbx4q_v: {
13991 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty),
13992 Ops, "vtbx4");
13993 }
13994 case NEON::BI__builtin_neon_vsqadd_v:
13995 case NEON::BI__builtin_neon_vsqaddq_v: {
13996 Int = Intrinsic::aarch64_neon_usqadd;
13997 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd");
13998 }
13999 case NEON::BI__builtin_neon_vuqadd_v:
14000 case NEON::BI__builtin_neon_vuqaddq_v: {
14001 Int = Intrinsic::aarch64_neon_suqadd;
14002 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
14003 }
14004
14005 case NEON::BI__builtin_neon_vluti2_laneq_bf16:
14006 case NEON::BI__builtin_neon_vluti2_laneq_f16:
14007 case NEON::BI__builtin_neon_vluti2_laneq_p16:
14008 case NEON::BI__builtin_neon_vluti2_laneq_p8:
14009 case NEON::BI__builtin_neon_vluti2_laneq_s16:
14010 case NEON::BI__builtin_neon_vluti2_laneq_s8:
14011 case NEON::BI__builtin_neon_vluti2_laneq_u16:
14012 case NEON::BI__builtin_neon_vluti2_laneq_u8: {
14013 Int = Intrinsic::aarch64_neon_vluti2_laneq;
14014 llvm::Type *Tys[2];
14015 Tys[0] = Ty;
14016 Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
14017 /*isQuad*/ false));
14018 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_laneq");
14019 }
14020 case NEON::BI__builtin_neon_vluti2q_laneq_bf16:
14021 case NEON::BI__builtin_neon_vluti2q_laneq_f16:
14022 case NEON::BI__builtin_neon_vluti2q_laneq_p16:
14023 case NEON::BI__builtin_neon_vluti2q_laneq_p8:
14024 case NEON::BI__builtin_neon_vluti2q_laneq_s16:
14025 case NEON::BI__builtin_neon_vluti2q_laneq_s8:
14026 case NEON::BI__builtin_neon_vluti2q_laneq_u16:
14027 case NEON::BI__builtin_neon_vluti2q_laneq_u8: {
14028 Int = Intrinsic::aarch64_neon_vluti2_laneq;
14029 llvm::Type *Tys[2];
14030 Tys[0] = Ty;
14031 Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
14032 /*isQuad*/ true));
14033 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_laneq");
14034 }
14035 case NEON::BI__builtin_neon_vluti2_lane_bf16:
14036 case NEON::BI__builtin_neon_vluti2_lane_f16:
14037 case NEON::BI__builtin_neon_vluti2_lane_p16:
14038 case NEON::BI__builtin_neon_vluti2_lane_p8:
14039 case NEON::BI__builtin_neon_vluti2_lane_s16:
14040 case NEON::BI__builtin_neon_vluti2_lane_s8:
14041 case NEON::BI__builtin_neon_vluti2_lane_u16:
14042 case NEON::BI__builtin_neon_vluti2_lane_u8: {
14043 Int = Intrinsic::aarch64_neon_vluti2_lane;
14044 llvm::Type *Tys[2];
14045 Tys[0] = Ty;
14046 Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
14047 /*isQuad*/ false));
14048 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_lane");
14049 }
14050 case NEON::BI__builtin_neon_vluti2q_lane_bf16:
14051 case NEON::BI__builtin_neon_vluti2q_lane_f16:
14052 case NEON::BI__builtin_neon_vluti2q_lane_p16:
14053 case NEON::BI__builtin_neon_vluti2q_lane_p8:
14054 case NEON::BI__builtin_neon_vluti2q_lane_s16:
14055 case NEON::BI__builtin_neon_vluti2q_lane_s8:
14056 case NEON::BI__builtin_neon_vluti2q_lane_u16:
14057 case NEON::BI__builtin_neon_vluti2q_lane_u8: {
14058 Int = Intrinsic::aarch64_neon_vluti2_lane;
14059 llvm::Type *Tys[2];
14060 Tys[0] = Ty;
14061 Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
14062 /*isQuad*/ true));
14063 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_lane");
14064 }
14065 case NEON::BI__builtin_neon_vluti4q_lane_p8:
14066 case NEON::BI__builtin_neon_vluti4q_lane_s8:
14067 case NEON::BI__builtin_neon_vluti4q_lane_u8: {
14068 Int = Intrinsic::aarch64_neon_vluti4q_lane;
14069 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_lane");
14070 }
14071 case NEON::BI__builtin_neon_vluti4q_laneq_p8:
14072 case NEON::BI__builtin_neon_vluti4q_laneq_s8:
14073 case NEON::BI__builtin_neon_vluti4q_laneq_u8: {
14074 Int = Intrinsic::aarch64_neon_vluti4q_laneq;
14075 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_laneq");
14076 }
14077 case NEON::BI__builtin_neon_vluti4q_lane_bf16_x2:
14078 case NEON::BI__builtin_neon_vluti4q_lane_f16_x2:
14079 case NEON::BI__builtin_neon_vluti4q_lane_p16_x2:
14080 case NEON::BI__builtin_neon_vluti4q_lane_s16_x2:
14081 case NEON::BI__builtin_neon_vluti4q_lane_u16_x2: {
14082 Int = Intrinsic::aarch64_neon_vluti4q_lane_x2;
14083 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_lane_x2");
14084 }
14085 case NEON::BI__builtin_neon_vluti4q_laneq_bf16_x2:
14086 case NEON::BI__builtin_neon_vluti4q_laneq_f16_x2:
14087 case NEON::BI__builtin_neon_vluti4q_laneq_p16_x2:
14088 case NEON::BI__builtin_neon_vluti4q_laneq_s16_x2:
14089 case NEON::BI__builtin_neon_vluti4q_laneq_u16_x2: {
14090 Int = Intrinsic::aarch64_neon_vluti4q_laneq_x2;
14091 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_laneq_x2");
14092 }
14093
14094 case NEON::BI__builtin_neon_vamin_f16:
14095 case NEON::BI__builtin_neon_vaminq_f16:
14096 case NEON::BI__builtin_neon_vamin_f32:
14097 case NEON::BI__builtin_neon_vaminq_f32:
14098 case NEON::BI__builtin_neon_vaminq_f64: {
14099 Int = Intrinsic::aarch64_neon_famin;
14100 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "famin");
14101 }
14102 case NEON::BI__builtin_neon_vamax_f16:
14103 case NEON::BI__builtin_neon_vamaxq_f16:
14104 case NEON::BI__builtin_neon_vamax_f32:
14105 case NEON::BI__builtin_neon_vamaxq_f32:
14106 case NEON::BI__builtin_neon_vamaxq_f64: {
14107 Int = Intrinsic::aarch64_neon_famax;
14108 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "famax");
14109 }
14110 case NEON::BI__builtin_neon_vscale_f16:
14111 case NEON::BI__builtin_neon_vscaleq_f16:
14112 case NEON::BI__builtin_neon_vscale_f32:
14113 case NEON::BI__builtin_neon_vscaleq_f32:
14114 case NEON::BI__builtin_neon_vscaleq_f64: {
14115 Int = Intrinsic::aarch64_neon_fp8_fscale;
14116 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fscale");
14117 }
14118 }
14119}
14120
14121Value *CodeGenFunction::EmitBPFBuiltinExpr(unsigned BuiltinID,
14122 const CallExpr *E) {
14123 assert((BuiltinID == BPF::BI__builtin_preserve_field_info ||
14124 BuiltinID == BPF::BI__builtin_btf_type_id ||
14125 BuiltinID == BPF::BI__builtin_preserve_type_info ||
14126 BuiltinID == BPF::BI__builtin_preserve_enum_value) &&
14127 "unexpected BPF builtin");
14128
14129 // A sequence number, injected into IR builtin functions, to
14130 // prevent CSE, given that the only difference between the functions
14131 // may just be the debuginfo metadata.
14132 static uint32_t BuiltinSeqNum;
14133
14134 switch (BuiltinID) {
14135 default:
14136 llvm_unreachable("Unexpected BPF builtin");
14137 case BPF::BI__builtin_preserve_field_info: {
14138 const Expr *Arg = E->getArg(0);
14139 bool IsBitField = Arg->IgnoreParens()->getObjectKind() == OK_BitField;
14140
14141 if (!getDebugInfo()) {
14142 CGM.Error(E->getExprLoc(),
14143 "using __builtin_preserve_field_info() without -g");
14144 return IsBitField ? EmitLValue(Arg).getRawBitFieldPointer(*this)
14145 : EmitLValue(Arg).emitRawPointer(*this);
14146 }
14147
14148 // Enable underlying preserve_*_access_index() generation.
14149 bool OldIsInPreservedAIRegion = IsInPreservedAIRegion;
14150 IsInPreservedAIRegion = true;
14151 Value *FieldAddr = IsBitField ? EmitLValue(Arg).getRawBitFieldPointer(*this)
14152 : EmitLValue(Arg).emitRawPointer(*this);
14153 IsInPreservedAIRegion = OldIsInPreservedAIRegion;
14154
14155 ConstantInt *C = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
14156 Value *InfoKind = ConstantInt::get(Int64Ty, C->getSExtValue());
14157
14158 // Build the IR for the preserve_field_info intrinsic.
14159 llvm::Function *FnGetFieldInfo = llvm::Intrinsic::getOrInsertDeclaration(
14160 &CGM.getModule(), llvm::Intrinsic::bpf_preserve_field_info,
14161 {FieldAddr->getType()});
14162 return Builder.CreateCall(FnGetFieldInfo, {FieldAddr, InfoKind});
14163 }
14164 case BPF::BI__builtin_btf_type_id:
14165 case BPF::BI__builtin_preserve_type_info: {
14166 if (!getDebugInfo()) {
14167 CGM.Error(E->getExprLoc(), "using builtin function without -g");
14168 return nullptr;
14169 }
14170
14171 const Expr *Arg0 = E->getArg(0);
14172 llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
14173 Arg0->getType(), Arg0->getExprLoc());
14174
14175 ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
14176 Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue());
14177 Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++);
14178
14179 llvm::Function *FnDecl;
14180 if (BuiltinID == BPF::BI__builtin_btf_type_id)
14181 FnDecl = llvm::Intrinsic::getOrInsertDeclaration(
14182 &CGM.getModule(), llvm::Intrinsic::bpf_btf_type_id, {});
14183 else
14184 FnDecl = llvm::Intrinsic::getOrInsertDeclaration(
14185 &CGM.getModule(), llvm::Intrinsic::bpf_preserve_type_info, {});
14186 CallInst *Fn = Builder.CreateCall(FnDecl, {SeqNumVal, FlagValue});
14187 Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
14188 return Fn;
14189 }
14190 case BPF::BI__builtin_preserve_enum_value: {
14191 if (!getDebugInfo()) {
14192 CGM.Error(E->getExprLoc(), "using builtin function without -g");
14193 return nullptr;
14194 }
14195
14196 const Expr *Arg0 = E->getArg(0);
14197 llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
14198 Arg0->getType(), Arg0->getExprLoc());
14199
14200 // Find enumerator
14201 const auto *UO = cast<UnaryOperator>(Arg0->IgnoreParens());
14202 const auto *CE = cast<CStyleCastExpr>(UO->getSubExpr());
14203 const auto *DR = cast<DeclRefExpr>(CE->getSubExpr());
14204 const auto *Enumerator = cast<EnumConstantDecl>(DR->getDecl());
14205
14206 auto InitVal = Enumerator->getInitVal();
14207 std::string InitValStr;
14208 if (InitVal.isNegative() || InitVal > uint64_t(INT64_MAX))
14209 InitValStr = std::to_string(InitVal.getSExtValue());
14210 else
14211 InitValStr = std::to_string(InitVal.getZExtValue());
14212 std::string EnumStr = Enumerator->getNameAsString() + ":" + InitValStr;
14213 Value *EnumStrVal = Builder.CreateGlobalString(EnumStr);
14214
14215 ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
14216 Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue());
14217 Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++);
14218
14219 llvm::Function *IntrinsicFn = llvm::Intrinsic::getOrInsertDeclaration(
14220 &CGM.getModule(), llvm::Intrinsic::bpf_preserve_enum_value, {});
14221 CallInst *Fn =
14222 Builder.CreateCall(IntrinsicFn, {SeqNumVal, EnumStrVal, FlagValue});
14223 Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
14224 return Fn;
14225 }
14226 }
14227}
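// Illustrative sketch (not part of the original source): when debug info is
// enabled, a call such as
//   __builtin_preserve_field_info(s->f, kind);
// is emitted above as a preserve_*_access_index chain computing the field
// address, followed by a call to @llvm.bpf.preserve.field.info with that
// address and the constant `kind`, which later BPF passes turn into a CO-RE
// relocation.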
14228
14229llvm::Value *CodeGenFunction::
14230BuildVector(ArrayRef<llvm::Value*> Ops) {
14231 assert((Ops.size() & (Ops.size() - 1)) == 0 &&
14232 "Not a power-of-two sized vector!");
14233 bool AllConstants = true;
14234 for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
14235 AllConstants &= isa<Constant>(Ops[i]);
14236
14237 // If this is a constant vector, create a ConstantVector.
14238 if (AllConstants) {
14239 SmallVector<llvm::Constant*, 16> CstOps;
14240 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
14241 CstOps.push_back(cast<Constant>(Ops[i]));
14242 return llvm::ConstantVector::get(CstOps);
14243 }
14244
14245 // Otherwise, insertelement the values to build the vector.
14246 Value *Result = llvm::PoisonValue::get(
14247 llvm::FixedVectorType::get(Ops[0]->getType(), Ops.size()));
14248
14249 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
14250 Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt64(i));
14251
14252 return Result;
14253}
14254
14255// Convert the mask from an integer type to a vector of i1.
14256static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask,
14257 unsigned NumElts) {
14258
14259 auto *MaskTy = llvm::FixedVectorType::get(
14260 CGF.Builder.getInt1Ty(),
14261 cast<IntegerType>(Mask->getType())->getBitWidth());
14262 Value *MaskVec = CGF.Builder.CreateBitCast(Mask, MaskTy);
14263
14264 // If we have fewer than 8 elements, then the starting mask was an i8 and
14265 // we need to extract down to the right number of elements.
14266 if (NumElts < 8) {
14267 int Indices[4];
14268 for (unsigned i = 0; i != NumElts; ++i)
14269 Indices[i] = i;
14270 MaskVec = CGF.Builder.CreateShuffleVector(
14271 MaskVec, MaskVec, ArrayRef(Indices, NumElts), "extract");
14272 }
14273 return MaskVec;
14274}
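// Illustrative example: an i8 mask feeding a 4-element operation is first
// bitcast to <8 x i1> by getMaskVecValue and then narrowed with a
// shufflevector selecting lanes {0,1,2,3}; the resulting <4 x i1> vector is
// what the masked load/store/select helpers below consume.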
14275
14276static Value *EmitX86MaskedStore(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
14277 Align Alignment) {
14278 Value *Ptr = Ops[0];
14279
14280 Value *MaskVec = getMaskVecValue(
14281 CGF, Ops[2],
14282 cast<llvm::FixedVectorType>(Ops[1]->getType())->getNumElements());
14283
14284 return CGF.Builder.CreateMaskedStore(Ops[1], Ptr, Alignment, MaskVec);
14285}
14286
14287static Value *EmitX86MaskedLoad(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
14288 Align Alignment) {
14289 llvm::Type *Ty = Ops[1]->getType();
14290 Value *Ptr = Ops[0];
14291
14292 Value *MaskVec = getMaskVecValue(
14293 CGF, Ops[2], cast<llvm::FixedVectorType>(Ty)->getNumElements());
14294
14295 return CGF.Builder.CreateMaskedLoad(Ty, Ptr, Alignment, MaskVec, Ops[1]);
14296}
14297
14298static Value *EmitX86ExpandLoad(CodeGenFunction &CGF,
14299 ArrayRef<Value *> Ops) {
14300 auto *ResultTy = cast<llvm::VectorType>(Ops[1]->getType());
14301 Value *Ptr = Ops[0];
14302
14303 Value *MaskVec = getMaskVecValue(
14304 CGF, Ops[2], cast<FixedVectorType>(ResultTy)->getNumElements());
14305
14306 llvm::Function *F = CGF.CGM.getIntrinsic(Intrinsic::masked_expandload,
14307 ResultTy);
14308 return CGF.Builder.CreateCall(F, { Ptr, MaskVec, Ops[1] });
14309}
14310
14311static Value *EmitX86CompressExpand(CodeGenFunction &CGF,
14312 ArrayRef<Value *> Ops,
14313 bool IsCompress) {
14314 auto *ResultTy = cast<llvm::FixedVectorType>(Ops[1]->getType());
14315
14316 Value *MaskVec = getMaskVecValue(CGF, Ops[2], ResultTy->getNumElements());
14317
14318 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
14319 : Intrinsic::x86_avx512_mask_expand;
14320 llvm::Function *F = CGF.CGM.getIntrinsic(IID, ResultTy);
14321 return CGF.Builder.CreateCall(F, { Ops[0], Ops[1], MaskVec });
14322}
14323
14324static Value *EmitX86CompressStore(CodeGenFunction &CGF,
14325 ArrayRef<Value *> Ops) {
14326 auto *ResultTy = cast<llvm::FixedVectorType>(Ops[1]->getType());
14327 Value *Ptr = Ops[0];
14328
14329 Value *MaskVec = getMaskVecValue(CGF, Ops[2], ResultTy->getNumElements());
14330
14331 llvm::Function *F = CGF.CGM.getIntrinsic(Intrinsic::masked_compressstore,
14332 ResultTy);
14333 return CGF.Builder.CreateCall(F, { Ops[1], Ptr, MaskVec });
14334}
14335
14336static Value *EmitX86MaskLogic(CodeGenFunction &CGF, Instruction::BinaryOps Opc,
14337 ArrayRef<Value *> Ops,
14338 bool InvertLHS = false) {
14339 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
14340 Value *LHS = getMaskVecValue(CGF, Ops[0], NumElts);
14341 Value *RHS = getMaskVecValue(CGF, Ops[1], NumElts);
14342
14343 if (InvertLHS)
14344 LHS = CGF.Builder.CreateNot(LHS);
14345
14346 return CGF.Builder.CreateBitCast(CGF.Builder.CreateBinOp(Opc, LHS, RHS),
14347 Ops[0]->getType());
14348}
14349
14350static Value *EmitX86FunnelShift(CodeGenFunction &CGF, Value *Op0, Value *Op1,
14351 Value *Amt, bool IsRight) {
14352 llvm::Type *Ty = Op0->getType();
14353
14354 // The amount may be a scalar immediate, in which case we create a splat vector.
14355 // Funnel shift amounts are treated as modulo, and the types are all power-of-2,
14356 // so we only care about the lowest log2 bits anyway.
14357 if (Amt->getType() != Ty) {
14358 unsigned NumElts = cast<llvm::FixedVectorType>(Ty)->getNumElements();
14359 Amt = CGF.Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
14360 Amt = CGF.Builder.CreateVectorSplat(NumElts, Amt);
14361 }
14362
14363 unsigned IID = IsRight ? Intrinsic::fshr : Intrinsic::fshl;
14364 Function *F = CGF.CGM.getIntrinsic(IID, Ty);
14365 return CGF.Builder.CreateCall(F, {Op0, Op1, Amt});
14366}
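// Illustrative sketch (assumed caller behaviour, not shown in this excerpt):
// rotate-style builtins reach this helper with Op0 == Op1, so a v8i32 rotate
// by a scalar amount becomes roughly
//   %r = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %x, <8 x i32> %x, <8 x i32> %amt.splat)
// while IsRight selects @llvm.fshr instead.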
14367
14368static Value *EmitX86vpcom(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
14369 bool IsSigned) {
14370 Value *Op0 = Ops[0];
14371 Value *Op1 = Ops[1];
14372 llvm::Type *Ty = Op0->getType();
14373 uint64_t Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
14374
14375 CmpInst::Predicate Pred;
14376 switch (Imm) {
14377 case 0x0:
14378 Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
14379 break;
14380 case 0x1:
14381 Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
14382 break;
14383 case 0x2:
14384 Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
14385 break;
14386 case 0x3:
14387 Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
14388 break;
14389 case 0x4:
14390 Pred = ICmpInst::ICMP_EQ;
14391 break;
14392 case 0x5:
14393 Pred = ICmpInst::ICMP_NE;
14394 break;
14395 case 0x6:
14396 return llvm::Constant::getNullValue(Ty); // FALSE
14397 case 0x7:
14398 return llvm::Constant::getAllOnesValue(Ty); // TRUE
14399 default:
14400 llvm_unreachable("Unexpected XOP vpcom/vpcomu predicate");
14401 }
14402
14403 Value *Cmp = CGF.Builder.CreateICmp(Pred, Op0, Op1);
14404 Value *Res = CGF.Builder.CreateSExt(Cmp, Ty);
14405 return Res;
14406}
14407
14408static Value *EmitX86Select(CodeGenFunction &CGF,
14409 Value *Mask, Value *Op0, Value *Op1) {
14410
14411 // If the mask is all ones just return first argument.
14412 if (const auto *C = dyn_cast<Constant>(Mask))
14413 if (C->isAllOnesValue())
14414 return Op0;
14415
14416 Mask = getMaskVecValue(
14417 CGF, Mask, cast<llvm::FixedVectorType>(Op0->getType())->getNumElements());
14418
14419 return CGF.Builder.CreateSelect(Mask, Op0, Op1);
14420}
14421
14422static Value *EmitX86ScalarSelect(CodeGenFunction &CGF,
14423 Value *Mask, Value *Op0, Value *Op1) {
14424 // If the mask is all ones just return first argument.
14425 if (const auto *C = dyn_cast<Constant>(Mask))
14426 if (C->isAllOnesValue())
14427 return Op0;
14428
14429 auto *MaskTy = llvm::FixedVectorType::get(
14430 CGF.Builder.getInt1Ty(), Mask->getType()->getIntegerBitWidth());
14431 Mask = CGF.Builder.CreateBitCast(Mask, MaskTy);
14432 Mask = CGF.Builder.CreateExtractElement(Mask, (uint64_t)0);
14433 return CGF.Builder.CreateSelect(Mask, Op0, Op1);
14434}
14435
14436static Value *EmitX86MaskedCompareResult(CodeGenFunction &CGF, Value *Cmp,
14437 unsigned NumElts, Value *MaskIn) {
14438 if (MaskIn) {
14439 const auto *C = dyn_cast<Constant>(MaskIn);
14440 if (!C || !C->isAllOnesValue())
14441 Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, MaskIn, NumElts));
14442 }
14443
14444 if (NumElts < 8) {
14445 int Indices[8];
14446 for (unsigned i = 0; i != NumElts; ++i)
14447 Indices[i] = i;
14448 for (unsigned i = NumElts; i != 8; ++i)
14449 Indices[i] = i % NumElts + NumElts;
14450 Cmp = CGF.Builder.CreateShuffleVector(
14451 Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices);
14452 }
14453
14454 return CGF.Builder.CreateBitCast(Cmp,
14455 IntegerType::get(CGF.getLLVMContext(),
14456 std::max(NumElts, 8U)));
14457}
14458
14459static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC,
14460 bool Signed, ArrayRef<Value *> Ops) {
14461 assert((Ops.size() == 2 || Ops.size() == 4) &&
14462 "Unexpected number of arguments");
14463 unsigned NumElts =
14464 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
14465 Value *Cmp;
14466
14467 if (CC == 3) {
14468 Cmp = Constant::getNullValue(
14469 llvm::FixedVectorType::get(CGF.Builder.getInt1Ty(), NumElts));
14470 } else if (CC == 7) {
14471 Cmp = Constant::getAllOnesValue(
14472 llvm::FixedVectorType::get(CGF.Builder.getInt1Ty(), NumElts));
14473 } else {
14474 ICmpInst::Predicate Pred;
14475 switch (CC) {
14476 default: llvm_unreachable("Unknown condition code");
14477 case 0: Pred = ICmpInst::ICMP_EQ; break;
14478 case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
14479 case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
14480 case 4: Pred = ICmpInst::ICMP_NE; break;
14481 case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
14482 case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
14483 }
14484 Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);
14485 }
14486
14487 Value *MaskIn = nullptr;
14488 if (Ops.size() == 4)
14489 MaskIn = Ops[3];
14490
14491 return EmitX86MaskedCompareResult(CGF, Cmp, NumElts, MaskIn);
14492}
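// Worked example (illustrative): for CC == 1 on a signed 16-element compare
// with no mask operand, the code above emits
//   %cmp = icmp slt <16 x i32> %a, %b
// and EmitX86MaskedCompareResult bitcasts the <16 x i1> result to i16; CC
// values 3 and 7 short-circuit to the all-zeros and all-ones masks.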
14493
14494static Value *EmitX86ConvertToMask(CodeGenFunction &CGF, Value *In) {
14495 Value *Zero = Constant::getNullValue(In->getType());
14496 return EmitX86MaskedCompare(CGF, 1, true, { In, Zero });
14497}
14498
14499static Value *EmitX86ConvertIntToFp(CodeGenFunction &CGF, const CallExpr *E,
14500 ArrayRef<Value *> Ops, bool IsSigned) {
14501 unsigned Rnd = cast<llvm::ConstantInt>(Ops[3])->getZExtValue();
14502 llvm::Type *Ty = Ops[1]->getType();
14503
14504 Value *Res;
14505 if (Rnd != 4) {
14506 Intrinsic::ID IID = IsSigned ? Intrinsic::x86_avx512_sitofp_round
14507 : Intrinsic::x86_avx512_uitofp_round;
14508 Function *F = CGF.CGM.getIntrinsic(IID, { Ty, Ops[0]->getType() });
14509 Res = CGF.Builder.CreateCall(F, { Ops[0], Ops[3] });
14510 } else {
14511 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
14512 Res = IsSigned ? CGF.Builder.CreateSIToFP(Ops[0], Ty)
14513 : CGF.Builder.CreateUIToFP(Ops[0], Ty);
14514 }
14515
14516 return EmitX86Select(CGF, Ops[2], Res, Ops[1]);
14517}
14518
14519// Lowers X86 FMA intrinsics to IR.
14520static Value *EmitX86FMAExpr(CodeGenFunction &CGF, const CallExpr *E,
14521 ArrayRef<Value *> Ops, unsigned BuiltinID,
14522 bool IsAddSub) {
14523
14524 bool Subtract = false;
14525 Intrinsic::ID IID = Intrinsic::not_intrinsic;
14526 switch (BuiltinID) {
14527 default: break;
14528 case clang::X86::BI__builtin_ia32_vfmsubph512_mask3:
14529 Subtract = true;
14530 [[fallthrough]];
14531 case clang::X86::BI__builtin_ia32_vfmaddph512_mask:
14532 case clang::X86::BI__builtin_ia32_vfmaddph512_maskz:
14533 case clang::X86::BI__builtin_ia32_vfmaddph512_mask3:
14534 IID = llvm::Intrinsic::x86_avx512fp16_vfmadd_ph_512;
14535 break;
14536 case clang::X86::BI__builtin_ia32_vfmsubaddph512_mask3:
14537 Subtract = true;
14538 [[fallthrough]];
14539 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask:
14540 case clang::X86::BI__builtin_ia32_vfmaddsubph512_maskz:
14541 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask3:
14542 IID = llvm::Intrinsic::x86_avx512fp16_vfmaddsub_ph_512;
14543 break;
14544 case clang::X86::BI__builtin_ia32_vfmsubps512_mask3:
14545 Subtract = true;
14546 [[fallthrough]];
14547 case clang::X86::BI__builtin_ia32_vfmaddps512_mask:
14548 case clang::X86::BI__builtin_ia32_vfmaddps512_maskz:
14549 case clang::X86::BI__builtin_ia32_vfmaddps512_mask3:
14550 IID = llvm::Intrinsic::x86_avx512_vfmadd_ps_512; break;
14551 case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3:
14552 Subtract = true;
14553 [[fallthrough]];
14554 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask:
14555 case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz:
14556 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3:
14557 IID = llvm::Intrinsic::x86_avx512_vfmadd_pd_512; break;
14558 case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3:
14559 Subtract = true;
14560 [[fallthrough]];
14561 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask:
14562 case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz:
14563 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3:
14564 IID = llvm::Intrinsic::x86_avx512_vfmaddsub_ps_512;
14565 break;
14566 case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
14567 Subtract = true;
14568 [[fallthrough]];
14569 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask:
14570 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
14571 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
14572 IID = llvm::Intrinsic::x86_avx512_vfmaddsub_pd_512;
14573 break;
14574 case clang::X86::BI__builtin_ia32_vfmsubph256_round_mask3:
14575 Subtract = true;
14576 LLVM_FALLTHROUGH;
14577 case clang::X86::BI__builtin_ia32_vfmaddph256_round_mask:
14578 case clang::X86::BI__builtin_ia32_vfmaddph256_round_maskz:
14579 case clang::X86::BI__builtin_ia32_vfmaddph256_round_mask3:
14580 IID = llvm::Intrinsic::x86_avx10_vfmaddph256;
14581 break;
14582 case clang::X86::BI__builtin_ia32_vfmsubaddph256_round_mask3:
14583 Subtract = true;
14584 LLVM_FALLTHROUGH;
14585 case clang::X86::BI__builtin_ia32_vfmaddsubph256_round_mask:
14586 case clang::X86::BI__builtin_ia32_vfmaddsubph256_round_maskz:
14587 case clang::X86::BI__builtin_ia32_vfmaddsubph256_round_mask3:
14588 IID = llvm::Intrinsic::x86_avx10_vfmaddsubph256;
14589 break;
14590 case clang::X86::BI__builtin_ia32_vfmsubps256_round_mask3:
14591 Subtract = true;
14592 LLVM_FALLTHROUGH;
14593 case clang::X86::BI__builtin_ia32_vfmaddps256_round_mask:
14594 case clang::X86::BI__builtin_ia32_vfmaddps256_round_maskz:
14595 case clang::X86::BI__builtin_ia32_vfmaddps256_round_mask3:
14596 IID = llvm::Intrinsic::x86_avx10_vfmaddps256;
14597 break;
14598 case clang::X86::BI__builtin_ia32_vfmsubpd256_round_mask3:
14599 Subtract = true;
14600 LLVM_FALLTHROUGH;
14601 case clang::X86::BI__builtin_ia32_vfmaddpd256_round_mask:
14602 case clang::X86::BI__builtin_ia32_vfmaddpd256_round_maskz:
14603 case clang::X86::BI__builtin_ia32_vfmaddpd256_round_mask3:
14604 IID = llvm::Intrinsic::x86_avx10_vfmaddpd256;
14605 break;
14606 case clang::X86::BI__builtin_ia32_vfmsubaddps256_round_mask3:
14607 Subtract = true;
14608 LLVM_FALLTHROUGH;
14609 case clang::X86::BI__builtin_ia32_vfmaddsubps256_round_mask:
14610 case clang::X86::BI__builtin_ia32_vfmaddsubps256_round_maskz:
14611 case clang::X86::BI__builtin_ia32_vfmaddsubps256_round_mask3:
14612 IID = llvm::Intrinsic::x86_avx10_vfmaddsubps256;
14613 break;
14614 case clang::X86::BI__builtin_ia32_vfmsubaddpd256_round_mask3:
14615 Subtract = true;
14616 LLVM_FALLTHROUGH;
14617 case clang::X86::BI__builtin_ia32_vfmaddsubpd256_round_mask:
14618 case clang::X86::BI__builtin_ia32_vfmaddsubpd256_round_maskz:
14619 case clang::X86::BI__builtin_ia32_vfmaddsubpd256_round_mask3:
14620 IID = llvm::Intrinsic::x86_avx10_vfmaddsubpd256;
14621 break;
14622 }
14623
14624 Value *A = Ops[0];
14625 Value *B = Ops[1];
14626 Value *C = Ops[2];
14627
14628 if (Subtract)
14629 C = CGF.Builder.CreateFNeg(C);
14630
14631 Value *Res;
14632
14633 // Only handle in case of _MM_FROUND_CUR_DIRECTION/4 (no rounding).
14634 if (IID != Intrinsic::not_intrinsic &&
14635 (cast<llvm::ConstantInt>(Ops.back())->getZExtValue() != (uint64_t)4 ||
14636 IsAddSub)) {
14637 Function *Intr = CGF.CGM.getIntrinsic(IID);
14638 Res = CGF.Builder.CreateCall(Intr, {A, B, C, Ops.back() });
14639 } else {
14640 llvm::Type *Ty = A->getType();
14641 Function *FMA;
14642 if (CGF.Builder.getIsFPConstrained()) {
14643 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
14644 FMA = CGF.CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, Ty);
14645 Res = CGF.Builder.CreateConstrainedFPCall(FMA, {A, B, C});
14646 } else {
14647 FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ty);
14648 Res = CGF.Builder.CreateCall(FMA, {A, B, C});
14649 }
14650 }
14651
14652 // Handle any required masking.
14653 Value *MaskFalseVal = nullptr;
14654 switch (BuiltinID) {
14655 case clang::X86::BI__builtin_ia32_vfmaddph512_mask:
14656 case clang::X86::BI__builtin_ia32_vfmaddps512_mask:
14657 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask:
14658 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask:
14659 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask:
14660 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask:
14661 case clang::X86::BI__builtin_ia32_vfmaddph256_round_mask:
14662 case clang::X86::BI__builtin_ia32_vfmaddps256_round_mask:
14663 case clang::X86::BI__builtin_ia32_vfmaddpd256_round_mask:
14664 case clang::X86::BI__builtin_ia32_vfmaddsubph256_round_mask:
14665 case clang::X86::BI__builtin_ia32_vfmaddsubps256_round_mask:
14666 case clang::X86::BI__builtin_ia32_vfmaddsubpd256_round_mask:
14667 MaskFalseVal = Ops[0];
14668 break;
14669 case clang::X86::BI__builtin_ia32_vfmaddph512_maskz:
14670 case clang::X86::BI__builtin_ia32_vfmaddps512_maskz:
14671 case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz:
14672 case clang::X86::BI__builtin_ia32_vfmaddsubph512_maskz:
14673 case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz:
14674 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
14675 case clang::X86::BI__builtin_ia32_vfmaddph256_round_maskz:
14676 case clang::X86::BI__builtin_ia32_vfmaddps256_round_maskz:
14677 case clang::X86::BI__builtin_ia32_vfmaddpd256_round_maskz:
14678 case clang::X86::BI__builtin_ia32_vfmaddsubph256_round_maskz:
14679 case clang::X86::BI__builtin_ia32_vfmaddsubps256_round_maskz:
14680 case clang::X86::BI__builtin_ia32_vfmaddsubpd256_round_maskz:
14681 MaskFalseVal = Constant::getNullValue(Ops[0]->getType());
14682 break;
14683 case clang::X86::BI__builtin_ia32_vfmsubph512_mask3:
14684 case clang::X86::BI__builtin_ia32_vfmaddph512_mask3:
14685 case clang::X86::BI__builtin_ia32_vfmsubps512_mask3:
14686 case clang::X86::BI__builtin_ia32_vfmaddps512_mask3:
14687 case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3:
14688 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3:
14689 case clang::X86::BI__builtin_ia32_vfmsubaddph512_mask3:
14690 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask3:
14691 case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3:
14692 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3:
14693 case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
14694 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
14695 case clang::X86::BI__builtin_ia32_vfmsubph256_round_mask3:
14696 case clang::X86::BI__builtin_ia32_vfmaddph256_round_mask3:
14697 case clang::X86::BI__builtin_ia32_vfmsubps256_round_mask3:
14698 case clang::X86::BI__builtin_ia32_vfmaddps256_round_mask3:
14699 case clang::X86::BI__builtin_ia32_vfmsubpd256_round_mask3:
14700 case clang::X86::BI__builtin_ia32_vfmaddpd256_round_mask3:
14701 case clang::X86::BI__builtin_ia32_vfmsubaddph256_round_mask3:
14702 case clang::X86::BI__builtin_ia32_vfmaddsubph256_round_mask3:
14703 case clang::X86::BI__builtin_ia32_vfmsubaddps256_round_mask3:
14704 case clang::X86::BI__builtin_ia32_vfmaddsubps256_round_mask3:
14705 case clang::X86::BI__builtin_ia32_vfmsubaddpd256_round_mask3:
14706 case clang::X86::BI__builtin_ia32_vfmaddsubpd256_round_mask3:
14707 MaskFalseVal = Ops[2];
14708 break;
14709 }
14710
14711 if (MaskFalseVal)
14712 return EmitX86Select(CGF, Ops[3], Res, MaskFalseVal);
14713
14714 return Res;
14715}
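// Illustrative sketch (not from the original source): a 512-bit FMA builtin
// carrying the default rounding immediate (4) and an all-ones mask reduces to
// a plain
//   %r = call <16 x float> @llvm.fma.v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %c)
// with the select folded away, whereas a non-default rounding immediate is
// routed through the target-specific AVX-512 intrinsic selected above.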
14716
14717static Value *EmitScalarFMAExpr(CodeGenFunction &CGF, const CallExpr *E,
14718 MutableArrayRef<Value *> Ops, Value *Upper,
14719 bool ZeroMask = false, unsigned PTIdx = 0,
14720 bool NegAcc = false) {
14721 unsigned Rnd = 4;
14722 if (Ops.size() > 4)
14723 Rnd = cast<llvm::ConstantInt>(Ops[4])->getZExtValue();
14724
14725 if (NegAcc)
14726 Ops[2] = CGF.Builder.CreateFNeg(Ops[2]);
14727
14728 Ops[0] = CGF.Builder.CreateExtractElement(Ops[0], (uint64_t)0);
14729 Ops[1] = CGF.Builder.CreateExtractElement(Ops[1], (uint64_t)0);
14730 Ops[2] = CGF.Builder.CreateExtractElement(Ops[2], (uint64_t)0);
14731 Value *Res;
14732 if (Rnd != 4) {
14733 Intrinsic::ID IID;
14734
14735 switch (Ops[0]->getType()->getPrimitiveSizeInBits()) {
14736 case 16:
14737 IID = Intrinsic::x86_avx512fp16_vfmadd_f16;
14738 break;
14739 case 32:
14740 IID = Intrinsic::x86_avx512_vfmadd_f32;
14741 break;
14742 case 64:
14743 IID = Intrinsic::x86_avx512_vfmadd_f64;
14744 break;
14745 default:
14746 llvm_unreachable("Unexpected size");
14747 }
14748 Res = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID),
14749 {Ops[0], Ops[1], Ops[2], Ops[4]});
14750 } else if (CGF.Builder.getIsFPConstrained()) {
14751 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
14752 Function *FMA = CGF.CGM.getIntrinsic(
14753 Intrinsic::experimental_constrained_fma, Ops[0]->getType());
14754 Res = CGF.Builder.CreateConstrainedFPCall(FMA, Ops.slice(0, 3));
14755 } else {
14756 Function *FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ops[0]->getType());
14757 Res = CGF.Builder.CreateCall(FMA, Ops.slice(0, 3));
14758 }
14759 // If we have more than 3 arguments, we need to do masking.
14760 if (Ops.size() > 3) {
14761 Value *PassThru = ZeroMask ? Constant::getNullValue(Res->getType())
14762 : Ops[PTIdx];
14763
14764 // If we negated the accumulator and it is the PassThru value, we need to
14765 // bypass the negate. Conveniently, Upper should be the same thing in this
14766 // case.
14767 if (NegAcc && PTIdx == 2)
14768 PassThru = CGF.Builder.CreateExtractElement(Upper, (uint64_t)0);
14769
14770 Res = EmitX86ScalarSelect(CGF, Ops[3], Res, PassThru);
14771 }
14772 return CGF.Builder.CreateInsertElement(Upper, Res, (uint64_t)0);
14773}
14774
14775static Value *EmitX86Muldq(CodeGenFunction &CGF, bool IsSigned,
14776 ArrayRef<Value *> Ops) {
14777 llvm::Type *Ty = Ops[0]->getType();
14778 // Arguments have a vXi32 type so cast to vXi64.
14779 Ty = llvm::FixedVectorType::get(CGF.Int64Ty,
14780 Ty->getPrimitiveSizeInBits() / 64);
14781 Value *LHS = CGF.Builder.CreateBitCast(Ops[0], Ty);
14782 Value *RHS = CGF.Builder.CreateBitCast(Ops[1], Ty);
14783
14784 if (IsSigned) {
14785 // Shift left then arithmetic shift right.
14786 Constant *ShiftAmt = ConstantInt::get(Ty, 32);
14787 LHS = CGF.Builder.CreateShl(LHS, ShiftAmt);
14788 LHS = CGF.Builder.CreateAShr(LHS, ShiftAmt);
14789 RHS = CGF.Builder.CreateShl(RHS, ShiftAmt);
14790 RHS = CGF.Builder.CreateAShr(RHS, ShiftAmt);
14791 } else {
14792 // Clear the upper bits.
14793 Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
14794 LHS = CGF.Builder.CreateAnd(LHS, Mask);
14795 RHS = CGF.Builder.CreateAnd(RHS, Mask);
14796 }
14797
14798 return CGF.Builder.CreateMul(LHS, RHS);
14799}
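// Worked example (illustrative): for pmuldq/pmuludq-style builtins, each vXi32
// operand is reinterpreted as vXi64; the signed path shifts left and then
// arithmetic-shifts right by 32 to sign-extend the low halves, the unsigned
// path masks with 0xffffffff, so the final i64 multiply yields the full
// 32 x 32 -> 64-bit products.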
14800
14801// Emit a masked pternlog intrinsic. This only exists because the header has to
14802// use a macro and we aren't able to pass the input argument to a pternlog
14803// builtin and a select builtin without evaluating it twice.
14804static Value *EmitX86Ternlog(CodeGenFunction &CGF, bool ZeroMask,
14805 ArrayRef<Value *> Ops) {
14806 llvm::Type *Ty = Ops[0]->getType();
14807
14808 unsigned VecWidth = Ty->getPrimitiveSizeInBits();
14809 unsigned EltWidth = Ty->getScalarSizeInBits();
14810 Intrinsic::ID IID;
14811 if (VecWidth == 128 && EltWidth == 32)
14812 IID = Intrinsic::x86_avx512_pternlog_d_128;
14813 else if (VecWidth == 256 && EltWidth == 32)
14814 IID = Intrinsic::x86_avx512_pternlog_d_256;
14815 else if (VecWidth == 512 && EltWidth == 32)
14816 IID = Intrinsic::x86_avx512_pternlog_d_512;
14817 else if (VecWidth == 128 && EltWidth == 64)
14818 IID = Intrinsic::x86_avx512_pternlog_q_128;
14819 else if (VecWidth == 256 && EltWidth == 64)
14820 IID = Intrinsic::x86_avx512_pternlog_q_256;
14821 else if (VecWidth == 512 && EltWidth == 64)
14822 IID = Intrinsic::x86_avx512_pternlog_q_512;
14823 else
14824 llvm_unreachable("Unexpected intrinsic");
14825
14826 Value *Ternlog = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID),
14827 Ops.drop_back());
14828 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty) : Ops[0];
14829 return EmitX86Select(CGF, Ops[4], Ternlog, PassThru);
14830}
14831
14832static Value *EmitX86SExtMask(CodeGenFunction &CGF, Value *Op,
14833 llvm::Type *DstTy) {
14834 unsigned NumberOfElements =
14835 cast<llvm::FixedVectorType>(DstTy)->getNumElements();
14836 Value *Mask = getMaskVecValue(CGF, Op, NumberOfElements);
14837 return CGF.Builder.CreateSExt(Mask, DstTy, "vpmovm2");
14838}
14839
14840Value *CodeGenFunction::EmitX86CpuIs(const CallExpr *E) {
14841 const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
14842 StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
14843 return EmitX86CpuIs(CPUStr);
14844}
14845
14846// Convert F16 halves to floats.
14847static Value *EmitX86CvtF16ToFloatExpr(CodeGenFunction &CGF,
14848 ArrayRef<Value *> Ops,
14849 llvm::Type *DstTy) {
14850 assert((Ops.size() == 1 || Ops.size() == 3 || Ops.size() == 4) &&
14851 "Unknown cvtph2ps intrinsic");
14852
14853 // If the SAE intrinsic doesn't use default rounding then we can't upgrade.
14854 if (Ops.size() == 4 && cast<llvm::ConstantInt>(Ops[3])->getZExtValue() != 4) {
14855 Function *F =
14856 CGF.CGM.getIntrinsic(Intrinsic::x86_avx512_mask_vcvtph2ps_512);
14857 return CGF.Builder.CreateCall(F, {Ops[0], Ops[1], Ops[2], Ops[3]});
14858 }
14859
14860 unsigned NumDstElts = cast<llvm::FixedVectorType>(DstTy)->getNumElements();
14861 Value *Src = Ops[0];
14862
14863 // Extract the subvector.
14864 if (NumDstElts !=
14865 cast<llvm::FixedVectorType>(Src->getType())->getNumElements()) {
14866 assert(NumDstElts == 4 && "Unexpected vector size");
14867 Src = CGF.Builder.CreateShuffleVector(Src, ArrayRef<int>{0, 1, 2, 3});
14868 }
14869
14870 // Bitcast from vXi16 to vXf16.
14871 auto *HalfTy = llvm::FixedVectorType::get(
14872 llvm::Type::getHalfTy(CGF.getLLVMContext()), NumDstElts);
14873 Src = CGF.Builder.CreateBitCast(Src, HalfTy);
14874
14875 // Perform the fp-extension.
14876 Value *Res = CGF.Builder.CreateFPExt(Src, DstTy, "cvtph2ps");
14877
14878 if (Ops.size() >= 3)
14879 Res = EmitX86Select(CGF, Ops[2], Res, Ops[1]);
14880 return Res;
14881}
14882
14883Value *CodeGenFunction::EmitX86CpuIs(StringRef CPUStr) {
14884
14885 llvm::Type *Int32Ty = Builder.getInt32Ty();
14886
14887 // Matching the struct layout from the compiler-rt/libgcc structure that is
14888 // filled in:
14889 // unsigned int __cpu_vendor;
14890 // unsigned int __cpu_type;
14891 // unsigned int __cpu_subtype;
14892 // unsigned int __cpu_features[1];
14893 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
14894 llvm::ArrayType::get(Int32Ty, 1));
14895
14896 // Grab the global __cpu_model.
14897 llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
14898 cast<llvm::GlobalValue>(CpuModel)->setDSOLocal(true);
14899
14900 // Calculate the index needed to access the correct field based on the
14901 // range. Also adjust the expected value.
14902 unsigned Index;
14903 unsigned Value;
14904 std::tie(Index, Value) = StringSwitch<std::pair<unsigned, unsigned>>(CPUStr)
14905#define X86_VENDOR(ENUM, STRING) \
14906 .Case(STRING, {0u, static_cast<unsigned>(llvm::X86::ENUM)})
14907#define X86_CPU_TYPE_ALIAS(ENUM, ALIAS) \
14908 .Case(ALIAS, {1u, static_cast<unsigned>(llvm::X86::ENUM)})
14909#define X86_CPU_TYPE(ENUM, STR) \
14910 .Case(STR, {1u, static_cast<unsigned>(llvm::X86::ENUM)})
14911#define X86_CPU_SUBTYPE_ALIAS(ENUM, ALIAS) \
14912 .Case(ALIAS, {2u, static_cast<unsigned>(llvm::X86::ENUM)})
14913#define X86_CPU_SUBTYPE(ENUM, STR) \
14914 .Case(STR, {2u, static_cast<unsigned>(llvm::X86::ENUM)})
14915#include "llvm/TargetParser/X86TargetParser.def"
14916 .Default({0, 0});
14917 assert(Value != 0 && "Invalid CPUStr passed to CpuIs");
14918
14919 // Grab the appropriate field from __cpu_model.
14920 llvm::Value *Idxs[] = {ConstantInt::get(Int32Ty, 0),
14921 ConstantInt::get(Int32Ty, Index)};
14922 llvm::Value *CpuValue = Builder.CreateInBoundsGEP(STy, CpuModel, Idxs);
14923 CpuValue = Builder.CreateAlignedLoad(Int32Ty, CpuValue,
14925
14926 // Check the value of the field against the requested value.
14927 return Builder.CreateICmpEQ(CpuValue,
14928 llvm::ConstantInt::get(Int32Ty, Value));
14929}
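// Illustrative sketch (not from the original source): __builtin_cpu_is("amd")
// compiles to a load of the __cpu_model field selected by Index and an
// equality compare against the llvm::X86 enum value, roughly
//   %f = load i32 from the selected field of @__cpu_model
//   %r = icmp eq i32 %f, <vendor enum>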
14930
14931Value *CodeGenFunction::EmitX86CpuSupports(const CallExpr *E) {
14932 const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts();
14933 StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString();
14934 if (!getContext().getTargetInfo().validateCpuSupports(FeatureStr))
14935 return Builder.getFalse();
14936 return EmitX86CpuSupports(FeatureStr);
14937}
14938
14939Value *CodeGenFunction::EmitX86CpuSupports(ArrayRef<StringRef> FeatureStrs) {
14940 return EmitX86CpuSupports(llvm::X86::getCpuSupportsMask(FeatureStrs));
14941}
14942
14943llvm::Value *
14944CodeGenFunction::EmitX86CpuSupports(std::array<uint32_t, 4> FeatureMask) {
14945 Value *Result = Builder.getTrue();
14946 if (FeatureMask[0] != 0) {
14947 // Matching the struct layout from the compiler-rt/libgcc structure that is
14948 // filled in:
14949 // unsigned int __cpu_vendor;
14950 // unsigned int __cpu_type;
14951 // unsigned int __cpu_subtype;
14952 // unsigned int __cpu_features[1];
14953 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
14954 llvm::ArrayType::get(Int32Ty, 1));
14955
14956 // Grab the global __cpu_model.
14957 llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
14958 cast<llvm::GlobalValue>(CpuModel)->setDSOLocal(true);
14959
14960 // Grab the first (0th) element from the field __cpu_features off of the
14961 // global in the struct STy.
14962 Value *Idxs[] = {Builder.getInt32(0), Builder.getInt32(3),
14963 Builder.getInt32(0)};
14964 Value *CpuFeatures = Builder.CreateInBoundsGEP(STy, CpuModel, Idxs);
14965 Value *Features = Builder.CreateAlignedLoad(Int32Ty, CpuFeatures,
14967
14968 // Check the value of the bit corresponding to the feature requested.
14969 Value *Mask = Builder.getInt32(FeatureMask[0]);
14970 Value *Bitset = Builder.CreateAnd(Features, Mask);
14971 Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
14972 Result = Builder.CreateAnd(Result, Cmp);
14973 }
14974
14975 llvm::Type *ATy = llvm::ArrayType::get(Int32Ty, 3);
14976 llvm::Constant *CpuFeatures2 =
14977 CGM.CreateRuntimeVariable(ATy, "__cpu_features2");
14978 cast<llvm::GlobalValue>(CpuFeatures2)->setDSOLocal(true);
14979 for (int i = 1; i != 4; ++i) {
14980 const uint32_t M = FeatureMask[i];
14981 if (!M)
14982 continue;
14983 Value *Idxs[] = {Builder.getInt32(0), Builder.getInt32(i - 1)};
14984 Value *Features = Builder.CreateAlignedLoad(
14985 Int32Ty, Builder.CreateInBoundsGEP(ATy, CpuFeatures2, Idxs),
14986 CharUnits::fromQuantity(4));
14987 // Check the value of the bit corresponding to the feature requested.
14988 Value *Mask = Builder.getInt32(M);
14989 Value *Bitset = Builder.CreateAnd(Features, Mask);
14990 Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
14991 Result = Builder.CreateAnd(Result, Cmp);
14992 }
14993
14994 return Result;
14995}
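// Illustrative sketch (not from the original source): a feature whose bit
// lives in the first mask word is tested as
//   %f  = load i32 from __cpu_model's __cpu_features[0]
//   %b  = and i32 %f, <mask>
//   %ok = icmp eq i32 %b, <mask>
// and any remaining words are tested the same way against @__cpu_features2,
// with all the comparisons AND-ed into a single result.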
14996
14997Value *CodeGenFunction::EmitAArch64CpuInit() {
14998 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
14999 llvm::FunctionCallee Func =
15000 CGM.CreateRuntimeFunction(FTy, "__init_cpu_features_resolver");
15001 cast<llvm::GlobalValue>(Func.getCallee())->setDSOLocal(true);
15002 cast<llvm::GlobalValue>(Func.getCallee())
15003 ->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
15004 return Builder.CreateCall(Func);
15005}
15006
15007Value *CodeGenFunction::EmitRISCVCpuInit() {
15008 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, {VoidPtrTy}, false);
15009 llvm::FunctionCallee Func =
15010 CGM.CreateRuntimeFunction(FTy, "__init_riscv_feature_bits");
15011 auto *CalleeGV = cast<llvm::GlobalValue>(Func.getCallee());
15012 CalleeGV->setDSOLocal(true);
15013 CalleeGV->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
15014 return Builder.CreateCall(Func, {llvm::ConstantPointerNull::get(VoidPtrTy)});
15015}
15016
15017Value *CodeGenFunction::EmitX86CpuInit() {
15018 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy,
15019 /*Variadic*/ false);
15020 llvm::FunctionCallee Func =
15021 CGM.CreateRuntimeFunction(FTy, "__cpu_indicator_init");
15022 cast<llvm::GlobalValue>(Func.getCallee())->setDSOLocal(true);
15023 cast<llvm::GlobalValue>(Func.getCallee())
15024 ->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
15025 return Builder.CreateCall(Func);
15026}
15027
15028Value *CodeGenFunction::EmitAArch64CpuSupports(const CallExpr *E) {
15029 const Expr *ArgExpr = E->getArg(0)->IgnoreParenCasts();
15030 StringRef ArgStr = cast<StringLiteral>(ArgExpr)->getString();
15031 llvm::SmallVector<StringRef, 8> Features;
15032 ArgStr.split(Features, "+");
15033 for (auto &Feature : Features) {
15034 Feature = Feature.trim();
15035 if (!llvm::AArch64::parseFMVExtension(Feature))
15036 return Builder.getFalse();
15037 if (Feature != "default")
15038 Features.push_back(Feature);
15039 }
15040 return EmitAArch64CpuSupports(Features);
15041}
15042
15043llvm::Value *
15044CodeGenFunction::EmitAArch64CpuSupports(ArrayRef<StringRef> FeaturesStrs) {
15045 uint64_t FeaturesMask = llvm::AArch64::getCpuSupportsMask(FeaturesStrs);
15046 Value *Result = Builder.getTrue();
15047 if (FeaturesMask != 0) {
15048 // Get features from structure in runtime library
15049 // struct {
15050 // unsigned long long features;
15051 // } __aarch64_cpu_features;
15052 llvm::Type *STy = llvm::StructType::get(Int64Ty);
15053 llvm::Constant *AArch64CPUFeatures =
15054 CGM.CreateRuntimeVariable(STy, "__aarch64_cpu_features");
15055 cast<llvm::GlobalValue>(AArch64CPUFeatures)->setDSOLocal(true);
15056 llvm::Value *CpuFeatures = Builder.CreateGEP(
15057 STy, AArch64CPUFeatures,
15058 {ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, 0)});
15059 Value *Features = Builder.CreateAlignedLoad(Int64Ty, CpuFeatures,
15060 CharUnits::fromQuantity(8));
15061 Value *Mask = Builder.getInt64(FeaturesMask);
15062 Value *Bitset = Builder.CreateAnd(Features, Mask);
15063 Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
15064 Result = Builder.CreateAnd(Result, Cmp);
15065 }
15066 return Result;
15067}
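// Note (illustrative): the AArch64 query is the same bit-test pattern against
// the single 64-bit word of __aarch64_cpu_features: load the word, AND it with
// the combined FMV feature mask, and compare for equality so that every
// requested feature must be present for the builtin to return true.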
15068
15069Value *CodeGenFunction::EmitRISCVCpuSupports(const CallExpr *E) {
15070
15071 const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts();
15072 StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString();
15073 if (!getContext().getTargetInfo().validateCpuSupports(FeatureStr))
15074 return Builder.getFalse();
15075
15076 return EmitRISCVCpuSupports(ArrayRef<StringRef>(FeatureStr));
15077}
15078
15079static Value *loadRISCVFeatureBits(unsigned Index, CGBuilderTy &Builder,
15080 CodeGenModule &CGM) {
15081 llvm::Type *Int32Ty = Builder.getInt32Ty();
15082 llvm::Type *Int64Ty = Builder.getInt64Ty();
15083 llvm::ArrayType *ArrayOfInt64Ty =
15084 llvm::ArrayType::get(Int64Ty, llvm::RISCVISAInfo::FeatureBitSize);
15085 llvm::Type *StructTy = llvm::StructType::get(Int32Ty, ArrayOfInt64Ty);
15086 llvm::Constant *RISCVFeaturesBits =
15087 CGM.CreateRuntimeVariable(StructTy, "__riscv_feature_bits");
15088 cast<llvm::GlobalValue>(RISCVFeaturesBits)->setDSOLocal(true);
15089 Value *IndexVal = llvm::ConstantInt::get(Int32Ty, Index);
15090 llvm::Value *GEPIndices[] = {Builder.getInt32(0), Builder.getInt32(1),
15091 IndexVal};
15092 Value *Ptr =
15093 Builder.CreateInBoundsGEP(StructTy, RISCVFeaturesBits, GEPIndices);
15094 Value *FeaturesBit =
15095 Builder.CreateAlignedLoad(Int64Ty, Ptr, CharUnits::fromQuantity(8));
15096 return FeaturesBit;
15097}
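// This reads __riscv_feature_bits.features[Index], i.e. element Index of the
// i64 array member at struct field 1, which __init_riscv_feature_bits (called
// from EmitRISCVCpuInit above) is expected to populate.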
15098
15099Value *CodeGenFunction::EmitRISCVCpuSupports(ArrayRef<StringRef> FeaturesStrs) {
15100  const unsigned RISCVFeatureLength = llvm::RISCVISAInfo::FeatureBitSize;
15101 uint64_t RequireBitMasks[RISCVFeatureLength] = {0};
15102
15103 for (auto Feat : FeaturesStrs) {
15104 auto [GroupID, BitPos] = RISCVISAInfo::getRISCVFeaturesBitsInfo(Feat);
15105
15106    // If there is no BitPos for this feature, skip this version;
15107    // a warning is also reported to the user during compilation.
15108 if (BitPos == -1)
15109 return Builder.getFalse();
15110
15111 RequireBitMasks[GroupID] |= (1ULL << BitPos);
15112 }
15113
15114 Value *Result = nullptr;
15115 for (unsigned Idx = 0; Idx < RISCVFeatureLength; Idx++) {
15116 if (RequireBitMasks[Idx] == 0)
15117 continue;
15118
15119 Value *Mask = Builder.getInt64(RequireBitMasks[Idx]);
15120 Value *Bitset =
15121 Builder.CreateAnd(loadRISCVFeatureBits(Idx, Builder, CGM), Mask);
15122 Value *CmpV = Builder.CreateICmpEQ(Bitset, Mask);
15123 Result = (!Result) ? CmpV : Builder.CreateAnd(Result, CmpV);
15124 }
15125
15126 assert(Result && "Should have value here.");
15127
15128 return Result;
15129}
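// Features whose bits fall into the same GroupID share a single
// load/and/icmp against the accumulated mask for that group; the per-group
// comparisons are then AND-ed together, so the result is true only when
// every requested extension bit is present.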
15130
15131Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
15132 const CallExpr *E) {
15133 if (BuiltinID == Builtin::BI__builtin_cpu_is)
15134 return EmitX86CpuIs(E);
15135 if (BuiltinID == Builtin::BI__builtin_cpu_supports)
15136 return EmitX86CpuSupports(E);
15137 if (BuiltinID == Builtin::BI__builtin_cpu_init)
15138 return EmitX86CpuInit();
15139
15140 // Handle MSVC intrinsics before argument evaluation to prevent double
15141 // evaluation.
15142 if (std::optional<MSVCIntrin> MsvcIntId = translateX86ToMsvcIntrin(BuiltinID))
15143 return EmitMSVCBuiltinExpr(*MsvcIntId, E);
15144
15145  SmallVector<Value*, 4> Ops;
15146  bool IsMaskFCmp = false;
15147 bool IsConjFMA = false;
15148
15149 // Find out if any arguments are required to be integer constant expressions.
15150 unsigned ICEArguments = 0;
15151  ASTContext::GetBuiltinTypeError Error;
15152  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
15153 assert(Error == ASTContext::GE_None && "Should not codegen an error");
15154
15155 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
15156 Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
15157 }
15158
15159  // These exist so that the builtin that takes an immediate can be bounds
15160  // checked by clang to avoid passing bad immediates to the backend. Since
15161  // AVX has a larger immediate than SSE we would need separate builtins to
15162  // do the different bounds checking. Rather than create a clang-specific
15163  // SSE-only builtin, this implements eight separate builtins to match the
15164  // gcc implementation.
15165 auto getCmpIntrinsicCall = [this, &Ops](Intrinsic::ID ID, unsigned Imm) {
15166 Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm));
15167 llvm::Function *F = CGM.getIntrinsic(ID);
15168 return Builder.CreateCall(F, Ops);
15169 };
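  // getCmpIntrinsicCall simply appends the comparison predicate as an i8
  // immediate and calls the target cmp intrinsic directly; range checking of
  // the immediate is left to the usual builtin argument checks in clang.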
15170
15171 // For the vector forms of FP comparisons, translate the builtins directly to
15172 // IR.
15173 // TODO: The builtins could be removed if the SSE header files used vector
15174 // extension comparisons directly (vector ordered/unordered may need
15175 // additional support via __builtin_isnan()).
15176 auto getVectorFCmpIR = [this, &Ops, E](CmpInst::Predicate Pred,
15177 bool IsSignaling) {
15178 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
15179 Value *Cmp;
15180 if (IsSignaling)
15181 Cmp = Builder.CreateFCmpS(Pred, Ops[0], Ops[1]);
15182 else
15183 Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
15184 llvm::VectorType *FPVecTy = cast<llvm::VectorType>(Ops[0]->getType());
15185 llvm::VectorType *IntVecTy = llvm::VectorType::getInteger(FPVecTy);
15186 Value *Sext = Builder.CreateSExt(Cmp, IntVecTy);
15187 return Builder.CreateBitCast(Sext, FPVecTy);
15188 };
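  // getVectorFCmpIR produces, e.g. for an ordered less-than comparison:
  //   %c = fcmp olt <N x fp> %a, %b        (fcmps in the signaling case)
  //   %s = sext <N x i1> %c to <N x iM>
  //   bitcast %s back to the FP vector type
  // which matches the all-ones / all-zeros lane results of the SSE/AVX
  // compare instructions.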
15189
15190 switch (BuiltinID) {
15191 default: return nullptr;
15192 case X86::BI_mm_prefetch: {
15193 Value *Address = Ops[0];
15194 ConstantInt *C = cast<ConstantInt>(Ops[1]);
15195 Value *RW = ConstantInt::get(Int32Ty, (C->getZExtValue() >> 2) & 0x1);
15196 Value *Locality = ConstantInt::get(Int32Ty, C->getZExtValue() & 0x3);
15197 Value *Data = ConstantInt::get(Int32Ty, 1);
15198 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
15199 return Builder.CreateCall(F, {Address, RW, Locality, Data});
15200 }
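  // The hint immediate is decoded as: bit 2 -> read/write, bits 1:0 ->
  // locality, and Data = 1 marks a data (not instruction) prefetch. Assuming
  // the usual xmmintrin.h encoding, _MM_HINT_T0 (3) becomes a read prefetch
  // with maximal locality, while _MM_HINT_ET0 (7) is the write counterpart.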
15201 case X86::BI_mm_clflush: {
15202 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_clflush),
15203 Ops[0]);
15204 }
15205 case X86::BI_mm_lfence: {
15206 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_lfence));
15207 }
15208 case X86::BI_mm_mfence: {
15209 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_mfence));
15210 }
15211 case X86::BI_mm_sfence: {
15212 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_sfence));
15213 }
15214 case X86::BI_mm_pause: {
15215 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_pause));
15216 }
15217 case X86::BI__rdtsc: {
15218 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtsc));
15219 }
15220 case X86::BI__builtin_ia32_rdtscp: {
15221 Value *Call = Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtscp));
15222 Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1),
15223 Ops[0]);
15224 return Builder.CreateExtractValue(Call, 0);
15225 }
15226 case X86::BI__builtin_ia32_lzcnt_u16:
15227 case X86::BI__builtin_ia32_lzcnt_u32:
15228 case X86::BI__builtin_ia32_lzcnt_u64: {
15229 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
15230 return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
15231 }
15232 case X86::BI__builtin_ia32_tzcnt_u16:
15233 case X86::BI__builtin_ia32_tzcnt_u32:
15234 case X86::BI__builtin_ia32_tzcnt_u64: {
15235 Function *F = CGM.getIntrinsic(Intrinsic::cttz, Ops[0]->getType());
15236 return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
15237 }
15238 case X86::BI__builtin_ia32_undef128:
15239 case X86::BI__builtin_ia32_undef256:
15240 case X86::BI__builtin_ia32_undef512:
15241 // The x86 definition of "undef" is not the same as the LLVM definition
15242 // (PR32176). We leave optimizing away an unnecessary zero constant to the
15243 // IR optimizer and backend.
15244 // TODO: If we had a "freeze" IR instruction to generate a fixed undef
15245 // value, we should use that here instead of a zero.
15246 return llvm::Constant::getNullValue(ConvertType(E->getType()));
15247 case X86::BI__builtin_ia32_vec_ext_v4hi:
15248 case X86::BI__builtin_ia32_vec_ext_v16qi:
15249 case X86::BI__builtin_ia32_vec_ext_v8hi:
15250 case X86::BI__builtin_ia32_vec_ext_v4si:
15251 case X86::BI__builtin_ia32_vec_ext_v4sf:
15252 case X86::BI__builtin_ia32_vec_ext_v2di:
15253 case X86::BI__builtin_ia32_vec_ext_v32qi:
15254 case X86::BI__builtin_ia32_vec_ext_v16hi:
15255 case X86::BI__builtin_ia32_vec_ext_v8si:
15256 case X86::BI__builtin_ia32_vec_ext_v4di: {
15257 unsigned NumElts =
15258 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15259 uint64_t Index = cast<ConstantInt>(Ops[1])->getZExtValue();
15260 Index &= NumElts - 1;
15261 // These builtins exist so we can ensure the index is an ICE and in range.
15262 // Otherwise we could just do this in the header file.
15263 return Builder.CreateExtractElement(Ops[0], Index);
15264 }
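  // Because the index is masked with NumElts - 1, an out-of-range immediate
  // simply wraps: e.g. extracting element 5 from a 4-element vector yields
  // element 1. The operation itself is a plain extractelement.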
15265 case X86::BI__builtin_ia32_vec_set_v4hi:
15266 case X86::BI__builtin_ia32_vec_set_v16qi:
15267 case X86::BI__builtin_ia32_vec_set_v8hi:
15268 case X86::BI__builtin_ia32_vec_set_v4si:
15269 case X86::BI__builtin_ia32_vec_set_v2di:
15270 case X86::BI__builtin_ia32_vec_set_v32qi:
15271 case X86::BI__builtin_ia32_vec_set_v16hi:
15272 case X86::BI__builtin_ia32_vec_set_v8si:
15273 case X86::BI__builtin_ia32_vec_set_v4di: {
15274 unsigned NumElts =
15275 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15276 unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue();
15277 Index &= NumElts - 1;
15278 // These builtins exist so we can ensure the index is an ICE and in range.
15279 // Otherwise we could just do this in the header file.
15280 return Builder.CreateInsertElement(Ops[0], Ops[1], Index);
15281 }
15282 case X86::BI_mm_setcsr:
15283 case X86::BI__builtin_ia32_ldmxcsr: {
15284 RawAddress Tmp = CreateMemTemp(E->getArg(0)->getType());
15285 Builder.CreateStore(Ops[0], Tmp);
15286 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr),
15287 Tmp.getPointer());
15288 }
15289 case X86::BI_mm_getcsr:
15290 case X86::BI__builtin_ia32_stmxcsr: {
15291    RawAddress Tmp = CreateMemTemp(E->getType());
15292    Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr),
15293 Tmp.getPointer());
15294 return Builder.CreateLoad(Tmp, "stmxcsr");
15295 }
15296 case X86::BI__builtin_ia32_xsave:
15297 case X86::BI__builtin_ia32_xsave64:
15298 case X86::BI__builtin_ia32_xrstor:
15299 case X86::BI__builtin_ia32_xrstor64:
15300 case X86::BI__builtin_ia32_xsaveopt:
15301 case X86::BI__builtin_ia32_xsaveopt64:
15302 case X86::BI__builtin_ia32_xrstors:
15303 case X86::BI__builtin_ia32_xrstors64:
15304 case X86::BI__builtin_ia32_xsavec:
15305 case X86::BI__builtin_ia32_xsavec64:
15306 case X86::BI__builtin_ia32_xsaves:
15307 case X86::BI__builtin_ia32_xsaves64:
15308 case X86::BI__builtin_ia32_xsetbv:
15309 case X86::BI_xsetbv: {
15310 Intrinsic::ID ID;
15311#define INTRINSIC_X86_XSAVE_ID(NAME) \
15312 case X86::BI__builtin_ia32_##NAME: \
15313 ID = Intrinsic::x86_##NAME; \
15314 break
15315 switch (BuiltinID) {
15316 default: llvm_unreachable("Unsupported intrinsic!");
15317    INTRINSIC_X86_XSAVE_ID(xsave);
15318    INTRINSIC_X86_XSAVE_ID(xsave64);
15319 INTRINSIC_X86_XSAVE_ID(xrstor);
15320 INTRINSIC_X86_XSAVE_ID(xrstor64);
15321 INTRINSIC_X86_XSAVE_ID(xsaveopt);
15322 INTRINSIC_X86_XSAVE_ID(xsaveopt64);
15323 INTRINSIC_X86_XSAVE_ID(xrstors);
15324 INTRINSIC_X86_XSAVE_ID(xrstors64);
15325 INTRINSIC_X86_XSAVE_ID(xsavec);
15326 INTRINSIC_X86_XSAVE_ID(xsavec64);
15327 INTRINSIC_X86_XSAVE_ID(xsaves);
15328 INTRINSIC_X86_XSAVE_ID(xsaves64);
15329 INTRINSIC_X86_XSAVE_ID(xsetbv);
15330 case X86::BI_xsetbv:
15331 ID = Intrinsic::x86_xsetbv;
15332 break;
15333 }
15334#undef INTRINSIC_X86_XSAVE_ID
15335 Value *Mhi = Builder.CreateTrunc(
15336 Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, 32)), Int32Ty);
15337 Value *Mlo = Builder.CreateTrunc(Ops[1], Int32Ty);
15338 Ops[1] = Mhi;
15339 Ops.push_back(Mlo);
15340 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
15341 }
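  // The i64 mask in Ops[1] is split into its high and low 32-bit halves
  // (the EDX:EAX pair expected by the hardware), so the intrinsic is called
  // as, roughly, llvm.x86.<name>(ptr-or-xcr, mask >> 32, mask & 0xffffffff).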
15342 case X86::BI__builtin_ia32_xgetbv:
15343 case X86::BI_xgetbv:
15344 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_xgetbv), Ops);
15345 case X86::BI__builtin_ia32_storedqudi128_mask:
15346 case X86::BI__builtin_ia32_storedqusi128_mask:
15347 case X86::BI__builtin_ia32_storedquhi128_mask:
15348 case X86::BI__builtin_ia32_storedquqi128_mask:
15349 case X86::BI__builtin_ia32_storeupd128_mask:
15350 case X86::BI__builtin_ia32_storeups128_mask:
15351 case X86::BI__builtin_ia32_storedqudi256_mask:
15352 case X86::BI__builtin_ia32_storedqusi256_mask:
15353 case X86::BI__builtin_ia32_storedquhi256_mask:
15354 case X86::BI__builtin_ia32_storedquqi256_mask:
15355 case X86::BI__builtin_ia32_storeupd256_mask:
15356 case X86::BI__builtin_ia32_storeups256_mask:
15357 case X86::BI__builtin_ia32_storedqudi512_mask:
15358 case X86::BI__builtin_ia32_storedqusi512_mask:
15359 case X86::BI__builtin_ia32_storedquhi512_mask:
15360 case X86::BI__builtin_ia32_storedquqi512_mask:
15361 case X86::BI__builtin_ia32_storeupd512_mask:
15362 case X86::BI__builtin_ia32_storeups512_mask:
15363 return EmitX86MaskedStore(*this, Ops, Align(1));
15364
15365 case X86::BI__builtin_ia32_storesbf16128_mask:
15366 case X86::BI__builtin_ia32_storesh128_mask:
15367 case X86::BI__builtin_ia32_storess128_mask:
15368 case X86::BI__builtin_ia32_storesd128_mask:
15369 return EmitX86MaskedStore(*this, Ops, Align(1));
15370
15371 case X86::BI__builtin_ia32_cvtmask2b128:
15372 case X86::BI__builtin_ia32_cvtmask2b256:
15373 case X86::BI__builtin_ia32_cvtmask2b512:
15374 case X86::BI__builtin_ia32_cvtmask2w128:
15375 case X86::BI__builtin_ia32_cvtmask2w256:
15376 case X86::BI__builtin_ia32_cvtmask2w512:
15377 case X86::BI__builtin_ia32_cvtmask2d128:
15378 case X86::BI__builtin_ia32_cvtmask2d256:
15379 case X86::BI__builtin_ia32_cvtmask2d512:
15380 case X86::BI__builtin_ia32_cvtmask2q128:
15381 case X86::BI__builtin_ia32_cvtmask2q256:
15382 case X86::BI__builtin_ia32_cvtmask2q512:
15383 return EmitX86SExtMask(*this, Ops[0], ConvertType(E->getType()));
15384
15385 case X86::BI__builtin_ia32_cvtb2mask128:
15386 case X86::BI__builtin_ia32_cvtb2mask256:
15387 case X86::BI__builtin_ia32_cvtb2mask512:
15388 case X86::BI__builtin_ia32_cvtw2mask128:
15389 case X86::BI__builtin_ia32_cvtw2mask256:
15390 case X86::BI__builtin_ia32_cvtw2mask512:
15391 case X86::BI__builtin_ia32_cvtd2mask128:
15392 case X86::BI__builtin_ia32_cvtd2mask256:
15393 case X86::BI__builtin_ia32_cvtd2mask512:
15394 case X86::BI__builtin_ia32_cvtq2mask128:
15395 case X86::BI__builtin_ia32_cvtq2mask256:
15396 case X86::BI__builtin_ia32_cvtq2mask512:
15397 return EmitX86ConvertToMask(*this, Ops[0]);
15398
15399 case X86::BI__builtin_ia32_cvtdq2ps512_mask:
15400 case X86::BI__builtin_ia32_cvtqq2ps512_mask:
15401 case X86::BI__builtin_ia32_cvtqq2pd512_mask:
15402 case X86::BI__builtin_ia32_vcvtw2ph512_mask:
15403 case X86::BI__builtin_ia32_vcvtdq2ph512_mask:
15404 case X86::BI__builtin_ia32_vcvtqq2ph512_mask:
15405 case X86::BI__builtin_ia32_vcvtdq2ph256_round_mask:
15406 case X86::BI__builtin_ia32_vcvtdq2ps256_round_mask:
15407 case X86::BI__builtin_ia32_vcvtqq2pd256_round_mask:
15408 case X86::BI__builtin_ia32_vcvtqq2ph256_round_mask:
15409 case X86::BI__builtin_ia32_vcvtqq2ps256_round_mask:
15410 case X86::BI__builtin_ia32_vcvtw2ph256_round_mask:
15411 return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ true);
15412 case X86::BI__builtin_ia32_cvtudq2ps512_mask:
15413 case X86::BI__builtin_ia32_cvtuqq2ps512_mask:
15414 case X86::BI__builtin_ia32_cvtuqq2pd512_mask:
15415 case X86::BI__builtin_ia32_vcvtuw2ph512_mask:
15416 case X86::BI__builtin_ia32_vcvtudq2ph512_mask:
15417 case X86::BI__builtin_ia32_vcvtuqq2ph512_mask:
15418 case X86::BI__builtin_ia32_vcvtudq2ph256_round_mask:
15419 case X86::BI__builtin_ia32_vcvtudq2ps256_round_mask:
15420 case X86::BI__builtin_ia32_vcvtuqq2pd256_round_mask:
15421 case X86::BI__builtin_ia32_vcvtuqq2ph256_round_mask:
15422 case X86::BI__builtin_ia32_vcvtuqq2ps256_round_mask:
15423 case X86::BI__builtin_ia32_vcvtuw2ph256_round_mask:
15424 return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ false);
15425
15426 case X86::BI__builtin_ia32_vfmaddss3:
15427 case X86::BI__builtin_ia32_vfmaddsd3:
15428 case X86::BI__builtin_ia32_vfmaddsh3_mask:
15429 case X86::BI__builtin_ia32_vfmaddss3_mask:
15430 case X86::BI__builtin_ia32_vfmaddsd3_mask:
15431 return EmitScalarFMAExpr(*this, E, Ops, Ops[0]);
15432 case X86::BI__builtin_ia32_vfmaddss:
15433 case X86::BI__builtin_ia32_vfmaddsd:
15434 return EmitScalarFMAExpr(*this, E, Ops,
15435 Constant::getNullValue(Ops[0]->getType()));
15436 case X86::BI__builtin_ia32_vfmaddsh3_maskz:
15437 case X86::BI__builtin_ia32_vfmaddss3_maskz:
15438 case X86::BI__builtin_ia32_vfmaddsd3_maskz:
15439 return EmitScalarFMAExpr(*this, E, Ops, Ops[0], /*ZeroMask*/ true);
15440 case X86::BI__builtin_ia32_vfmaddsh3_mask3:
15441 case X86::BI__builtin_ia32_vfmaddss3_mask3:
15442 case X86::BI__builtin_ia32_vfmaddsd3_mask3:
15443 return EmitScalarFMAExpr(*this, E, Ops, Ops[2], /*ZeroMask*/ false, 2);
15444 case X86::BI__builtin_ia32_vfmsubsh3_mask3:
15445 case X86::BI__builtin_ia32_vfmsubss3_mask3:
15446 case X86::BI__builtin_ia32_vfmsubsd3_mask3:
15447 return EmitScalarFMAExpr(*this, E, Ops, Ops[2], /*ZeroMask*/ false, 2,
15448 /*NegAcc*/ true);
15449 case X86::BI__builtin_ia32_vfmaddph:
15450 case X86::BI__builtin_ia32_vfmaddps:
15451 case X86::BI__builtin_ia32_vfmaddpd:
15452 case X86::BI__builtin_ia32_vfmaddph256:
15453 case X86::BI__builtin_ia32_vfmaddps256:
15454 case X86::BI__builtin_ia32_vfmaddpd256:
15455 case X86::BI__builtin_ia32_vfmaddph512_mask:
15456 case X86::BI__builtin_ia32_vfmaddph512_maskz:
15457 case X86::BI__builtin_ia32_vfmaddph512_mask3:
15458 case X86::BI__builtin_ia32_vfmaddnepbh128:
15459 case X86::BI__builtin_ia32_vfmaddnepbh256:
15460 case X86::BI__builtin_ia32_vfmaddnepbh512:
15461 case X86::BI__builtin_ia32_vfmaddps512_mask:
15462 case X86::BI__builtin_ia32_vfmaddps512_maskz:
15463 case X86::BI__builtin_ia32_vfmaddps512_mask3:
15464 case X86::BI__builtin_ia32_vfmsubps512_mask3:
15465 case X86::BI__builtin_ia32_vfmaddpd512_mask:
15466 case X86::BI__builtin_ia32_vfmaddpd512_maskz:
15467 case X86::BI__builtin_ia32_vfmaddpd512_mask3:
15468 case X86::BI__builtin_ia32_vfmsubpd512_mask3:
15469 case X86::BI__builtin_ia32_vfmsubph512_mask3:
15470 case X86::BI__builtin_ia32_vfmaddph256_round_mask:
15471 case X86::BI__builtin_ia32_vfmaddph256_round_maskz:
15472 case X86::BI__builtin_ia32_vfmaddph256_round_mask3:
15473 case X86::BI__builtin_ia32_vfmaddps256_round_mask:
15474 case X86::BI__builtin_ia32_vfmaddps256_round_maskz:
15475 case X86::BI__builtin_ia32_vfmaddps256_round_mask3:
15476 case X86::BI__builtin_ia32_vfmsubps256_round_mask3:
15477 case X86::BI__builtin_ia32_vfmaddpd256_round_mask:
15478 case X86::BI__builtin_ia32_vfmaddpd256_round_maskz:
15479 case X86::BI__builtin_ia32_vfmaddpd256_round_mask3:
15480 case X86::BI__builtin_ia32_vfmsubpd256_round_mask3:
15481 case X86::BI__builtin_ia32_vfmsubph256_round_mask3:
15482 return EmitX86FMAExpr(*this, E, Ops, BuiltinID, /*IsAddSub*/ false);
15483 case X86::BI__builtin_ia32_vfmaddsubph512_mask:
15484 case X86::BI__builtin_ia32_vfmaddsubph512_maskz:
15485 case X86::BI__builtin_ia32_vfmaddsubph512_mask3:
15486 case X86::BI__builtin_ia32_vfmsubaddph512_mask3:
15487 case X86::BI__builtin_ia32_vfmaddsubps512_mask:
15488 case X86::BI__builtin_ia32_vfmaddsubps512_maskz:
15489 case X86::BI__builtin_ia32_vfmaddsubps512_mask3:
15490 case X86::BI__builtin_ia32_vfmsubaddps512_mask3:
15491 case X86::BI__builtin_ia32_vfmaddsubpd512_mask:
15492 case X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
15493 case X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
15494 case X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
15495 case X86::BI__builtin_ia32_vfmaddsubph256_round_mask:
15496 case X86::BI__builtin_ia32_vfmaddsubph256_round_maskz:
15497 case X86::BI__builtin_ia32_vfmaddsubph256_round_mask3:
15498 case X86::BI__builtin_ia32_vfmsubaddph256_round_mask3:
15499 case X86::BI__builtin_ia32_vfmaddsubps256_round_mask:
15500 case X86::BI__builtin_ia32_vfmaddsubps256_round_maskz:
15501 case X86::BI__builtin_ia32_vfmaddsubps256_round_mask3:
15502 case X86::BI__builtin_ia32_vfmsubaddps256_round_mask3:
15503 case X86::BI__builtin_ia32_vfmaddsubpd256_round_mask:
15504 case X86::BI__builtin_ia32_vfmaddsubpd256_round_maskz:
15505 case X86::BI__builtin_ia32_vfmaddsubpd256_round_mask3:
15506 case X86::BI__builtin_ia32_vfmsubaddpd256_round_mask3:
15507 return EmitX86FMAExpr(*this, E, Ops, BuiltinID, /*IsAddSub*/ true);
15508
15509 case X86::BI__builtin_ia32_movdqa32store128_mask:
15510 case X86::BI__builtin_ia32_movdqa64store128_mask:
15511 case X86::BI__builtin_ia32_storeaps128_mask:
15512 case X86::BI__builtin_ia32_storeapd128_mask:
15513 case X86::BI__builtin_ia32_movdqa32store256_mask:
15514 case X86::BI__builtin_ia32_movdqa64store256_mask:
15515 case X86::BI__builtin_ia32_storeaps256_mask:
15516 case X86::BI__builtin_ia32_storeapd256_mask:
15517 case X86::BI__builtin_ia32_movdqa32store512_mask:
15518 case X86::BI__builtin_ia32_movdqa64store512_mask:
15519 case X86::BI__builtin_ia32_storeaps512_mask:
15520 case X86::BI__builtin_ia32_storeapd512_mask:
15521 return EmitX86MaskedStore(
15522 *this, Ops,
15523 getContext().getTypeAlignInChars(E->getArg(1)->getType()).getAsAlign());
15524
15525 case X86::BI__builtin_ia32_loadups128_mask:
15526 case X86::BI__builtin_ia32_loadups256_mask:
15527 case X86::BI__builtin_ia32_loadups512_mask:
15528 case X86::BI__builtin_ia32_loadupd128_mask:
15529 case X86::BI__builtin_ia32_loadupd256_mask:
15530 case X86::BI__builtin_ia32_loadupd512_mask:
15531 case X86::BI__builtin_ia32_loaddquqi128_mask:
15532 case X86::BI__builtin_ia32_loaddquqi256_mask:
15533 case X86::BI__builtin_ia32_loaddquqi512_mask:
15534 case X86::BI__builtin_ia32_loaddquhi128_mask:
15535 case X86::BI__builtin_ia32_loaddquhi256_mask:
15536 case X86::BI__builtin_ia32_loaddquhi512_mask:
15537 case X86::BI__builtin_ia32_loaddqusi128_mask:
15538 case X86::BI__builtin_ia32_loaddqusi256_mask:
15539 case X86::BI__builtin_ia32_loaddqusi512_mask:
15540 case X86::BI__builtin_ia32_loaddqudi128_mask:
15541 case X86::BI__builtin_ia32_loaddqudi256_mask:
15542 case X86::BI__builtin_ia32_loaddqudi512_mask:
15543 return EmitX86MaskedLoad(*this, Ops, Align(1));
15544
15545 case X86::BI__builtin_ia32_loadsbf16128_mask:
15546 case X86::BI__builtin_ia32_loadsh128_mask:
15547 case X86::BI__builtin_ia32_loadss128_mask:
15548 case X86::BI__builtin_ia32_loadsd128_mask:
15549 return EmitX86MaskedLoad(*this, Ops, Align(1));
15550
15551 case X86::BI__builtin_ia32_loadaps128_mask:
15552 case X86::BI__builtin_ia32_loadaps256_mask:
15553 case X86::BI__builtin_ia32_loadaps512_mask:
15554 case X86::BI__builtin_ia32_loadapd128_mask:
15555 case X86::BI__builtin_ia32_loadapd256_mask:
15556 case X86::BI__builtin_ia32_loadapd512_mask:
15557 case X86::BI__builtin_ia32_movdqa32load128_mask:
15558 case X86::BI__builtin_ia32_movdqa32load256_mask:
15559 case X86::BI__builtin_ia32_movdqa32load512_mask:
15560 case X86::BI__builtin_ia32_movdqa64load128_mask:
15561 case X86::BI__builtin_ia32_movdqa64load256_mask:
15562 case X86::BI__builtin_ia32_movdqa64load512_mask:
15563 return EmitX86MaskedLoad(
15564 *this, Ops,
15565 getContext().getTypeAlignInChars(E->getArg(1)->getType()).getAsAlign());
15566
15567 case X86::BI__builtin_ia32_expandloaddf128_mask:
15568 case X86::BI__builtin_ia32_expandloaddf256_mask:
15569 case X86::BI__builtin_ia32_expandloaddf512_mask:
15570 case X86::BI__builtin_ia32_expandloadsf128_mask:
15571 case X86::BI__builtin_ia32_expandloadsf256_mask:
15572 case X86::BI__builtin_ia32_expandloadsf512_mask:
15573 case X86::BI__builtin_ia32_expandloaddi128_mask:
15574 case X86::BI__builtin_ia32_expandloaddi256_mask:
15575 case X86::BI__builtin_ia32_expandloaddi512_mask:
15576 case X86::BI__builtin_ia32_expandloadsi128_mask:
15577 case X86::BI__builtin_ia32_expandloadsi256_mask:
15578 case X86::BI__builtin_ia32_expandloadsi512_mask:
15579 case X86::BI__builtin_ia32_expandloadhi128_mask:
15580 case X86::BI__builtin_ia32_expandloadhi256_mask:
15581 case X86::BI__builtin_ia32_expandloadhi512_mask:
15582 case X86::BI__builtin_ia32_expandloadqi128_mask:
15583 case X86::BI__builtin_ia32_expandloadqi256_mask:
15584 case X86::BI__builtin_ia32_expandloadqi512_mask:
15585 return EmitX86ExpandLoad(*this, Ops);
15586
15587 case X86::BI__builtin_ia32_compressstoredf128_mask:
15588 case X86::BI__builtin_ia32_compressstoredf256_mask:
15589 case X86::BI__builtin_ia32_compressstoredf512_mask:
15590 case X86::BI__builtin_ia32_compressstoresf128_mask:
15591 case X86::BI__builtin_ia32_compressstoresf256_mask:
15592 case X86::BI__builtin_ia32_compressstoresf512_mask:
15593 case X86::BI__builtin_ia32_compressstoredi128_mask:
15594 case X86::BI__builtin_ia32_compressstoredi256_mask:
15595 case X86::BI__builtin_ia32_compressstoredi512_mask:
15596 case X86::BI__builtin_ia32_compressstoresi128_mask:
15597 case X86::BI__builtin_ia32_compressstoresi256_mask:
15598 case X86::BI__builtin_ia32_compressstoresi512_mask:
15599 case X86::BI__builtin_ia32_compressstorehi128_mask:
15600 case X86::BI__builtin_ia32_compressstorehi256_mask:
15601 case X86::BI__builtin_ia32_compressstorehi512_mask:
15602 case X86::BI__builtin_ia32_compressstoreqi128_mask:
15603 case X86::BI__builtin_ia32_compressstoreqi256_mask:
15604 case X86::BI__builtin_ia32_compressstoreqi512_mask:
15605 return EmitX86CompressStore(*this, Ops);
15606
15607 case X86::BI__builtin_ia32_expanddf128_mask:
15608 case X86::BI__builtin_ia32_expanddf256_mask:
15609 case X86::BI__builtin_ia32_expanddf512_mask:
15610 case X86::BI__builtin_ia32_expandsf128_mask:
15611 case X86::BI__builtin_ia32_expandsf256_mask:
15612 case X86::BI__builtin_ia32_expandsf512_mask:
15613 case X86::BI__builtin_ia32_expanddi128_mask:
15614 case X86::BI__builtin_ia32_expanddi256_mask:
15615 case X86::BI__builtin_ia32_expanddi512_mask:
15616 case X86::BI__builtin_ia32_expandsi128_mask:
15617 case X86::BI__builtin_ia32_expandsi256_mask:
15618 case X86::BI__builtin_ia32_expandsi512_mask:
15619 case X86::BI__builtin_ia32_expandhi128_mask:
15620 case X86::BI__builtin_ia32_expandhi256_mask:
15621 case X86::BI__builtin_ia32_expandhi512_mask:
15622 case X86::BI__builtin_ia32_expandqi128_mask:
15623 case X86::BI__builtin_ia32_expandqi256_mask:
15624 case X86::BI__builtin_ia32_expandqi512_mask:
15625 return EmitX86CompressExpand(*this, Ops, /*IsCompress*/false);
15626
15627 case X86::BI__builtin_ia32_compressdf128_mask:
15628 case X86::BI__builtin_ia32_compressdf256_mask:
15629 case X86::BI__builtin_ia32_compressdf512_mask:
15630 case X86::BI__builtin_ia32_compresssf128_mask:
15631 case X86::BI__builtin_ia32_compresssf256_mask:
15632 case X86::BI__builtin_ia32_compresssf512_mask:
15633 case X86::BI__builtin_ia32_compressdi128_mask:
15634 case X86::BI__builtin_ia32_compressdi256_mask:
15635 case X86::BI__builtin_ia32_compressdi512_mask:
15636 case X86::BI__builtin_ia32_compresssi128_mask:
15637 case X86::BI__builtin_ia32_compresssi256_mask:
15638 case X86::BI__builtin_ia32_compresssi512_mask:
15639 case X86::BI__builtin_ia32_compresshi128_mask:
15640 case X86::BI__builtin_ia32_compresshi256_mask:
15641 case X86::BI__builtin_ia32_compresshi512_mask:
15642 case X86::BI__builtin_ia32_compressqi128_mask:
15643 case X86::BI__builtin_ia32_compressqi256_mask:
15644 case X86::BI__builtin_ia32_compressqi512_mask:
15645 return EmitX86CompressExpand(*this, Ops, /*IsCompress*/true);
15646
15647 case X86::BI__builtin_ia32_gather3div2df:
15648 case X86::BI__builtin_ia32_gather3div2di:
15649 case X86::BI__builtin_ia32_gather3div4df:
15650 case X86::BI__builtin_ia32_gather3div4di:
15651 case X86::BI__builtin_ia32_gather3div4sf:
15652 case X86::BI__builtin_ia32_gather3div4si:
15653 case X86::BI__builtin_ia32_gather3div8sf:
15654 case X86::BI__builtin_ia32_gather3div8si:
15655 case X86::BI__builtin_ia32_gather3siv2df:
15656 case X86::BI__builtin_ia32_gather3siv2di:
15657 case X86::BI__builtin_ia32_gather3siv4df:
15658 case X86::BI__builtin_ia32_gather3siv4di:
15659 case X86::BI__builtin_ia32_gather3siv4sf:
15660 case X86::BI__builtin_ia32_gather3siv4si:
15661 case X86::BI__builtin_ia32_gather3siv8sf:
15662 case X86::BI__builtin_ia32_gather3siv8si:
15663 case X86::BI__builtin_ia32_gathersiv8df:
15664 case X86::BI__builtin_ia32_gathersiv16sf:
15665 case X86::BI__builtin_ia32_gatherdiv8df:
15666 case X86::BI__builtin_ia32_gatherdiv16sf:
15667 case X86::BI__builtin_ia32_gathersiv8di:
15668 case X86::BI__builtin_ia32_gathersiv16si:
15669 case X86::BI__builtin_ia32_gatherdiv8di:
15670 case X86::BI__builtin_ia32_gatherdiv16si: {
15671 Intrinsic::ID IID;
15672 switch (BuiltinID) {
15673 default: llvm_unreachable("Unexpected builtin");
15674 case X86::BI__builtin_ia32_gather3div2df:
15675 IID = Intrinsic::x86_avx512_mask_gather3div2_df;
15676 break;
15677 case X86::BI__builtin_ia32_gather3div2di:
15678 IID = Intrinsic::x86_avx512_mask_gather3div2_di;
15679 break;
15680 case X86::BI__builtin_ia32_gather3div4df:
15681 IID = Intrinsic::x86_avx512_mask_gather3div4_df;
15682 break;
15683 case X86::BI__builtin_ia32_gather3div4di:
15684 IID = Intrinsic::x86_avx512_mask_gather3div4_di;
15685 break;
15686 case X86::BI__builtin_ia32_gather3div4sf:
15687 IID = Intrinsic::x86_avx512_mask_gather3div4_sf;
15688 break;
15689 case X86::BI__builtin_ia32_gather3div4si:
15690 IID = Intrinsic::x86_avx512_mask_gather3div4_si;
15691 break;
15692 case X86::BI__builtin_ia32_gather3div8sf:
15693 IID = Intrinsic::x86_avx512_mask_gather3div8_sf;
15694 break;
15695 case X86::BI__builtin_ia32_gather3div8si:
15696 IID = Intrinsic::x86_avx512_mask_gather3div8_si;
15697 break;
15698 case X86::BI__builtin_ia32_gather3siv2df:
15699 IID = Intrinsic::x86_avx512_mask_gather3siv2_df;
15700 break;
15701 case X86::BI__builtin_ia32_gather3siv2di:
15702 IID = Intrinsic::x86_avx512_mask_gather3siv2_di;
15703 break;
15704 case X86::BI__builtin_ia32_gather3siv4df:
15705 IID = Intrinsic::x86_avx512_mask_gather3siv4_df;
15706 break;
15707 case X86::BI__builtin_ia32_gather3siv4di:
15708 IID = Intrinsic::x86_avx512_mask_gather3siv4_di;
15709 break;
15710 case X86::BI__builtin_ia32_gather3siv4sf:
15711 IID = Intrinsic::x86_avx512_mask_gather3siv4_sf;
15712 break;
15713 case X86::BI__builtin_ia32_gather3siv4si:
15714 IID = Intrinsic::x86_avx512_mask_gather3siv4_si;
15715 break;
15716 case X86::BI__builtin_ia32_gather3siv8sf:
15717 IID = Intrinsic::x86_avx512_mask_gather3siv8_sf;
15718 break;
15719 case X86::BI__builtin_ia32_gather3siv8si:
15720 IID = Intrinsic::x86_avx512_mask_gather3siv8_si;
15721 break;
15722 case X86::BI__builtin_ia32_gathersiv8df:
15723 IID = Intrinsic::x86_avx512_mask_gather_dpd_512;
15724 break;
15725 case X86::BI__builtin_ia32_gathersiv16sf:
15726 IID = Intrinsic::x86_avx512_mask_gather_dps_512;
15727 break;
15728 case X86::BI__builtin_ia32_gatherdiv8df:
15729 IID = Intrinsic::x86_avx512_mask_gather_qpd_512;
15730 break;
15731 case X86::BI__builtin_ia32_gatherdiv16sf:
15732 IID = Intrinsic::x86_avx512_mask_gather_qps_512;
15733 break;
15734 case X86::BI__builtin_ia32_gathersiv8di:
15735 IID = Intrinsic::x86_avx512_mask_gather_dpq_512;
15736 break;
15737 case X86::BI__builtin_ia32_gathersiv16si:
15738 IID = Intrinsic::x86_avx512_mask_gather_dpi_512;
15739 break;
15740 case X86::BI__builtin_ia32_gatherdiv8di:
15741 IID = Intrinsic::x86_avx512_mask_gather_qpq_512;
15742 break;
15743 case X86::BI__builtin_ia32_gatherdiv16si:
15744 IID = Intrinsic::x86_avx512_mask_gather_qpi_512;
15745 break;
15746 }
15747
15748 unsigned MinElts = std::min(
15749 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements(),
15750 cast<llvm::FixedVectorType>(Ops[2]->getType())->getNumElements());
15751 Ops[3] = getMaskVecValue(*this, Ops[3], MinElts);
15752 Function *Intr = CGM.getIntrinsic(IID);
15753 return Builder.CreateCall(Intr, Ops);
15754 }
15755
15756 case X86::BI__builtin_ia32_scattersiv8df:
15757 case X86::BI__builtin_ia32_scattersiv16sf:
15758 case X86::BI__builtin_ia32_scatterdiv8df:
15759 case X86::BI__builtin_ia32_scatterdiv16sf:
15760 case X86::BI__builtin_ia32_scattersiv8di:
15761 case X86::BI__builtin_ia32_scattersiv16si:
15762 case X86::BI__builtin_ia32_scatterdiv8di:
15763 case X86::BI__builtin_ia32_scatterdiv16si:
15764 case X86::BI__builtin_ia32_scatterdiv2df:
15765 case X86::BI__builtin_ia32_scatterdiv2di:
15766 case X86::BI__builtin_ia32_scatterdiv4df:
15767 case X86::BI__builtin_ia32_scatterdiv4di:
15768 case X86::BI__builtin_ia32_scatterdiv4sf:
15769 case X86::BI__builtin_ia32_scatterdiv4si:
15770 case X86::BI__builtin_ia32_scatterdiv8sf:
15771 case X86::BI__builtin_ia32_scatterdiv8si:
15772 case X86::BI__builtin_ia32_scattersiv2df:
15773 case X86::BI__builtin_ia32_scattersiv2di:
15774 case X86::BI__builtin_ia32_scattersiv4df:
15775 case X86::BI__builtin_ia32_scattersiv4di:
15776 case X86::BI__builtin_ia32_scattersiv4sf:
15777 case X86::BI__builtin_ia32_scattersiv4si:
15778 case X86::BI__builtin_ia32_scattersiv8sf:
15779 case X86::BI__builtin_ia32_scattersiv8si: {
15780 Intrinsic::ID IID;
15781 switch (BuiltinID) {
15782 default: llvm_unreachable("Unexpected builtin");
15783 case X86::BI__builtin_ia32_scattersiv8df:
15784 IID = Intrinsic::x86_avx512_mask_scatter_dpd_512;
15785 break;
15786 case X86::BI__builtin_ia32_scattersiv16sf:
15787 IID = Intrinsic::x86_avx512_mask_scatter_dps_512;
15788 break;
15789 case X86::BI__builtin_ia32_scatterdiv8df:
15790 IID = Intrinsic::x86_avx512_mask_scatter_qpd_512;
15791 break;
15792 case X86::BI__builtin_ia32_scatterdiv16sf:
15793 IID = Intrinsic::x86_avx512_mask_scatter_qps_512;
15794 break;
15795 case X86::BI__builtin_ia32_scattersiv8di:
15796 IID = Intrinsic::x86_avx512_mask_scatter_dpq_512;
15797 break;
15798 case X86::BI__builtin_ia32_scattersiv16si:
15799 IID = Intrinsic::x86_avx512_mask_scatter_dpi_512;
15800 break;
15801 case X86::BI__builtin_ia32_scatterdiv8di:
15802 IID = Intrinsic::x86_avx512_mask_scatter_qpq_512;
15803 break;
15804 case X86::BI__builtin_ia32_scatterdiv16si:
15805 IID = Intrinsic::x86_avx512_mask_scatter_qpi_512;
15806 break;
15807 case X86::BI__builtin_ia32_scatterdiv2df:
15808 IID = Intrinsic::x86_avx512_mask_scatterdiv2_df;
15809 break;
15810 case X86::BI__builtin_ia32_scatterdiv2di:
15811 IID = Intrinsic::x86_avx512_mask_scatterdiv2_di;
15812 break;
15813 case X86::BI__builtin_ia32_scatterdiv4df:
15814 IID = Intrinsic::x86_avx512_mask_scatterdiv4_df;
15815 break;
15816 case X86::BI__builtin_ia32_scatterdiv4di:
15817 IID = Intrinsic::x86_avx512_mask_scatterdiv4_di;
15818 break;
15819 case X86::BI__builtin_ia32_scatterdiv4sf:
15820 IID = Intrinsic::x86_avx512_mask_scatterdiv4_sf;
15821 break;
15822 case X86::BI__builtin_ia32_scatterdiv4si:
15823 IID = Intrinsic::x86_avx512_mask_scatterdiv4_si;
15824 break;
15825 case X86::BI__builtin_ia32_scatterdiv8sf:
15826 IID = Intrinsic::x86_avx512_mask_scatterdiv8_sf;
15827 break;
15828 case X86::BI__builtin_ia32_scatterdiv8si:
15829 IID = Intrinsic::x86_avx512_mask_scatterdiv8_si;
15830 break;
15831 case X86::BI__builtin_ia32_scattersiv2df:
15832 IID = Intrinsic::x86_avx512_mask_scattersiv2_df;
15833 break;
15834 case X86::BI__builtin_ia32_scattersiv2di:
15835 IID = Intrinsic::x86_avx512_mask_scattersiv2_di;
15836 break;
15837 case X86::BI__builtin_ia32_scattersiv4df:
15838 IID = Intrinsic::x86_avx512_mask_scattersiv4_df;
15839 break;
15840 case X86::BI__builtin_ia32_scattersiv4di:
15841 IID = Intrinsic::x86_avx512_mask_scattersiv4_di;
15842 break;
15843 case X86::BI__builtin_ia32_scattersiv4sf:
15844 IID = Intrinsic::x86_avx512_mask_scattersiv4_sf;
15845 break;
15846 case X86::BI__builtin_ia32_scattersiv4si:
15847 IID = Intrinsic::x86_avx512_mask_scattersiv4_si;
15848 break;
15849 case X86::BI__builtin_ia32_scattersiv8sf:
15850 IID = Intrinsic::x86_avx512_mask_scattersiv8_sf;
15851 break;
15852 case X86::BI__builtin_ia32_scattersiv8si:
15853 IID = Intrinsic::x86_avx512_mask_scattersiv8_si;
15854 break;
15855 }
15856
15857 unsigned MinElts = std::min(
15858 cast<llvm::FixedVectorType>(Ops[2]->getType())->getNumElements(),
15859 cast<llvm::FixedVectorType>(Ops[3]->getType())->getNumElements());
15860 Ops[1] = getMaskVecValue(*this, Ops[1], MinElts);
15861 Function *Intr = CGM.getIntrinsic(IID);
15862 return Builder.CreateCall(Intr, Ops);
15863 }
15864
15865 case X86::BI__builtin_ia32_vextractf128_pd256:
15866 case X86::BI__builtin_ia32_vextractf128_ps256:
15867 case X86::BI__builtin_ia32_vextractf128_si256:
15868 case X86::BI__builtin_ia32_extract128i256:
15869 case X86::BI__builtin_ia32_extractf64x4_mask:
15870 case X86::BI__builtin_ia32_extractf32x4_mask:
15871 case X86::BI__builtin_ia32_extracti64x4_mask:
15872 case X86::BI__builtin_ia32_extracti32x4_mask:
15873 case X86::BI__builtin_ia32_extractf32x8_mask:
15874 case X86::BI__builtin_ia32_extracti32x8_mask:
15875 case X86::BI__builtin_ia32_extractf32x4_256_mask:
15876 case X86::BI__builtin_ia32_extracti32x4_256_mask:
15877 case X86::BI__builtin_ia32_extractf64x2_256_mask:
15878 case X86::BI__builtin_ia32_extracti64x2_256_mask:
15879 case X86::BI__builtin_ia32_extractf64x2_512_mask:
15880 case X86::BI__builtin_ia32_extracti64x2_512_mask: {
15881 auto *DstTy = cast<llvm::FixedVectorType>(ConvertType(E->getType()));
15882 unsigned NumElts = DstTy->getNumElements();
15883 unsigned SrcNumElts =
15884 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15885 unsigned SubVectors = SrcNumElts / NumElts;
15886 unsigned Index = cast<ConstantInt>(Ops[1])->getZExtValue();
15887 assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors");
15888 Index &= SubVectors - 1; // Remove any extra bits.
15889 Index *= NumElts;
15890
15891 int Indices[16];
15892 for (unsigned i = 0; i != NumElts; ++i)
15893 Indices[i] = i + Index;
15894
15895 Value *Res = Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
15896 "extract");
15897
15898 if (Ops.size() == 4)
15899 Res = EmitX86Select(*this, Ops[3], Res, Ops[2]);
15900
15901 return Res;
15902 }
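  // Subvector extraction is a shufflevector that picks NumElts consecutive
  // elements starting at Index * NumElts; e.g. extracting subvector 1 of an
  // 8 x double source as 4 x double uses indices <4, 5, 6, 7>. The 4-operand
  // masked forms then select between the result and the passthru operand.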
15903 case X86::BI__builtin_ia32_vinsertf128_pd256:
15904 case X86::BI__builtin_ia32_vinsertf128_ps256:
15905 case X86::BI__builtin_ia32_vinsertf128_si256:
15906 case X86::BI__builtin_ia32_insert128i256:
15907 case X86::BI__builtin_ia32_insertf64x4:
15908 case X86::BI__builtin_ia32_insertf32x4:
15909 case X86::BI__builtin_ia32_inserti64x4:
15910 case X86::BI__builtin_ia32_inserti32x4:
15911 case X86::BI__builtin_ia32_insertf32x8:
15912 case X86::BI__builtin_ia32_inserti32x8:
15913 case X86::BI__builtin_ia32_insertf32x4_256:
15914 case X86::BI__builtin_ia32_inserti32x4_256:
15915 case X86::BI__builtin_ia32_insertf64x2_256:
15916 case X86::BI__builtin_ia32_inserti64x2_256:
15917 case X86::BI__builtin_ia32_insertf64x2_512:
15918 case X86::BI__builtin_ia32_inserti64x2_512: {
15919 unsigned DstNumElts =
15920 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15921 unsigned SrcNumElts =
15922 cast<llvm::FixedVectorType>(Ops[1]->getType())->getNumElements();
15923 unsigned SubVectors = DstNumElts / SrcNumElts;
15924 unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue();
15925 assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors");
15926 Index &= SubVectors - 1; // Remove any extra bits.
15927 Index *= SrcNumElts;
15928
15929 int Indices[16];
15930 for (unsigned i = 0; i != DstNumElts; ++i)
15931 Indices[i] = (i >= SrcNumElts) ? SrcNumElts + (i % SrcNumElts) : i;
15932
15933 Value *Op1 = Builder.CreateShuffleVector(
15934 Ops[1], ArrayRef(Indices, DstNumElts), "widen");
15935
15936 for (unsigned i = 0; i != DstNumElts; ++i) {
15937 if (i >= Index && i < (Index + SrcNumElts))
15938 Indices[i] = (i - Index) + DstNumElts;
15939 else
15940 Indices[i] = i;
15941 }
15942
15943 return Builder.CreateShuffleVector(Ops[0], Op1,
15944 ArrayRef(Indices, DstNumElts), "insert");
15945 }
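  // Insertion is done with two shuffles: the small source is first widened to
  // the destination width, then blended in so that elements
  // [Index * SrcNumElts, Index * SrcNumElts + SrcNumElts) come from the
  // widened source and everything else comes from Ops[0].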
15946 case X86::BI__builtin_ia32_pmovqd512_mask:
15947 case X86::BI__builtin_ia32_pmovwb512_mask: {
15948 Value *Res = Builder.CreateTrunc(Ops[0], Ops[1]->getType());
15949 return EmitX86Select(*this, Ops[2], Res, Ops[1]);
15950 }
15951 case X86::BI__builtin_ia32_pmovdb512_mask:
15952 case X86::BI__builtin_ia32_pmovdw512_mask:
15953 case X86::BI__builtin_ia32_pmovqw512_mask: {
15954 if (const auto *C = dyn_cast<Constant>(Ops[2]))
15955 if (C->isAllOnesValue())
15956 return Builder.CreateTrunc(Ops[0], Ops[1]->getType());
15957
15958 Intrinsic::ID IID;
15959 switch (BuiltinID) {
15960 default: llvm_unreachable("Unsupported intrinsic!");
15961 case X86::BI__builtin_ia32_pmovdb512_mask:
15962 IID = Intrinsic::x86_avx512_mask_pmov_db_512;
15963 break;
15964 case X86::BI__builtin_ia32_pmovdw512_mask:
15965 IID = Intrinsic::x86_avx512_mask_pmov_dw_512;
15966 break;
15967 case X86::BI__builtin_ia32_pmovqw512_mask:
15968 IID = Intrinsic::x86_avx512_mask_pmov_qw_512;
15969 break;
15970 }
15971
15972 Function *Intr = CGM.getIntrinsic(IID);
15973 return Builder.CreateCall(Intr, Ops);
15974 }
15975 case X86::BI__builtin_ia32_pblendw128:
15976 case X86::BI__builtin_ia32_blendpd:
15977 case X86::BI__builtin_ia32_blendps:
15978 case X86::BI__builtin_ia32_blendpd256:
15979 case X86::BI__builtin_ia32_blendps256:
15980 case X86::BI__builtin_ia32_pblendw256:
15981 case X86::BI__builtin_ia32_pblendd128:
15982 case X86::BI__builtin_ia32_pblendd256: {
15983 unsigned NumElts =
15984 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15985 unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
15986
15987 int Indices[16];
15988 // If there are more than 8 elements, the immediate is used twice so make
15989 // sure we handle that.
15990 for (unsigned i = 0; i != NumElts; ++i)
15991 Indices[i] = ((Imm >> (i % 8)) & 0x1) ? NumElts + i : i;
15992
15993 return Builder.CreateShuffleVector(Ops[0], Ops[1],
15994 ArrayRef(Indices, NumElts), "blend");
15995 }
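  // Each immediate bit selects the corresponding element: bit (i % 8) set
  // takes element i from Ops[1], clear takes it from Ops[0]. For example, a
  // pblendw128 immediate of 0x0F takes words 0-3 from Ops[1] and words 4-7
  // from Ops[0].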
15996 case X86::BI__builtin_ia32_pshuflw:
15997 case X86::BI__builtin_ia32_pshuflw256:
15998 case X86::BI__builtin_ia32_pshuflw512: {
15999 uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
16000 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
16001 unsigned NumElts = Ty->getNumElements();
16002
16003    // Splat the 8 bits of the immediate 4 times to help the loop wrap around.
16004 Imm = (Imm & 0xff) * 0x01010101;
16005
16006 int Indices[32];
16007 for (unsigned l = 0; l != NumElts; l += 8) {
16008 for (unsigned i = 0; i != 4; ++i) {
16009 Indices[l + i] = l + (Imm & 3);
16010 Imm >>= 2;
16011 }
16012 for (unsigned i = 4; i != 8; ++i)
16013 Indices[l + i] = l + i;
16014 }
16015
16016 return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
16017 "pshuflw");
16018 }
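  // Each 2-bit field of the immediate selects one of the four low words of a
  // 128-bit lane; the four high words pass through unchanged. Splatting the
  // immediate above lets the same 8 bits be consumed once per lane.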
16019 case X86::BI__builtin_ia32_pshufhw:
16020 case X86::BI__builtin_ia32_pshufhw256:
16021 case X86::BI__builtin_ia32_pshufhw512: {
16022 uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
16023 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
16024 unsigned NumElts = Ty->getNumElements();
16025
16026    // Splat the 8 bits of the immediate 4 times to help the loop wrap around.
16027 Imm = (Imm & 0xff) * 0x01010101;
16028
16029 int Indices[32];
16030 for (unsigned l = 0; l != NumElts; l += 8) {
16031 for (unsigned i = 0; i != 4; ++i)
16032 Indices[l + i] = l + i;
16033 for (unsigned i = 4; i != 8; ++i) {
16034 Indices[l + i] = l + 4 + (Imm & 3);
16035 Imm >>= 2;
16036 }
16037 }
16038
16039 return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
16040 "pshufhw");
16041 }
16042 case X86::BI__builtin_ia32_pshufd:
16043 case X86::BI__builtin_ia32_pshufd256:
16044 case X86::BI__builtin_ia32_pshufd512:
16045 case X86::BI__builtin_ia32_vpermilpd:
16046 case X86::BI__builtin_ia32_vpermilps:
16047 case X86::BI__builtin_ia32_vpermilpd256:
16048 case X86::BI__builtin_ia32_vpermilps256:
16049 case X86::BI__builtin_ia32_vpermilpd512:
16050 case X86::BI__builtin_ia32_vpermilps512: {
16051 uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
16052 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
16053 unsigned NumElts = Ty->getNumElements();
16054 unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128;
16055 unsigned NumLaneElts = NumElts / NumLanes;
16056
16057    // Splat the 8 bits of the immediate 4 times to help the loop wrap around.
16058 Imm = (Imm & 0xff) * 0x01010101;
16059
16060 int Indices[16];
16061 for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
16062 for (unsigned i = 0; i != NumLaneElts; ++i) {
16063 Indices[i + l] = (Imm % NumLaneElts) + l;
16064 Imm /= NumLaneElts;
16065 }
16066 }
16067
16068 return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
16069 "permil");
16070 }
16071 case X86::BI__builtin_ia32_shufpd:
16072 case X86::BI__builtin_ia32_shufpd256:
16073 case X86::BI__builtin_ia32_shufpd512:
16074 case X86::BI__builtin_ia32_shufps:
16075 case X86::BI__builtin_ia32_shufps256:
16076 case X86::BI__builtin_ia32_shufps512: {
16077 uint32_t Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
16078 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
16079 unsigned NumElts = Ty->getNumElements();
16080 unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128;
16081 unsigned NumLaneElts = NumElts / NumLanes;
16082
16083    // Splat the 8 bits of the immediate 4 times to help the loop wrap around.
16084 Imm = (Imm & 0xff) * 0x01010101;
16085
16086 int Indices[16];
16087 for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
16088 for (unsigned i = 0; i != NumLaneElts; ++i) {
16089 unsigned Index = Imm % NumLaneElts;
16090 Imm /= NumLaneElts;
16091 if (i >= (NumLaneElts / 2))
16092 Index += NumElts;
16093 Indices[l + i] = l + Index;
16094 }
16095 }
16096
16097 return Builder.CreateShuffleVector(Ops[0], Ops[1],
16098 ArrayRef(Indices, NumElts), "shufp");
16099 }
16100 case X86::BI__builtin_ia32_permdi256:
16101 case X86::BI__builtin_ia32_permdf256:
16102 case X86::BI__builtin_ia32_permdi512:
16103 case X86::BI__builtin_ia32_permdf512: {
16104 unsigned Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
16105 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
16106 unsigned NumElts = Ty->getNumElements();
16107
16108 // These intrinsics operate on 256-bit lanes of four 64-bit elements.
16109 int Indices[8];
16110 for (unsigned l = 0; l != NumElts; l += 4)
16111 for (unsigned i = 0; i != 4; ++i)
16112 Indices[l + i] = l + ((Imm >> (2 * i)) & 0x3);
16113
16114 return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
16115 "perm");
16116 }
16117 case X86::BI__builtin_ia32_palignr128:
16118 case X86::BI__builtin_ia32_palignr256:
16119 case X86::BI__builtin_ia32_palignr512: {
16120 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0xff;
16121
16122 unsigned NumElts =
16123 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
16124 assert(NumElts % 16 == 0);
16125
16126 // If palignr is shifting the pair of vectors more than the size of two
16127 // lanes, emit zero.
16128 if (ShiftVal >= 32)
16129 return llvm::Constant::getNullValue(ConvertType(E->getType()));
16130
16131 // If palignr is shifting the pair of input vectors more than one lane,
16132 // but less than two lanes, convert to shifting in zeroes.
16133 if (ShiftVal > 16) {
16134 ShiftVal -= 16;
16135 Ops[1] = Ops[0];
16136 Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType());
16137 }
16138
16139 int Indices[64];
16140 // 256-bit palignr operates on 128-bit lanes so we need to handle that
16141 for (unsigned l = 0; l != NumElts; l += 16) {
16142 for (unsigned i = 0; i != 16; ++i) {
16143 unsigned Idx = ShiftVal + i;
16144 if (Idx >= 16)
16145 Idx += NumElts - 16; // End of lane, switch operand.
16146 Indices[l + i] = Idx + l;
16147 }
16148 }
16149
16150 return Builder.CreateShuffleVector(Ops[1], Ops[0],
16151 ArrayRef(Indices, NumElts), "palignr");
16152 }
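  // Per 128-bit lane the result is the 32-byte concatenation Ops[0]:Ops[1]
  // (Ops[0] in the high half) shifted right by ShiftVal bytes, which is why
  // shifts of 17-31 are rewritten above to shift zeroes in and shifts of 32
  // or more fold to zero.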
16153 case X86::BI__builtin_ia32_alignd128:
16154 case X86::BI__builtin_ia32_alignd256:
16155 case X86::BI__builtin_ia32_alignd512:
16156 case X86::BI__builtin_ia32_alignq128:
16157 case X86::BI__builtin_ia32_alignq256:
16158 case X86::BI__builtin_ia32_alignq512: {
16159 unsigned NumElts =
16160 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
16161 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0xff;
16162
16163 // Mask the shift amount to width of a vector.
16164 ShiftVal &= NumElts - 1;
16165
16166 int Indices[16];
16167 for (unsigned i = 0; i != NumElts; ++i)
16168 Indices[i] = i + ShiftVal;
16169
16170 return Builder.CreateShuffleVector(Ops[1], Ops[0],
16171 ArrayRef(Indices, NumElts), "valign");
16172 }
16173 case X86::BI__builtin_ia32_shuf_f32x4_256:
16174 case X86::BI__builtin_ia32_shuf_f64x2_256:
16175 case X86::BI__builtin_ia32_shuf_i32x4_256:
16176 case X86::BI__builtin_ia32_shuf_i64x2_256:
16177 case X86::BI__builtin_ia32_shuf_f32x4:
16178 case X86::BI__builtin_ia32_shuf_f64x2:
16179 case X86::BI__builtin_ia32_shuf_i32x4:
16180 case X86::BI__builtin_ia32_shuf_i64x2: {
16181 unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
16182 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
16183 unsigned NumElts = Ty->getNumElements();
16184 unsigned NumLanes = Ty->getPrimitiveSizeInBits() == 512 ? 4 : 2;
16185 unsigned NumLaneElts = NumElts / NumLanes;
16186
16187 int Indices[16];
16188 for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
16189 unsigned Index = (Imm % NumLanes) * NumLaneElts;
16190 Imm /= NumLanes; // Discard the bits we just used.
16191 if (l >= (NumElts / 2))
16192 Index += NumElts; // Switch to other source.
16193 for (unsigned i = 0; i != NumLaneElts; ++i) {
16194 Indices[l + i] = Index + i;
16195 }
16196 }
16197
16198 return Builder.CreateShuffleVector(Ops[0], Ops[1],
16199 ArrayRef(Indices, NumElts), "shuf");
16200 }
16201
16202 case X86::BI__builtin_ia32_vperm2f128_pd256:
16203 case X86::BI__builtin_ia32_vperm2f128_ps256:
16204 case X86::BI__builtin_ia32_vperm2f128_si256:
16205 case X86::BI__builtin_ia32_permti256: {
16206 unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
16207 unsigned NumElts =
16208 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
16209
16210 // This takes a very simple approach since there are two lanes and a
16211 // shuffle can have 2 inputs. So we reserve the first input for the first
16212 // lane and the second input for the second lane. This may result in
16213 // duplicate sources, but this can be dealt with in the backend.
16214
16215 Value *OutOps[2];
16216 int Indices[8];
16217 for (unsigned l = 0; l != 2; ++l) {
16218 // Determine the source for this lane.
16219 if (Imm & (1 << ((l * 4) + 3)))
16220 OutOps[l] = llvm::ConstantAggregateZero::get(Ops[0]->getType());
16221 else if (Imm & (1 << ((l * 4) + 1)))
16222 OutOps[l] = Ops[1];
16223 else
16224 OutOps[l] = Ops[0];
16225
16226 for (unsigned i = 0; i != NumElts/2; ++i) {
16227 // Start with ith element of the source for this lane.
16228 unsigned Idx = (l * NumElts) + i;
16229 // If bit 0 of the immediate half is set, switch to the high half of
16230 // the source.
16231 if (Imm & (1 << (l * 4)))
16232 Idx += NumElts/2;
16233 Indices[(l * (NumElts/2)) + i] = Idx;
16234 }
16235 }
16236
16237 return Builder.CreateShuffleVector(OutOps[0], OutOps[1],
16238 ArrayRef(Indices, NumElts), "vperm");
16239 }
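  // Decoding of the immediate per result lane l: bit (4*l + 3) zeroes the
  // lane, bit (4*l + 1) picks Ops[1] rather than Ops[0] as the source, and
  // bit (4*l) selects the high 128-bit half of that source.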
16240
16241 case X86::BI__builtin_ia32_pslldqi128_byteshift:
16242 case X86::BI__builtin_ia32_pslldqi256_byteshift:
16243 case X86::BI__builtin_ia32_pslldqi512_byteshift: {
16244 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
16245 auto *ResultType = cast<llvm::FixedVectorType>(Ops[0]->getType());
16246 // Builtin type is vXi64 so multiply by 8 to get bytes.
16247 unsigned NumElts = ResultType->getNumElements() * 8;
16248
16249 // If pslldq is shifting the vector more than 15 bytes, emit zero.
16250 if (ShiftVal >= 16)
16251 return llvm::Constant::getNullValue(ResultType);
16252
16253 int Indices[64];
16254 // 256/512-bit pslldq operates on 128-bit lanes so we need to handle that
16255 for (unsigned l = 0; l != NumElts; l += 16) {
16256 for (unsigned i = 0; i != 16; ++i) {
16257 unsigned Idx = NumElts + i - ShiftVal;
16258 if (Idx < NumElts) Idx -= NumElts - 16; // end of lane, switch operand.
16259 Indices[l + i] = Idx + l;
16260 }
16261 }
16262
16263 auto *VecTy = llvm::FixedVectorType::get(Int8Ty, NumElts);
16264 Value *Cast = Builder.CreateBitCast(Ops[0], VecTy, "cast");
16265 Value *Zero = llvm::Constant::getNullValue(VecTy);
16266 Value *SV = Builder.CreateShuffleVector(
16267 Zero, Cast, ArrayRef(Indices, NumElts), "pslldq");
16268 return Builder.CreateBitCast(SV, Ops[0]->getType(), "cast");
16269 }
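  // The byte shift is modelled as a vXi8 shuffle against an all-zero vector,
  // so each 128-bit lane is shifted left by ShiftVal bytes with zero fill;
  // shift amounts of 16 or more were folded to zero above.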
16270 case X86::BI__builtin_ia32_psrldqi128_byteshift:
16271 case X86::BI__builtin_ia32_psrldqi256_byteshift:
16272 case X86::BI__builtin_ia32_psrldqi512_byteshift: {
16273 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
16274 auto *ResultType = cast<llvm::FixedVectorType>(Ops[0]->getType());
16275 // Builtin type is vXi64 so multiply by 8 to get bytes.
16276 unsigned NumElts = ResultType->getNumElements() * 8;
16277
16278 // If psrldq is shifting the vector more than 15 bytes, emit zero.
16279 if (ShiftVal >= 16)
16280 return llvm::Constant::getNullValue(ResultType);
16281
16282 int Indices[64];
16283 // 256/512-bit psrldq operates on 128-bit lanes so we need to handle that
16284 for (unsigned l = 0; l != NumElts; l += 16) {
16285 for (unsigned i = 0; i != 16; ++i) {
16286 unsigned Idx = i + ShiftVal;
16287 if (Idx >= 16) Idx += NumElts - 16; // end of lane, switch operand.
16288 Indices[l + i] = Idx + l;
16289 }
16290 }
16291
16292 auto *VecTy = llvm::FixedVectorType::get(Int8Ty, NumElts);
16293 Value *Cast = Builder.CreateBitCast(Ops[0], VecTy, "cast");
16294 Value *Zero = llvm::Constant::getNullValue(VecTy);
16295 Value *SV = Builder.CreateShuffleVector(
16296 Cast, Zero, ArrayRef(Indices, NumElts), "psrldq");
16297 return Builder.CreateBitCast(SV, ResultType, "cast");
16298 }
16299 case X86::BI__builtin_ia32_kshiftliqi:
16300 case X86::BI__builtin_ia32_kshiftlihi:
16301 case X86::BI__builtin_ia32_kshiftlisi:
16302 case X86::BI__builtin_ia32_kshiftlidi: {
16303 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
16304 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
16305
16306 if (ShiftVal >= NumElts)
16307 return llvm::Constant::getNullValue(Ops[0]->getType());
16308
16309 Value *In = getMaskVecValue(*this, Ops[0], NumElts);
16310
16311 int Indices[64];
16312 for (unsigned i = 0; i != NumElts; ++i)
16313 Indices[i] = NumElts + i - ShiftVal;
16314
16315 Value *Zero = llvm::Constant::getNullValue(In->getType());
16316 Value *SV = Builder.CreateShuffleVector(
16317 Zero, In, ArrayRef(Indices, NumElts), "kshiftl");
16318 return Builder.CreateBitCast(SV, Ops[0]->getType());
16319 }
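  // The iN mask is first expanded to an <N x i1> vector, shifted left by
  // shuffling zero elements in at the low end (kshiftr below does the same
  // at the high end), and then bitcast back to the integer mask type;
  // shifts of N or more were folded to zero above.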
16320 case X86::BI__builtin_ia32_kshiftriqi:
16321 case X86::BI__builtin_ia32_kshiftrihi:
16322 case X86::BI__builtin_ia32_kshiftrisi:
16323 case X86::BI__builtin_ia32_kshiftridi: {
16324 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
16325 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
16326
16327 if (ShiftVal >= NumElts)
16328 return llvm::Constant::getNullValue(Ops[0]->getType());
16329
16330 Value *In = getMaskVecValue(*this, Ops[0], NumElts);
16331
16332 int Indices[64];
16333 for (unsigned i = 0; i != NumElts; ++i)
16334 Indices[i] = i + ShiftVal;
16335
16336 Value *Zero = llvm::Constant::getNullValue(In->getType());
16337 Value *SV = Builder.CreateShuffleVector(
16338 In, Zero, ArrayRef(Indices, NumElts), "kshiftr");
16339 return Builder.CreateBitCast(SV, Ops[0]->getType());
16340 }
16341 case X86::BI__builtin_ia32_movnti:
16342 case X86::BI__builtin_ia32_movnti64:
16343 case X86::BI__builtin_ia32_movntsd:
16344 case X86::BI__builtin_ia32_movntss: {
16345 llvm::MDNode *Node = llvm::MDNode::get(
16346 getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
16347
16348 Value *Ptr = Ops[0];
16349 Value *Src = Ops[1];
16350
16351 // Extract the 0'th element of the source vector.
16352 if (BuiltinID == X86::BI__builtin_ia32_movntsd ||
16353 BuiltinID == X86::BI__builtin_ia32_movntss)
16354 Src = Builder.CreateExtractElement(Src, (uint64_t)0, "extract");
16355
16356 // Unaligned nontemporal store of the scalar value.
16357 StoreInst *SI = Builder.CreateDefaultAlignedStore(Src, Ptr);
16358 SI->setMetadata(llvm::LLVMContext::MD_nontemporal, Node);
16359 SI->setAlignment(llvm::Align(1));
16360 return SI;
16361 }
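  // A non-temporal store is emitted as an ordinary store carrying
  // !nontemporal metadata and alignment 1; for movntsd/movntss only lane 0
  // of the source vector is stored.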
16362  // Rotate is a special case of funnel shift - the first two args are the same.
16363 case X86::BI__builtin_ia32_vprotb:
16364 case X86::BI__builtin_ia32_vprotw:
16365 case X86::BI__builtin_ia32_vprotd:
16366 case X86::BI__builtin_ia32_vprotq:
16367 case X86::BI__builtin_ia32_vprotbi:
16368 case X86::BI__builtin_ia32_vprotwi:
16369 case X86::BI__builtin_ia32_vprotdi:
16370 case X86::BI__builtin_ia32_vprotqi:
16371 case X86::BI__builtin_ia32_prold128:
16372 case X86::BI__builtin_ia32_prold256:
16373 case X86::BI__builtin_ia32_prold512:
16374 case X86::BI__builtin_ia32_prolq128:
16375 case X86::BI__builtin_ia32_prolq256:
16376 case X86::BI__builtin_ia32_prolq512:
16377 case X86::BI__builtin_ia32_prolvd128:
16378 case X86::BI__builtin_ia32_prolvd256:
16379 case X86::BI__builtin_ia32_prolvd512:
16380 case X86::BI__builtin_ia32_prolvq128:
16381 case X86::BI__builtin_ia32_prolvq256:
16382 case X86::BI__builtin_ia32_prolvq512:
16383 return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], false);
16384 case X86::BI__builtin_ia32_prord128:
16385 case X86::BI__builtin_ia32_prord256:
16386 case X86::BI__builtin_ia32_prord512:
16387 case X86::BI__builtin_ia32_prorq128:
16388 case X86::BI__builtin_ia32_prorq256:
16389 case X86::BI__builtin_ia32_prorq512:
16390 case X86::BI__builtin_ia32_prorvd128:
16391 case X86::BI__builtin_ia32_prorvd256:
16392 case X86::BI__builtin_ia32_prorvd512:
16393 case X86::BI__builtin_ia32_prorvq128:
16394 case X86::BI__builtin_ia32_prorvq256:
16395 case X86::BI__builtin_ia32_prorvq512:
16396 return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], true);
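  // Rotates map onto the generic funnel-shift intrinsics with both value
  // operands equal: a left rotate becomes roughly fshl(x, x, amt) and a right
  // rotate fshr(x, x, amt), with EmitX86FunnelShift broadcasting a scalar or
  // immediate shift amount to the vector width as needed.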
16397 case X86::BI__builtin_ia32_selectb_128:
16398 case X86::BI__builtin_ia32_selectb_256:
16399 case X86::BI__builtin_ia32_selectb_512:
16400 case X86::BI__builtin_ia32_selectw_128:
16401 case X86::BI__builtin_ia32_selectw_256:
16402 case X86::BI__builtin_ia32_selectw_512:
16403 case X86::BI__builtin_ia32_selectd_128:
16404 case X86::BI__builtin_ia32_selectd_256:
16405 case X86::BI__builtin_ia32_selectd_512:
16406 case X86::BI__builtin_ia32_selectq_128:
16407 case X86::BI__builtin_ia32_selectq_256:
16408 case X86::BI__builtin_ia32_selectq_512:
16409 case X86::BI__builtin_ia32_selectph_128:
16410 case X86::BI__builtin_ia32_selectph_256:
16411 case X86::BI__builtin_ia32_selectph_512:
16412 case X86::BI__builtin_ia32_selectpbf_128:
16413 case X86::BI__builtin_ia32_selectpbf_256:
16414 case X86::BI__builtin_ia32_selectpbf_512:
16415 case X86::BI__builtin_ia32_selectps_128:
16416 case X86::BI__builtin_ia32_selectps_256:
16417 case X86::BI__builtin_ia32_selectps_512:
16418 case X86::BI__builtin_ia32_selectpd_128:
16419 case X86::BI__builtin_ia32_selectpd_256:
16420 case X86::BI__builtin_ia32_selectpd_512:
16421 return EmitX86Select(*this, Ops[0], Ops[1], Ops[2]);
16422 case X86::BI__builtin_ia32_selectsh_128:
16423 case X86::BI__builtin_ia32_selectsbf_128:
16424 case X86::BI__builtin_ia32_selectss_128:
16425 case X86::BI__builtin_ia32_selectsd_128: {
16426 Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
16427 Value *B = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
16428 A = EmitX86ScalarSelect(*this, Ops[0], A, B);
16429 return Builder.CreateInsertElement(Ops[1], A, (uint64_t)0);
16430 }
16431 case X86::BI__builtin_ia32_cmpb128_mask:
16432 case X86::BI__builtin_ia32_cmpb256_mask:
16433 case X86::BI__builtin_ia32_cmpb512_mask:
16434 case X86::BI__builtin_ia32_cmpw128_mask:
16435 case X86::BI__builtin_ia32_cmpw256_mask:
16436 case X86::BI__builtin_ia32_cmpw512_mask:
16437 case X86::BI__builtin_ia32_cmpd128_mask:
16438 case X86::BI__builtin_ia32_cmpd256_mask:
16439 case X86::BI__builtin_ia32_cmpd512_mask:
16440 case X86::BI__builtin_ia32_cmpq128_mask:
16441 case X86::BI__builtin_ia32_cmpq256_mask:
16442 case X86::BI__builtin_ia32_cmpq512_mask: {
16443 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
16444 return EmitX86MaskedCompare(*this, CC, true, Ops);
16445 }
16446 case X86::BI__builtin_ia32_ucmpb128_mask:
16447 case X86::BI__builtin_ia32_ucmpb256_mask:
16448 case X86::BI__builtin_ia32_ucmpb512_mask:
16449 case X86::BI__builtin_ia32_ucmpw128_mask:
16450 case X86::BI__builtin_ia32_ucmpw256_mask:
16451 case X86::BI__builtin_ia32_ucmpw512_mask:
16452 case X86::BI__builtin_ia32_ucmpd128_mask:
16453 case X86::BI__builtin_ia32_ucmpd256_mask:
16454 case X86::BI__builtin_ia32_ucmpd512_mask:
16455 case X86::BI__builtin_ia32_ucmpq128_mask:
16456 case X86::BI__builtin_ia32_ucmpq256_mask:
16457 case X86::BI__builtin_ia32_ucmpq512_mask: {
16458 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
16459 return EmitX86MaskedCompare(*this, CC, false, Ops);
16460 }
16461 case X86::BI__builtin_ia32_vpcomb:
16462 case X86::BI__builtin_ia32_vpcomw:
16463 case X86::BI__builtin_ia32_vpcomd:
16464 case X86::BI__builtin_ia32_vpcomq:
16465 return EmitX86vpcom(*this, Ops, true);
16466 case X86::BI__builtin_ia32_vpcomub:
16467 case X86::BI__builtin_ia32_vpcomuw:
16468 case X86::BI__builtin_ia32_vpcomud:
16469 case X86::BI__builtin_ia32_vpcomuq:
16470 return EmitX86vpcom(*this, Ops, false);
16471
16472 case X86::BI__builtin_ia32_kortestcqi:
16473 case X86::BI__builtin_ia32_kortestchi:
16474 case X86::BI__builtin_ia32_kortestcsi:
16475 case X86::BI__builtin_ia32_kortestcdi: {
16476 Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops);
16477 Value *C = llvm::Constant::getAllOnesValue(Ops[0]->getType());
16478 Value *Cmp = Builder.CreateICmpEQ(Or, C);
16479 return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
16480 }
16481 case X86::BI__builtin_ia32_kortestzqi:
16482 case X86::BI__builtin_ia32_kortestzhi:
16483 case X86::BI__builtin_ia32_kortestzsi:
16484 case X86::BI__builtin_ia32_kortestzdi: {
16485 Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops);
16486 Value *C = llvm::Constant::getNullValue(Ops[0]->getType());
16487 Value *Cmp = Builder.CreateICmpEQ(Or, C);
16488 return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
16489 }
16490
16491 case X86::BI__builtin_ia32_ktestcqi:
16492 case X86::BI__builtin_ia32_ktestzqi:
16493 case X86::BI__builtin_ia32_ktestchi:
16494 case X86::BI__builtin_ia32_ktestzhi:
16495 case X86::BI__builtin_ia32_ktestcsi:
16496 case X86::BI__builtin_ia32_ktestzsi:
16497 case X86::BI__builtin_ia32_ktestcdi:
16498 case X86::BI__builtin_ia32_ktestzdi: {
16499 Intrinsic::ID IID;
16500 switch (BuiltinID) {
16501 default: llvm_unreachable("Unsupported intrinsic!");
16502 case X86::BI__builtin_ia32_ktestcqi:
16503 IID = Intrinsic::x86_avx512_ktestc_b;
16504 break;
16505 case X86::BI__builtin_ia32_ktestzqi:
16506 IID = Intrinsic::x86_avx512_ktestz_b;
16507 break;
16508 case X86::BI__builtin_ia32_ktestchi:
16509 IID = Intrinsic::x86_avx512_ktestc_w;
16510 break;
16511 case X86::BI__builtin_ia32_ktestzhi:
16512 IID = Intrinsic::x86_avx512_ktestz_w;
16513 break;
16514 case X86::BI__builtin_ia32_ktestcsi:
16515 IID = Intrinsic::x86_avx512_ktestc_d;
16516 break;
16517 case X86::BI__builtin_ia32_ktestzsi:
16518 IID = Intrinsic::x86_avx512_ktestz_d;
16519 break;
16520 case X86::BI__builtin_ia32_ktestcdi:
16521 IID = Intrinsic::x86_avx512_ktestc_q;
16522 break;
16523 case X86::BI__builtin_ia32_ktestzdi:
16524 IID = Intrinsic::x86_avx512_ktestz_q;
16525 break;
16526 }
16527
16528 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
16529 Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
16530 Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
16531 Function *Intr = CGM.getIntrinsic(IID);
16532 return Builder.CreateCall(Intr, {LHS, RHS});
16533 }
16534
16535 case X86::BI__builtin_ia32_kaddqi:
16536 case X86::BI__builtin_ia32_kaddhi:
16537 case X86::BI__builtin_ia32_kaddsi:
16538 case X86::BI__builtin_ia32_kadddi: {
16539 Intrinsic::ID IID;
16540 switch (BuiltinID) {
16541 default: llvm_unreachable("Unsupported intrinsic!");
16542 case X86::BI__builtin_ia32_kaddqi:
16543 IID = Intrinsic::x86_avx512_kadd_b;
16544 break;
16545 case X86::BI__builtin_ia32_kaddhi:
16546 IID = Intrinsic::x86_avx512_kadd_w;
16547 break;
16548 case X86::BI__builtin_ia32_kaddsi:
16549 IID = Intrinsic::x86_avx512_kadd_d;
16550 break;
16551 case X86::BI__builtin_ia32_kadddi:
16552 IID = Intrinsic::x86_avx512_kadd_q;
16553 break;
16554 }
16555
16556 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
16557 Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
16558 Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
16559 Function *Intr = CGM.getIntrinsic(IID);
16560 Value *Res = Builder.CreateCall(Intr, {LHS, RHS});
16561 return Builder.CreateBitCast(Res, Ops[0]->getType());
16562 }
16563 case X86::BI__builtin_ia32_kandqi:
16564 case X86::BI__builtin_ia32_kandhi:
16565 case X86::BI__builtin_ia32_kandsi:
16566 case X86::BI__builtin_ia32_kanddi:
16567 return EmitX86MaskLogic(*this, Instruction::And, Ops);
16568 case X86::BI__builtin_ia32_kandnqi:
16569 case X86::BI__builtin_ia32_kandnhi:
16570 case X86::BI__builtin_ia32_kandnsi:
16571 case X86::BI__builtin_ia32_kandndi:
16572 return EmitX86MaskLogic(*this, Instruction::And, Ops, true);
16573 case X86::BI__builtin_ia32_korqi:
16574 case X86::BI__builtin_ia32_korhi:
16575 case X86::BI__builtin_ia32_korsi:
16576 case X86::BI__builtin_ia32_kordi:
16577 return EmitX86MaskLogic(*this, Instruction::Or, Ops);
16578 case X86::BI__builtin_ia32_kxnorqi:
16579 case X86::BI__builtin_ia32_kxnorhi:
16580 case X86::BI__builtin_ia32_kxnorsi:
16581 case X86::BI__builtin_ia32_kxnordi:
16582 return EmitX86MaskLogic(*this, Instruction::Xor, Ops, true);
16583 case X86::BI__builtin_ia32_kxorqi:
16584 case X86::BI__builtin_ia32_kxorhi:
16585 case X86::BI__builtin_ia32_kxorsi:
16586 case X86::BI__builtin_ia32_kxordi:
16587 return EmitX86MaskLogic(*this, Instruction::Xor, Ops);
16588 case X86::BI__builtin_ia32_knotqi:
16589 case X86::BI__builtin_ia32_knothi:
16590 case X86::BI__builtin_ia32_knotsi:
16591 case X86::BI__builtin_ia32_knotdi: {
16592 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
16593 Value *Res = getMaskVecValue(*this, Ops[0], NumElts);
16594 return Builder.CreateBitCast(Builder.CreateNot(Res),
16595 Ops[0]->getType());
16596 }
16597 case X86::BI__builtin_ia32_kmovb:
16598 case X86::BI__builtin_ia32_kmovw:
16599 case X86::BI__builtin_ia32_kmovd:
16600 case X86::BI__builtin_ia32_kmovq: {
16601 // Bitcast to vXi1 type and then back to integer. This gets the mask
16602 // register type into the IR, but might be optimized out depending on
16603 // what's around it.
16604 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
16605 Value *Res = getMaskVecValue(*this, Ops[0], NumElts);
16606 return Builder.CreateBitCast(Res, Ops[0]->getType());
16607 }
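  // For illustration, a rough sketch of the IR produced for
  // __builtin_ia32_kmovw (the exact value names are not significant):
  //   %m = bitcast i16 %in to <16 x i1>
  //   %r = bitcast <16 x i1> %m to i16
  // which is enough to surface the k-register type even if later passes fold
  // the round trip away.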
16608
16609 case X86::BI__builtin_ia32_kunpckdi:
16610 case X86::BI__builtin_ia32_kunpcksi:
16611 case X86::BI__builtin_ia32_kunpckhi: {
16612 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
16613 Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
16614 Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
16615 int Indices[64];
16616 for (unsigned i = 0; i != NumElts; ++i)
16617 Indices[i] = i;
16618
16619 // First extract half of each vector. This gives better codegen than
16620 // doing it in a single shuffle.
16621 LHS = Builder.CreateShuffleVector(LHS, LHS, ArrayRef(Indices, NumElts / 2));
16622 RHS = Builder.CreateShuffleVector(RHS, RHS, ArrayRef(Indices, NumElts / 2));
16623 // Concat the vectors.
16624 // NOTE: Operands are swapped to match the intrinsic definition.
16625 Value *Res =
16626 Builder.CreateShuffleVector(RHS, LHS, ArrayRef(Indices, NumElts));
16627 return Builder.CreateBitCast(Res, Ops[0]->getType());
16628 }
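  // Semantics sketch, following Intel's kunpack definition: for the 16-bit
  // form __builtin_ia32_kunpckhi(A, B) the result is B[7:0] in the low byte
  // and A[7:0] in the high byte, which is why the shuffle above concatenates
  // the low halves with RHS placed first.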
16629
16630 case X86::BI__builtin_ia32_vplzcntd_128:
16631 case X86::BI__builtin_ia32_vplzcntd_256:
16632 case X86::BI__builtin_ia32_vplzcntd_512:
16633 case X86::BI__builtin_ia32_vplzcntq_128:
16634 case X86::BI__builtin_ia32_vplzcntq_256:
16635 case X86::BI__builtin_ia32_vplzcntq_512: {
16636 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
16637 return Builder.CreateCall(F, {Ops[0],Builder.getInt1(false)});
16638 }
16639 case X86::BI__builtin_ia32_sqrtss:
16640 case X86::BI__builtin_ia32_sqrtsd: {
16641 Value *A = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
16642 Function *F;
16643 if (Builder.getIsFPConstrained()) {
16644 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
16645 F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
16646 A->getType());
16647 A = Builder.CreateConstrainedFPCall(F, {A});
16648 } else {
16649 F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType());
16650 A = Builder.CreateCall(F, {A});
16651 }
16652 return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0);
16653 }
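  // Semantics sketch: for __builtin_ia32_sqrtss(a) the result is
  //   { sqrt(a[0]), a[1], a[2], a[3] }
  // i.e. only lane 0 is computed and the remaining lanes are passed through
  // from the source operand, matching the extract/insert pair above.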
16654 case X86::BI__builtin_ia32_sqrtsh_round_mask:
16655 case X86::BI__builtin_ia32_sqrtsd_round_mask:
16656 case X86::BI__builtin_ia32_sqrtss_round_mask: {
16657 unsigned CC = cast<llvm::ConstantInt>(Ops[4])->getZExtValue();
16658    // Lower to a plain sqrt only if the rounding mode is 4 (AKA CUR_DIRECTION);
16659    // otherwise keep the target-specific intrinsic.
16660 if (CC != 4) {
16661 Intrinsic::ID IID;
16662
16663 switch (BuiltinID) {
16664 default:
16665 llvm_unreachable("Unsupported intrinsic!");
16666 case X86::BI__builtin_ia32_sqrtsh_round_mask:
16667 IID = Intrinsic::x86_avx512fp16_mask_sqrt_sh;
16668 break;
16669 case X86::BI__builtin_ia32_sqrtsd_round_mask:
16670 IID = Intrinsic::x86_avx512_mask_sqrt_sd;
16671 break;
16672 case X86::BI__builtin_ia32_sqrtss_round_mask:
16673 IID = Intrinsic::x86_avx512_mask_sqrt_ss;
16674 break;
16675 }
16676 return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
16677 }
16678 Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
16679 Function *F;
16680 if (Builder.getIsFPConstrained()) {
16681 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
16682 F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
16683 A->getType());
16684 A = Builder.CreateConstrainedFPCall(F, A);
16685 } else {
16686 F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType());
16687 A = Builder.CreateCall(F, A);
16688 }
16689 Value *Src = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
16690 A = EmitX86ScalarSelect(*this, Ops[3], A, Src);
16691 return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0);
16692 }
16693 case X86::BI__builtin_ia32_sqrtpd256:
16694 case X86::BI__builtin_ia32_sqrtpd:
16695 case X86::BI__builtin_ia32_sqrtps256:
16696 case X86::BI__builtin_ia32_sqrtps:
16697 case X86::BI__builtin_ia32_sqrtph256:
16698 case X86::BI__builtin_ia32_sqrtph:
16699 case X86::BI__builtin_ia32_sqrtph512:
16700 case X86::BI__builtin_ia32_vsqrtnepbf16256:
16701 case X86::BI__builtin_ia32_vsqrtnepbf16:
16702 case X86::BI__builtin_ia32_vsqrtnepbf16512:
16703 case X86::BI__builtin_ia32_sqrtps512:
16704 case X86::BI__builtin_ia32_sqrtpd512: {
16705 if (Ops.size() == 2) {
16706 unsigned CC = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
16707      // Lower to a plain sqrt only if the rounding mode is 4 (AKA CUR_DIRECTION);
16708      // otherwise keep the target-specific intrinsic.
16709 if (CC != 4) {
16710 Intrinsic::ID IID;
16711
16712 switch (BuiltinID) {
16713 default:
16714 llvm_unreachable("Unsupported intrinsic!");
16715 case X86::BI__builtin_ia32_sqrtph512:
16716 IID = Intrinsic::x86_avx512fp16_sqrt_ph_512;
16717 break;
16718 case X86::BI__builtin_ia32_sqrtps512:
16719 IID = Intrinsic::x86_avx512_sqrt_ps_512;
16720 break;
16721 case X86::BI__builtin_ia32_sqrtpd512:
16722 IID = Intrinsic::x86_avx512_sqrt_pd_512;
16723 break;
16724 }
16725 return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
16726 }
16727 }
16728 if (Builder.getIsFPConstrained()) {
16729 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
16730 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
16731 Ops[0]->getType());
16732 return Builder.CreateConstrainedFPCall(F, Ops[0]);
16733 } else {
16734 Function *F = CGM.getIntrinsic(Intrinsic::sqrt, Ops[0]->getType());
16735 return Builder.CreateCall(F, Ops[0]);
16736 }
16737 }
16738
16739 case X86::BI__builtin_ia32_pmuludq128:
16740 case X86::BI__builtin_ia32_pmuludq256:
16741 case X86::BI__builtin_ia32_pmuludq512:
16742 return EmitX86Muldq(*this, /*IsSigned*/false, Ops);
16743
16744 case X86::BI__builtin_ia32_pmuldq128:
16745 case X86::BI__builtin_ia32_pmuldq256:
16746 case X86::BI__builtin_ia32_pmuldq512:
16747 return EmitX86Muldq(*this, /*IsSigned*/true, Ops);
16748
16749 case X86::BI__builtin_ia32_pternlogd512_mask:
16750 case X86::BI__builtin_ia32_pternlogq512_mask:
16751 case X86::BI__builtin_ia32_pternlogd128_mask:
16752 case X86::BI__builtin_ia32_pternlogd256_mask:
16753 case X86::BI__builtin_ia32_pternlogq128_mask:
16754 case X86::BI__builtin_ia32_pternlogq256_mask:
16755 return EmitX86Ternlog(*this, /*ZeroMask*/false, Ops);
16756
16757 case X86::BI__builtin_ia32_pternlogd512_maskz:
16758 case X86::BI__builtin_ia32_pternlogq512_maskz:
16759 case X86::BI__builtin_ia32_pternlogd128_maskz:
16760 case X86::BI__builtin_ia32_pternlogd256_maskz:
16761 case X86::BI__builtin_ia32_pternlogq128_maskz:
16762 case X86::BI__builtin_ia32_pternlogq256_maskz:
16763 return EmitX86Ternlog(*this, /*ZeroMask*/true, Ops);
16764
16765 case X86::BI__builtin_ia32_vpshldd128:
16766 case X86::BI__builtin_ia32_vpshldd256:
16767 case X86::BI__builtin_ia32_vpshldd512:
16768 case X86::BI__builtin_ia32_vpshldq128:
16769 case X86::BI__builtin_ia32_vpshldq256:
16770 case X86::BI__builtin_ia32_vpshldq512:
16771 case X86::BI__builtin_ia32_vpshldw128:
16772 case X86::BI__builtin_ia32_vpshldw256:
16773 case X86::BI__builtin_ia32_vpshldw512:
16774 return EmitX86FunnelShift(*this, Ops[0], Ops[1], Ops[2], false);
16775
16776 case X86::BI__builtin_ia32_vpshrdd128:
16777 case X86::BI__builtin_ia32_vpshrdd256:
16778 case X86::BI__builtin_ia32_vpshrdd512:
16779 case X86::BI__builtin_ia32_vpshrdq128:
16780 case X86::BI__builtin_ia32_vpshrdq256:
16781 case X86::BI__builtin_ia32_vpshrdq512:
16782 case X86::BI__builtin_ia32_vpshrdw128:
16783 case X86::BI__builtin_ia32_vpshrdw256:
16784 case X86::BI__builtin_ia32_vpshrdw512:
16785 // Ops 0 and 1 are swapped.
16786 return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true);
16787
16788 case X86::BI__builtin_ia32_vpshldvd128:
16789 case X86::BI__builtin_ia32_vpshldvd256:
16790 case X86::BI__builtin_ia32_vpshldvd512:
16791 case X86::BI__builtin_ia32_vpshldvq128:
16792 case X86::BI__builtin_ia32_vpshldvq256:
16793 case X86::BI__builtin_ia32_vpshldvq512:
16794 case X86::BI__builtin_ia32_vpshldvw128:
16795 case X86::BI__builtin_ia32_vpshldvw256:
16796 case X86::BI__builtin_ia32_vpshldvw512:
16797 return EmitX86FunnelShift(*this, Ops[0], Ops[1], Ops[2], false);
16798
16799 case X86::BI__builtin_ia32_vpshrdvd128:
16800 case X86::BI__builtin_ia32_vpshrdvd256:
16801 case X86::BI__builtin_ia32_vpshrdvd512:
16802 case X86::BI__builtin_ia32_vpshrdvq128:
16803 case X86::BI__builtin_ia32_vpshrdvq256:
16804 case X86::BI__builtin_ia32_vpshrdvq512:
16805 case X86::BI__builtin_ia32_vpshrdvw128:
16806 case X86::BI__builtin_ia32_vpshrdvw256:
16807 case X86::BI__builtin_ia32_vpshrdvw512:
16808 // Ops 0 and 1 are swapped.
16809 return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true);
16810
16811 // Reductions
16812 case X86::BI__builtin_ia32_reduce_fadd_pd512:
16813 case X86::BI__builtin_ia32_reduce_fadd_ps512:
16814 case X86::BI__builtin_ia32_reduce_fadd_ph512:
16815 case X86::BI__builtin_ia32_reduce_fadd_ph256:
16816 case X86::BI__builtin_ia32_reduce_fadd_ph128: {
16817 Function *F =
16818 CGM.getIntrinsic(Intrinsic::vector_reduce_fadd, Ops[1]->getType());
16819 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
16820 Builder.getFastMathFlags().setAllowReassoc();
16821 return Builder.CreateCall(F, {Ops[0], Ops[1]});
16822 }
16823 case X86::BI__builtin_ia32_reduce_fmul_pd512:
16824 case X86::BI__builtin_ia32_reduce_fmul_ps512:
16825 case X86::BI__builtin_ia32_reduce_fmul_ph512:
16826 case X86::BI__builtin_ia32_reduce_fmul_ph256:
16827 case X86::BI__builtin_ia32_reduce_fmul_ph128: {
16828 Function *F =
16829 CGM.getIntrinsic(Intrinsic::vector_reduce_fmul, Ops[1]->getType());
16830 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
16831 Builder.getFastMathFlags().setAllowReassoc();
16832 return Builder.CreateCall(F, {Ops[0], Ops[1]});
16833 }
16834 case X86::BI__builtin_ia32_reduce_fmax_pd512:
16835 case X86::BI__builtin_ia32_reduce_fmax_ps512:
16836 case X86::BI__builtin_ia32_reduce_fmax_ph512:
16837 case X86::BI__builtin_ia32_reduce_fmax_ph256:
16838 case X86::BI__builtin_ia32_reduce_fmax_ph128: {
16839 Function *F =
16840 CGM.getIntrinsic(Intrinsic::vector_reduce_fmax, Ops[0]->getType());
16841 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
16842 Builder.getFastMathFlags().setNoNaNs();
16843 return Builder.CreateCall(F, {Ops[0]});
16844 }
16845 case X86::BI__builtin_ia32_reduce_fmin_pd512:
16846 case X86::BI__builtin_ia32_reduce_fmin_ps512:
16847 case X86::BI__builtin_ia32_reduce_fmin_ph512:
16848 case X86::BI__builtin_ia32_reduce_fmin_ph256:
16849 case X86::BI__builtin_ia32_reduce_fmin_ph128: {
16850 Function *F =
16851 CGM.getIntrinsic(Intrinsic::vector_reduce_fmin, Ops[0]->getType());
16852 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
16853 Builder.getFastMathFlags().setNoNaNs();
16854 return Builder.CreateCall(F, {Ops[0]});
16855 }
16856
16857 case X86::BI__builtin_ia32_rdrand16_step:
16858 case X86::BI__builtin_ia32_rdrand32_step:
16859 case X86::BI__builtin_ia32_rdrand64_step:
16860 case X86::BI__builtin_ia32_rdseed16_step:
16861 case X86::BI__builtin_ia32_rdseed32_step:
16862 case X86::BI__builtin_ia32_rdseed64_step: {
16863 Intrinsic::ID ID;
16864 switch (BuiltinID) {
16865 default: llvm_unreachable("Unsupported intrinsic!");
16866 case X86::BI__builtin_ia32_rdrand16_step:
16867 ID = Intrinsic::x86_rdrand_16;
16868 break;
16869 case X86::BI__builtin_ia32_rdrand32_step:
16870 ID = Intrinsic::x86_rdrand_32;
16871 break;
16872 case X86::BI__builtin_ia32_rdrand64_step:
16873 ID = Intrinsic::x86_rdrand_64;
16874 break;
16875 case X86::BI__builtin_ia32_rdseed16_step:
16876 ID = Intrinsic::x86_rdseed_16;
16877 break;
16878 case X86::BI__builtin_ia32_rdseed32_step:
16879 ID = Intrinsic::x86_rdseed_32;
16880 break;
16881 case X86::BI__builtin_ia32_rdseed64_step:
16882 ID = Intrinsic::x86_rdseed_64;
16883 break;
16884 }
16885
16886 Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID));
16887 Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 0),
16888 Ops[0]);
16889 return Builder.CreateExtractValue(Call, 1);
16890 }
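  // Sketch of the contract relied on here: the rdrand/rdseed intrinsics
  // return a pair { random value, success flag (CF) }; the value is stored
  // through the pointer argument and the flag becomes the result of the
  // *_step builtin (non-zero on success).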
16891 case X86::BI__builtin_ia32_addcarryx_u32:
16892 case X86::BI__builtin_ia32_addcarryx_u64:
16893 case X86::BI__builtin_ia32_subborrow_u32:
16894 case X86::BI__builtin_ia32_subborrow_u64: {
16895 Intrinsic::ID IID;
16896 switch (BuiltinID) {
16897 default: llvm_unreachable("Unsupported intrinsic!");
16898 case X86::BI__builtin_ia32_addcarryx_u32:
16899 IID = Intrinsic::x86_addcarry_32;
16900 break;
16901 case X86::BI__builtin_ia32_addcarryx_u64:
16902 IID = Intrinsic::x86_addcarry_64;
16903 break;
16904 case X86::BI__builtin_ia32_subborrow_u32:
16905 IID = Intrinsic::x86_subborrow_32;
16906 break;
16907 case X86::BI__builtin_ia32_subborrow_u64:
16908 IID = Intrinsic::x86_subborrow_64;
16909 break;
16910 }
16911
16912 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID),
16913 { Ops[0], Ops[1], Ops[2] });
16914 Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1),
16915 Ops[3]);
16916 return Builder.CreateExtractValue(Call, 0);
16917 }
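  // Sketch of the mapping: llvm.x86.addcarry.32(carry_in, a, b) returns
  // { i8 carry_out, i32 sum }; the sum is written through the out pointer
  // (Ops[3]) and the carry-out byte is the value of the builtin itself, so
  //   c = _addcarry_u32(c, a, b, &sum);
  // chains carries across limbs.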
16918
16919 case X86::BI__builtin_ia32_fpclassps128_mask:
16920 case X86::BI__builtin_ia32_fpclassps256_mask:
16921 case X86::BI__builtin_ia32_fpclassps512_mask:
16922 case X86::BI__builtin_ia32_vfpclasspbf16128_mask:
16923 case X86::BI__builtin_ia32_vfpclasspbf16256_mask:
16924 case X86::BI__builtin_ia32_vfpclasspbf16512_mask:
16925 case X86::BI__builtin_ia32_fpclassph128_mask:
16926 case X86::BI__builtin_ia32_fpclassph256_mask:
16927 case X86::BI__builtin_ia32_fpclassph512_mask:
16928 case X86::BI__builtin_ia32_fpclasspd128_mask:
16929 case X86::BI__builtin_ia32_fpclasspd256_mask:
16930 case X86::BI__builtin_ia32_fpclasspd512_mask: {
16931 unsigned NumElts =
16932 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
16933 Value *MaskIn = Ops[2];
16934 Ops.erase(&Ops[2]);
16935
16936 Intrinsic::ID ID;
16937 switch (BuiltinID) {
16938 default: llvm_unreachable("Unsupported intrinsic!");
16939 case X86::BI__builtin_ia32_vfpclasspbf16128_mask:
16940 ID = Intrinsic::x86_avx10_fpclass_nepbf16_128;
16941 break;
16942 case X86::BI__builtin_ia32_vfpclasspbf16256_mask:
16943 ID = Intrinsic::x86_avx10_fpclass_nepbf16_256;
16944 break;
16945 case X86::BI__builtin_ia32_vfpclasspbf16512_mask:
16946 ID = Intrinsic::x86_avx10_fpclass_nepbf16_512;
16947 break;
16948 case X86::BI__builtin_ia32_fpclassph128_mask:
16949 ID = Intrinsic::x86_avx512fp16_fpclass_ph_128;
16950 break;
16951 case X86::BI__builtin_ia32_fpclassph256_mask:
16952 ID = Intrinsic::x86_avx512fp16_fpclass_ph_256;
16953 break;
16954 case X86::BI__builtin_ia32_fpclassph512_mask:
16955 ID = Intrinsic::x86_avx512fp16_fpclass_ph_512;
16956 break;
16957 case X86::BI__builtin_ia32_fpclassps128_mask:
16958 ID = Intrinsic::x86_avx512_fpclass_ps_128;
16959 break;
16960 case X86::BI__builtin_ia32_fpclassps256_mask:
16961 ID = Intrinsic::x86_avx512_fpclass_ps_256;
16962 break;
16963 case X86::BI__builtin_ia32_fpclassps512_mask:
16964 ID = Intrinsic::x86_avx512_fpclass_ps_512;
16965 break;
16966 case X86::BI__builtin_ia32_fpclasspd128_mask:
16967 ID = Intrinsic::x86_avx512_fpclass_pd_128;
16968 break;
16969 case X86::BI__builtin_ia32_fpclasspd256_mask:
16970 ID = Intrinsic::x86_avx512_fpclass_pd_256;
16971 break;
16972 case X86::BI__builtin_ia32_fpclasspd512_mask:
16973 ID = Intrinsic::x86_avx512_fpclass_pd_512;
16974 break;
16975 }
16976
16977 Value *Fpclass = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
16978 return EmitX86MaskedCompareResult(*this, Fpclass, NumElts, MaskIn);
16979 }
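  // For reference, the imm8 class bits follow Intel's vfpclass encoding
  // (stated here as background, not derived from this file): 0x01 QNaN,
  // 0x02 +0, 0x04 -0, 0x08 +Inf, 0x10 -Inf, 0x20 denormal, 0x40 finite
  // negative, 0x80 SNaN; MaskIn then masks off lanes excluded by the
  // write mask.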
16980
16981 case X86::BI__builtin_ia32_vp2intersect_q_512:
16982 case X86::BI__builtin_ia32_vp2intersect_q_256:
16983 case X86::BI__builtin_ia32_vp2intersect_q_128:
16984 case X86::BI__builtin_ia32_vp2intersect_d_512:
16985 case X86::BI__builtin_ia32_vp2intersect_d_256:
16986 case X86::BI__builtin_ia32_vp2intersect_d_128: {
16987 unsigned NumElts =
16988 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
16989 Intrinsic::ID ID;
16990
16991 switch (BuiltinID) {
16992 default: llvm_unreachable("Unsupported intrinsic!");
16993 case X86::BI__builtin_ia32_vp2intersect_q_512:
16994 ID = Intrinsic::x86_avx512_vp2intersect_q_512;
16995 break;
16996 case X86::BI__builtin_ia32_vp2intersect_q_256:
16997 ID = Intrinsic::x86_avx512_vp2intersect_q_256;
16998 break;
16999 case X86::BI__builtin_ia32_vp2intersect_q_128:
17000 ID = Intrinsic::x86_avx512_vp2intersect_q_128;
17001 break;
17002 case X86::BI__builtin_ia32_vp2intersect_d_512:
17003 ID = Intrinsic::x86_avx512_vp2intersect_d_512;
17004 break;
17005 case X86::BI__builtin_ia32_vp2intersect_d_256:
17006 ID = Intrinsic::x86_avx512_vp2intersect_d_256;
17007 break;
17008 case X86::BI__builtin_ia32_vp2intersect_d_128:
17009 ID = Intrinsic::x86_avx512_vp2intersect_d_128;
17010 break;
17011 }
17012
17013 Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID), {Ops[0], Ops[1]});
17014 Value *Result = Builder.CreateExtractValue(Call, 0);
17015    Result = EmitX86MaskedCompareResult(*this, Result, NumElts, nullptr);
17016    Builder.CreateDefaultAlignedStore(Result, Ops[2]);
17017
17018    Result = Builder.CreateExtractValue(Call, 1);
17019    Result = EmitX86MaskedCompareResult(*this, Result, NumElts, nullptr);
17020    return Builder.CreateDefaultAlignedStore(Result, Ops[3]);
17021  }
17022
17023 case X86::BI__builtin_ia32_vpmultishiftqb128:
17024 case X86::BI__builtin_ia32_vpmultishiftqb256:
17025 case X86::BI__builtin_ia32_vpmultishiftqb512: {
17026 Intrinsic::ID ID;
17027 switch (BuiltinID) {
17028 default: llvm_unreachable("Unsupported intrinsic!");
17029 case X86::BI__builtin_ia32_vpmultishiftqb128:
17030 ID = Intrinsic::x86_avx512_pmultishift_qb_128;
17031 break;
17032 case X86::BI__builtin_ia32_vpmultishiftqb256:
17033 ID = Intrinsic::x86_avx512_pmultishift_qb_256;
17034 break;
17035 case X86::BI__builtin_ia32_vpmultishiftqb512:
17036 ID = Intrinsic::x86_avx512_pmultishift_qb_512;
17037 break;
17038 }
17039
17040 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
17041 }
17042
17043 case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
17044 case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
17045 case X86::BI__builtin_ia32_vpshufbitqmb512_mask: {
17046 unsigned NumElts =
17047 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
17048 Value *MaskIn = Ops[2];
17049 Ops.erase(&Ops[2]);
17050
17051 Intrinsic::ID ID;
17052 switch (BuiltinID) {
17053 default: llvm_unreachable("Unsupported intrinsic!");
17054 case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
17055 ID = Intrinsic::x86_avx512_vpshufbitqmb_128;
17056 break;
17057 case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
17058 ID = Intrinsic::x86_avx512_vpshufbitqmb_256;
17059 break;
17060 case X86::BI__builtin_ia32_vpshufbitqmb512_mask:
17061 ID = Intrinsic::x86_avx512_vpshufbitqmb_512;
17062 break;
17063 }
17064
17065 Value *Shufbit = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
17066 return EmitX86MaskedCompareResult(*this, Shufbit, NumElts, MaskIn);
17067 }
17068
17069 // packed comparison intrinsics
17070 case X86::BI__builtin_ia32_cmpeqps:
17071 case X86::BI__builtin_ia32_cmpeqpd:
17072 return getVectorFCmpIR(CmpInst::FCMP_OEQ, /*IsSignaling*/false);
17073 case X86::BI__builtin_ia32_cmpltps:
17074 case X86::BI__builtin_ia32_cmpltpd:
17075 return getVectorFCmpIR(CmpInst::FCMP_OLT, /*IsSignaling*/true);
17076 case X86::BI__builtin_ia32_cmpleps:
17077 case X86::BI__builtin_ia32_cmplepd:
17078 return getVectorFCmpIR(CmpInst::FCMP_OLE, /*IsSignaling*/true);
17079 case X86::BI__builtin_ia32_cmpunordps:
17080 case X86::BI__builtin_ia32_cmpunordpd:
17081 return getVectorFCmpIR(CmpInst::FCMP_UNO, /*IsSignaling*/false);
17082 case X86::BI__builtin_ia32_cmpneqps:
17083 case X86::BI__builtin_ia32_cmpneqpd:
17084 return getVectorFCmpIR(CmpInst::FCMP_UNE, /*IsSignaling*/false);
17085 case X86::BI__builtin_ia32_cmpnltps:
17086 case X86::BI__builtin_ia32_cmpnltpd:
17087 return getVectorFCmpIR(CmpInst::FCMP_UGE, /*IsSignaling*/true);
17088 case X86::BI__builtin_ia32_cmpnleps:
17089 case X86::BI__builtin_ia32_cmpnlepd:
17090 return getVectorFCmpIR(CmpInst::FCMP_UGT, /*IsSignaling*/true);
17091 case X86::BI__builtin_ia32_cmpordps:
17092 case X86::BI__builtin_ia32_cmpordpd:
17093 return getVectorFCmpIR(CmpInst::FCMP_ORD, /*IsSignaling*/false);
17094 case X86::BI__builtin_ia32_cmpph128_mask:
17095 case X86::BI__builtin_ia32_cmpph256_mask:
17096 case X86::BI__builtin_ia32_cmpph512_mask:
17097 case X86::BI__builtin_ia32_cmpps128_mask:
17098 case X86::BI__builtin_ia32_cmpps256_mask:
17099 case X86::BI__builtin_ia32_cmpps512_mask:
17100 case X86::BI__builtin_ia32_cmppd128_mask:
17101 case X86::BI__builtin_ia32_cmppd256_mask:
17102 case X86::BI__builtin_ia32_cmppd512_mask:
17103 case X86::BI__builtin_ia32_vcmppd256_round_mask:
17104 case X86::BI__builtin_ia32_vcmpps256_round_mask:
17105 case X86::BI__builtin_ia32_vcmpph256_round_mask:
17106 case X86::BI__builtin_ia32_vcmppbf16512_mask:
17107 case X86::BI__builtin_ia32_vcmppbf16256_mask:
17108 case X86::BI__builtin_ia32_vcmppbf16128_mask:
17109 IsMaskFCmp = true;
17110 [[fallthrough]];
17111 case X86::BI__builtin_ia32_cmpps:
17112 case X86::BI__builtin_ia32_cmpps256:
17113 case X86::BI__builtin_ia32_cmppd:
17114 case X86::BI__builtin_ia32_cmppd256: {
17115    // Lower vector comparisons to fcmp instructions, while
17116    // ignoring the requested signalling behaviour and
17117    // ignoring the requested rounding mode.
17118    // This is only possible if the fp-model is not strict and FENV_ACCESS is off.
17119
17120    // The third argument is the comparison condition, an integer in the
17121    // range [0, 31].
17122 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x1f;
17123
17124 // Lowering to IR fcmp instruction.
17125 // Ignoring requested signaling behaviour,
17126 // e.g. both _CMP_GT_OS & _CMP_GT_OQ are translated to FCMP_OGT.
17127 FCmpInst::Predicate Pred;
17128 bool IsSignaling;
17129 // Predicates for 16-31 repeat the 0-15 predicates. Only the signalling
17130 // behavior is inverted. We'll handle that after the switch.
17131 switch (CC & 0xf) {
17132 case 0x00: Pred = FCmpInst::FCMP_OEQ; IsSignaling = false; break;
17133 case 0x01: Pred = FCmpInst::FCMP_OLT; IsSignaling = true; break;
17134 case 0x02: Pred = FCmpInst::FCMP_OLE; IsSignaling = true; break;
17135 case 0x03: Pred = FCmpInst::FCMP_UNO; IsSignaling = false; break;
17136 case 0x04: Pred = FCmpInst::FCMP_UNE; IsSignaling = false; break;
17137 case 0x05: Pred = FCmpInst::FCMP_UGE; IsSignaling = true; break;
17138 case 0x06: Pred = FCmpInst::FCMP_UGT; IsSignaling = true; break;
17139 case 0x07: Pred = FCmpInst::FCMP_ORD; IsSignaling = false; break;
17140 case 0x08: Pred = FCmpInst::FCMP_UEQ; IsSignaling = false; break;
17141 case 0x09: Pred = FCmpInst::FCMP_ULT; IsSignaling = true; break;
17142 case 0x0a: Pred = FCmpInst::FCMP_ULE; IsSignaling = true; break;
17143 case 0x0b: Pred = FCmpInst::FCMP_FALSE; IsSignaling = false; break;
17144 case 0x0c: Pred = FCmpInst::FCMP_ONE; IsSignaling = false; break;
17145 case 0x0d: Pred = FCmpInst::FCMP_OGE; IsSignaling = true; break;
17146 case 0x0e: Pred = FCmpInst::FCMP_OGT; IsSignaling = true; break;
17147 case 0x0f: Pred = FCmpInst::FCMP_TRUE; IsSignaling = false; break;
17148 default: llvm_unreachable("Unhandled CC");
17149 }
17150
17151 // Invert the signalling behavior for 16-31.
17152 if (CC & 0x10)
17153 IsSignaling = !IsSignaling;
17154
17155    // If we're using constrained intrinsics and the predicate is true or false,
17156    // there is no generic compare intrinsic we can use, so fall back to the
17157    // legacy X86-specific intrinsic.
17158    // Likewise, if the intrinsic is mask enabled under constrained intrinsics,
17159    // use the legacy X86-specific intrinsic.
17160 if (Builder.getIsFPConstrained() &&
17161 (Pred == FCmpInst::FCMP_TRUE || Pred == FCmpInst::FCMP_FALSE ||
17162 IsMaskFCmp)) {
17163
17164 Intrinsic::ID IID;
17165 switch (BuiltinID) {
17166 default: llvm_unreachable("Unexpected builtin");
17167 case X86::BI__builtin_ia32_cmpps:
17168 IID = Intrinsic::x86_sse_cmp_ps;
17169 break;
17170 case X86::BI__builtin_ia32_cmpps256:
17171 IID = Intrinsic::x86_avx_cmp_ps_256;
17172 break;
17173 case X86::BI__builtin_ia32_cmppd:
17174 IID = Intrinsic::x86_sse2_cmp_pd;
17175 break;
17176 case X86::BI__builtin_ia32_cmppd256:
17177 IID = Intrinsic::x86_avx_cmp_pd_256;
17178 break;
17179 case X86::BI__builtin_ia32_cmpph128_mask:
17180 IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_128;
17181 break;
17182 case X86::BI__builtin_ia32_cmpph256_mask:
17183 IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_256;
17184 break;
17185 case X86::BI__builtin_ia32_cmpph512_mask:
17186 IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_512;
17187 break;
17188 case X86::BI__builtin_ia32_cmpps512_mask:
17189 IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
17190 break;
17191 case X86::BI__builtin_ia32_cmppd512_mask:
17192 IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
17193 break;
17194 case X86::BI__builtin_ia32_cmpps128_mask:
17195 IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
17196 break;
17197 case X86::BI__builtin_ia32_cmpps256_mask:
17198 IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
17199 break;
17200 case X86::BI__builtin_ia32_cmppd128_mask:
17201 IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
17202 break;
17203 case X86::BI__builtin_ia32_cmppd256_mask:
17204 IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
17205 break;
17206 }
17207
17208 Function *Intr = CGM.getIntrinsic(IID);
17209 if (IsMaskFCmp) {
17210 unsigned NumElts =
17211 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
17212 Ops[3] = getMaskVecValue(*this, Ops[3], NumElts);
17213 Value *Cmp = Builder.CreateCall(Intr, Ops);
17214 return EmitX86MaskedCompareResult(*this, Cmp, NumElts, nullptr);
17215 }
17216
17217 return Builder.CreateCall(Intr, Ops);
17218 }
17219
17220 // Builtins without the _mask suffix return a vector of integers
17221 // of the same width as the input vectors
17222 if (IsMaskFCmp) {
17223 // We ignore SAE if strict FP is disabled. We only keep precise
17224 // exception behavior under strict FP.
17225 // NOTE: If strict FP does ever go through here a CGFPOptionsRAII
17226 // object will be required.
17227 unsigned NumElts =
17228 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
17229 Value *Cmp;
17230 if (IsSignaling)
17231 Cmp = Builder.CreateFCmpS(Pred, Ops[0], Ops[1]);
17232 else
17233 Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
17234 return EmitX86MaskedCompareResult(*this, Cmp, NumElts, Ops[3]);
17235 }
17236
17237 return getVectorFCmpIR(Pred, IsSignaling);
17238 }
17239
17240 // SSE scalar comparison intrinsics
17241 case X86::BI__builtin_ia32_cmpeqss:
17242 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 0);
17243 case X86::BI__builtin_ia32_cmpltss:
17244 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 1);
17245 case X86::BI__builtin_ia32_cmpless:
17246 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 2);
17247 case X86::BI__builtin_ia32_cmpunordss:
17248 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 3);
17249 case X86::BI__builtin_ia32_cmpneqss:
17250 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 4);
17251 case X86::BI__builtin_ia32_cmpnltss:
17252 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 5);
17253 case X86::BI__builtin_ia32_cmpnless:
17254 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 6);
17255 case X86::BI__builtin_ia32_cmpordss:
17256 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 7);
17257 case X86::BI__builtin_ia32_cmpeqsd:
17258 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 0);
17259 case X86::BI__builtin_ia32_cmpltsd:
17260 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 1);
17261 case X86::BI__builtin_ia32_cmplesd:
17262 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 2);
17263 case X86::BI__builtin_ia32_cmpunordsd:
17264 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 3);
17265 case X86::BI__builtin_ia32_cmpneqsd:
17266 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 4);
17267 case X86::BI__builtin_ia32_cmpnltsd:
17268 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 5);
17269 case X86::BI__builtin_ia32_cmpnlesd:
17270 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 6);
17271 case X86::BI__builtin_ia32_cmpordsd:
17272 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7);
17273
17274 // f16c half2float intrinsics
17275 case X86::BI__builtin_ia32_vcvtph2ps:
17276 case X86::BI__builtin_ia32_vcvtph2ps256:
17277 case X86::BI__builtin_ia32_vcvtph2ps_mask:
17278 case X86::BI__builtin_ia32_vcvtph2ps256_mask:
17279 case X86::BI__builtin_ia32_vcvtph2ps512_mask: {
17280 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
17281 return EmitX86CvtF16ToFloatExpr(*this, Ops, ConvertType(E->getType()));
17282 }
17283
17284 // AVX512 bf16 intrinsics
17285 case X86::BI__builtin_ia32_cvtneps2bf16_128_mask: {
17286 Ops[2] = getMaskVecValue(
17287 *this, Ops[2],
17288 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements());
17289 Intrinsic::ID IID = Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128;
17290 return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
17291 }
17292 case X86::BI__builtin_ia32_cvtsbf162ss_32:
17293 return Builder.CreateFPExt(Ops[0], Builder.getFloatTy());
17294
17295 case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
17296 case X86::BI__builtin_ia32_cvtneps2bf16_512_mask: {
17297 Intrinsic::ID IID;
17298 switch (BuiltinID) {
17299 default: llvm_unreachable("Unsupported intrinsic!");
17300 case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
17301 IID = Intrinsic::x86_avx512bf16_cvtneps2bf16_256;
17302 break;
17303 case X86::BI__builtin_ia32_cvtneps2bf16_512_mask:
17304 IID = Intrinsic::x86_avx512bf16_cvtneps2bf16_512;
17305 break;
17306 }
17307 Value *Res = Builder.CreateCall(CGM.getIntrinsic(IID), Ops[0]);
17308 return EmitX86Select(*this, Ops[2], Res, Ops[1]);
17309 }
17310
17311 case X86::BI__cpuid:
17312 case X86::BI__cpuidex: {
17313 Value *FuncId = EmitScalarExpr(E->getArg(1));
17314 Value *SubFuncId = BuiltinID == X86::BI__cpuidex
17315 ? EmitScalarExpr(E->getArg(2))
17316 : llvm::ConstantInt::get(Int32Ty, 0);
17317
17318 llvm::StructType *CpuidRetTy =
17319 llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty, Int32Ty);
17320 llvm::FunctionType *FTy =
17321 llvm::FunctionType::get(CpuidRetTy, {Int32Ty, Int32Ty}, false);
17322
17323 StringRef Asm, Constraints;
17324 if (getTarget().getTriple().getArch() == llvm::Triple::x86) {
17325 Asm = "cpuid";
17326 Constraints = "={ax},={bx},={cx},={dx},{ax},{cx}";
17327 } else {
17328 // x86-64 uses %rbx as the base register, so preserve it.
17329 Asm = "xchgq %rbx, ${1:q}\n"
17330 "cpuid\n"
17331 "xchgq %rbx, ${1:q}";
17332 Constraints = "={ax},=r,={cx},={dx},0,2";
17333 }
17334
17335 llvm::InlineAsm *IA = llvm::InlineAsm::get(FTy, Asm, Constraints,
17336 /*hasSideEffects=*/false);
17337 Value *IACall = Builder.CreateCall(IA, {FuncId, SubFuncId});
17338 Value *BasePtr = EmitScalarExpr(E->getArg(0));
17339 Value *Store = nullptr;
17340 for (unsigned i = 0; i < 4; i++) {
17341 Value *Extracted = Builder.CreateExtractValue(IACall, i);
17342 Value *StorePtr = Builder.CreateConstInBoundsGEP1_32(Int32Ty, BasePtr, i);
17343 Store = Builder.CreateAlignedStore(Extracted, StorePtr, getIntAlign());
17344 }
17345
17346    // Return the last store instruction to signal that we have emitted
17347    // the intrinsic.
17348 return Store;
17349 }
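  // A minimal usage sketch (MSVC-style; variable names are illustrative):
  //   int Info[4];
  //   __cpuid(Info, 1);        // leaf 1
  //   __cpuidex(Info, 7, 0);   // leaf 7, subleaf 0
  // The four stores above fill Info[0..3] with EAX, EBX, ECX and EDX.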
17350
17351 case X86::BI__emul:
17352 case X86::BI__emulu: {
17353 llvm::Type *Int64Ty = llvm::IntegerType::get(getLLVMContext(), 64);
17354 bool isSigned = (BuiltinID == X86::BI__emul);
17355 Value *LHS = Builder.CreateIntCast(Ops[0], Int64Ty, isSigned);
17356 Value *RHS = Builder.CreateIntCast(Ops[1], Int64Ty, isSigned);
17357 return Builder.CreateMul(LHS, RHS, "", !isSigned, isSigned);
17358 }
17359 case X86::BI__mulh:
17360 case X86::BI__umulh:
17361 case X86::BI_mul128:
17362 case X86::BI_umul128: {
17363 llvm::Type *ResType = ConvertType(E->getType());
17364 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
17365
17366 bool IsSigned = (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI_mul128);
17367 Value *LHS = Builder.CreateIntCast(Ops[0], Int128Ty, IsSigned);
17368 Value *RHS = Builder.CreateIntCast(Ops[1], Int128Ty, IsSigned);
17369
17370 Value *MulResult, *HigherBits;
17371 if (IsSigned) {
17372 MulResult = Builder.CreateNSWMul(LHS, RHS);
17373 HigherBits = Builder.CreateAShr(MulResult, 64);
17374 } else {
17375 MulResult = Builder.CreateNUWMul(LHS, RHS);
17376 HigherBits = Builder.CreateLShr(MulResult, 64);
17377 }
17378 HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);
17379
17380 if (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI__umulh)
17381 return HigherBits;
17382
17383 Address HighBitsAddress = EmitPointerWithAlignment(E->getArg(2));
17384 Builder.CreateStore(HigherBits, HighBitsAddress);
17385 return Builder.CreateIntCast(MulResult, ResType, IsSigned);
17386 }
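  // A minimal usage sketch for the MSVC 64x64 multiply intrinsics:
  //   unsigned __int64 Hi;
  //   unsigned __int64 Lo = _umul128(A, B, &Hi);  // full 128-bit product
  //   unsigned __int64 H  = __umulh(A, B);        // high 64 bits only
  // mirroring the MulResult/HigherBits split computed above.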
17387
17388 case X86::BI__faststorefence: {
17389 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
17390 llvm::SyncScope::System);
17391 }
17392 case X86::BI__shiftleft128:
17393 case X86::BI__shiftright128: {
17394 llvm::Function *F = CGM.getIntrinsic(
17395 BuiltinID == X86::BI__shiftleft128 ? Intrinsic::fshl : Intrinsic::fshr,
17396 Int64Ty);
17397 // Flip low/high ops and zero-extend amount to matching type.
17398 // shiftleft128(Low, High, Amt) -> fshl(High, Low, Amt)
17399 // shiftright128(Low, High, Amt) -> fshr(High, Low, Amt)
17400 std::swap(Ops[0], Ops[1]);
17401 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
17402 return Builder.CreateCall(F, Ops);
17403 }
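  // Usage sketch: __shiftleft128(Lo, Hi, N) returns the high 64 bits of the
  // 128-bit value (Hi:Lo) shifted left by N, i.e. (Hi << N) | (Lo >> (64 - N))
  // for N in [1, 63], which is exactly llvm.fshl.i64(Hi, Lo, N) as emitted
  // above (and symmetrically fshr for __shiftright128).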
17404 case X86::BI_ReadWriteBarrier:
17405 case X86::BI_ReadBarrier:
17406 case X86::BI_WriteBarrier: {
17407 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
17408 llvm::SyncScope::SingleThread);
17409 }
17410
17411 case X86::BI_AddressOfReturnAddress: {
17412 Function *F =
17413 CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy);
17414 return Builder.CreateCall(F);
17415 }
17416 case X86::BI__stosb: {
17417    // We treat __stosb as a volatile memset - it may not generate a "rep stosb"
17418    // instruction, but it will create a memset that won't be optimized away.
17419 return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], Align(1), true);
17420 }
17421  // These correspond to intrinsics that return two tiles (tile0_tile1).
17422 case X86::BI__builtin_ia32_t2rpntlvwz0_internal:
17423 case X86::BI__builtin_ia32_t2rpntlvwz0rs_internal:
17424 case X86::BI__builtin_ia32_t2rpntlvwz0t1_internal:
17425 case X86::BI__builtin_ia32_t2rpntlvwz0rst1_internal:
17426 case X86::BI__builtin_ia32_t2rpntlvwz1_internal:
17427 case X86::BI__builtin_ia32_t2rpntlvwz1rs_internal:
17428 case X86::BI__builtin_ia32_t2rpntlvwz1t1_internal:
17429 case X86::BI__builtin_ia32_t2rpntlvwz1rst1_internal: {
17430 Intrinsic::ID IID;
17431 switch (BuiltinID) {
17432 default:
17433 llvm_unreachable("Unsupported intrinsic!");
17434 case X86::BI__builtin_ia32_t2rpntlvwz0_internal:
17435 IID = Intrinsic::x86_t2rpntlvwz0_internal;
17436 break;
17437 case X86::BI__builtin_ia32_t2rpntlvwz0rs_internal:
17438 IID = Intrinsic::x86_t2rpntlvwz0rs_internal;
17439 break;
17440 case X86::BI__builtin_ia32_t2rpntlvwz0t1_internal:
17441 IID = Intrinsic::x86_t2rpntlvwz0t1_internal;
17442 break;
17443 case X86::BI__builtin_ia32_t2rpntlvwz0rst1_internal:
17444 IID = Intrinsic::x86_t2rpntlvwz0rst1_internal;
17445 break;
17446 case X86::BI__builtin_ia32_t2rpntlvwz1_internal:
17447 IID = Intrinsic::x86_t2rpntlvwz1_internal;
17448 break;
17449 case X86::BI__builtin_ia32_t2rpntlvwz1rs_internal:
17450 IID = Intrinsic::x86_t2rpntlvwz1rs_internal;
17451 break;
17452 case X86::BI__builtin_ia32_t2rpntlvwz1t1_internal:
17453 IID = Intrinsic::x86_t2rpntlvwz1t1_internal;
17454 break;
17455 case X86::BI__builtin_ia32_t2rpntlvwz1rst1_internal:
17456 IID = Intrinsic::x86_t2rpntlvwz1rst1_internal;
17457 break;
17458 }
17459
17460 // Ops = (Row0, Col0, Col1, DstPtr0, DstPtr1, SrcPtr, Stride)
17461 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID),
17462 {Ops[0], Ops[1], Ops[2], Ops[5], Ops[6]});
17463
17464 auto *PtrTy = E->getArg(3)->getType()->getAs<PointerType>();
17465 assert(PtrTy && "arg3 must be of pointer type");
17466 QualType PtreeTy = PtrTy->getPointeeType();
17467 llvm::Type *TyPtee = ConvertType(PtreeTy);
17468
17469    // Bitcast the AMX type (x86_amx) to a vector type (256 x i32),
17470    // then store tile0 into DstPtr0.
17471 Value *T0 = Builder.CreateExtractValue(Call, 0);
17472 Value *VecT0 = Builder.CreateIntrinsic(Intrinsic::x86_cast_tile_to_vector,
17473 {TyPtee}, {T0});
17474 Builder.CreateDefaultAlignedStore(VecT0, Ops[3]);
17475
17476 // Then store tile1 into DstPtr1
17477 Value *T1 = Builder.CreateExtractValue(Call, 1);
17478 Value *VecT1 = Builder.CreateIntrinsic(Intrinsic::x86_cast_tile_to_vector,
17479 {TyPtee}, {T1});
17480 Value *Store = Builder.CreateDefaultAlignedStore(VecT1, Ops[4]);
17481
17482    // Note: We deliberately avoid using x86_tilestored64_internal to store
17483    // the results here, because it cannot guarantee the scope of the memory
17484    // writes. That may force shape reloads after the first AMX intrinsic,
17485    // which the current AMX register allocation is unable to handle.
17486
17487 return Store;
17488 }
17489 case X86::BI__ud2:
17490 // llvm.trap makes a ud2a instruction on x86.
17491 return EmitTrapCall(Intrinsic::trap);
17492 case X86::BI__int2c: {
17493 // This syscall signals a driver assertion failure in x86 NT kernels.
17494 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
17495 llvm::InlineAsm *IA =
17496 llvm::InlineAsm::get(FTy, "int $$0x2c", "", /*hasSideEffects=*/true);
17497 llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
17498 getLLVMContext(), llvm::AttributeList::FunctionIndex,
17499 llvm::Attribute::NoReturn);
17500 llvm::CallInst *CI = Builder.CreateCall(IA);
17501 CI->setAttributes(NoReturnAttr);
17502 return CI;
17503 }
17504 case X86::BI__readfsbyte:
17505 case X86::BI__readfsword:
17506 case X86::BI__readfsdword:
17507 case X86::BI__readfsqword: {
17508 llvm::Type *IntTy = ConvertType(E->getType());
17509 Value *Ptr = Builder.CreateIntToPtr(
17510 Ops[0], llvm::PointerType::get(getLLVMContext(), 257));
17511 LoadInst *Load = Builder.CreateAlignedLoad(
17512 IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
17513 Load->setVolatile(true);
17514 return Load;
17515 }
17516 case X86::BI__readgsbyte:
17517 case X86::BI__readgsword:
17518 case X86::BI__readgsdword:
17519 case X86::BI__readgsqword: {
17520 llvm::Type *IntTy = ConvertType(E->getType());
17521 Value *Ptr = Builder.CreateIntToPtr(
17522 Ops[0], llvm::PointerType::get(getLLVMContext(), 256));
17523 LoadInst *Load = Builder.CreateAlignedLoad(
17524 IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
17525 Load->setVolatile(true);
17526 return Load;
17527 }
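  // Background assumption about the target convention: on x86 targets LLVM
  // reserves address space 256 for the GS segment and 257 for the FS segment,
  // which is why __readgs* builds its pointer in addrspace(256) and
  // __readfs* in addrspace(257) above.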
17528 case X86::BI__builtin_ia32_encodekey128_u32: {
17529 Intrinsic::ID IID = Intrinsic::x86_encodekey128;
17530
17531 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[0], Ops[1]});
17532
17533 for (int i = 0; i < 3; ++i) {
17534 Value *Extract = Builder.CreateExtractValue(Call, i + 1);
17535 Value *Ptr = Builder.CreateConstGEP1_32(Int8Ty, Ops[2], i * 16);
17536 Builder.CreateAlignedStore(Extract, Ptr, Align(1));
17537 }
17538
17539 return Builder.CreateExtractValue(Call, 0);
17540 }
17541 case X86::BI__builtin_ia32_encodekey256_u32: {
17542 Intrinsic::ID IID = Intrinsic::x86_encodekey256;
17543
17544 Value *Call =
17545 Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[0], Ops[1], Ops[2]});
17546
17547 for (int i = 0; i < 4; ++i) {
17548 Value *Extract = Builder.CreateExtractValue(Call, i + 1);
17549 Value *Ptr = Builder.CreateConstGEP1_32(Int8Ty, Ops[3], i * 16);
17550 Builder.CreateAlignedStore(Extract, Ptr, Align(1));
17551 }
17552
17553 return Builder.CreateExtractValue(Call, 0);
17554 }
17555 case X86::BI__builtin_ia32_aesenc128kl_u8:
17556 case X86::BI__builtin_ia32_aesdec128kl_u8:
17557 case X86::BI__builtin_ia32_aesenc256kl_u8:
17558 case X86::BI__builtin_ia32_aesdec256kl_u8: {
17559 Intrinsic::ID IID;
17560 StringRef BlockName;
17561 switch (BuiltinID) {
17562 default:
17563 llvm_unreachable("Unexpected builtin");
17564 case X86::BI__builtin_ia32_aesenc128kl_u8:
17565 IID = Intrinsic::x86_aesenc128kl;
17566 BlockName = "aesenc128kl";
17567 break;
17568 case X86::BI__builtin_ia32_aesdec128kl_u8:
17569 IID = Intrinsic::x86_aesdec128kl;
17570 BlockName = "aesdec128kl";
17571 break;
17572 case X86::BI__builtin_ia32_aesenc256kl_u8:
17573 IID = Intrinsic::x86_aesenc256kl;
17574 BlockName = "aesenc256kl";
17575 break;
17576 case X86::BI__builtin_ia32_aesdec256kl_u8:
17577 IID = Intrinsic::x86_aesdec256kl;
17578 BlockName = "aesdec256kl";
17579 break;
17580 }
17581
17582 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[1], Ops[2]});
17583
17584 BasicBlock *NoError =
17585 createBasicBlock(BlockName + "_no_error", this->CurFn);
17586 BasicBlock *Error = createBasicBlock(BlockName + "_error", this->CurFn);
17587 BasicBlock *End = createBasicBlock(BlockName + "_end", this->CurFn);
17588
17589 Value *Ret = Builder.CreateExtractValue(Call, 0);
17590 Value *Succ = Builder.CreateTrunc(Ret, Builder.getInt1Ty());
17591 Value *Out = Builder.CreateExtractValue(Call, 1);
17592 Builder.CreateCondBr(Succ, NoError, Error);
17593
17594    Builder.SetInsertPoint(NoError);
17595    Builder.CreateDefaultAlignedStore(Out, Ops[0]);
17596    Builder.CreateBr(End);
17597
17598 Builder.SetInsertPoint(Error);
17599 Constant *Zero = llvm::Constant::getNullValue(Out->getType());
17600 Builder.CreateDefaultAlignedStore(Zero, Ops[0]);
17601 Builder.CreateBr(End);
17602
17603 Builder.SetInsertPoint(End);
17604 return Builder.CreateExtractValue(Call, 0);
17605 }
17606 case X86::BI__builtin_ia32_aesencwide128kl_u8:
17607 case X86::BI__builtin_ia32_aesdecwide128kl_u8:
17608 case X86::BI__builtin_ia32_aesencwide256kl_u8:
17609 case X86::BI__builtin_ia32_aesdecwide256kl_u8: {
17610 Intrinsic::ID IID;
17611 StringRef BlockName;
17612 switch (BuiltinID) {
17613 case X86::BI__builtin_ia32_aesencwide128kl_u8:
17614 IID = Intrinsic::x86_aesencwide128kl;
17615 BlockName = "aesencwide128kl";
17616 break;
17617 case X86::BI__builtin_ia32_aesdecwide128kl_u8:
17618 IID = Intrinsic::x86_aesdecwide128kl;
17619 BlockName = "aesdecwide128kl";
17620 break;
17621 case X86::BI__builtin_ia32_aesencwide256kl_u8:
17622 IID = Intrinsic::x86_aesencwide256kl;
17623 BlockName = "aesencwide256kl";
17624 break;
17625 case X86::BI__builtin_ia32_aesdecwide256kl_u8:
17626 IID = Intrinsic::x86_aesdecwide256kl;
17627 BlockName = "aesdecwide256kl";
17628 break;
17629 }
17630
17631 llvm::Type *Ty = FixedVectorType::get(Builder.getInt64Ty(), 2);
17632 Value *InOps[9];
17633 InOps[0] = Ops[2];
17634 for (int i = 0; i != 8; ++i) {
17635 Value *Ptr = Builder.CreateConstGEP1_32(Ty, Ops[1], i);
17636 InOps[i + 1] = Builder.CreateAlignedLoad(Ty, Ptr, Align(16));
17637 }
17638
17639 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), InOps);
17640
17641 BasicBlock *NoError =
17642 createBasicBlock(BlockName + "_no_error", this->CurFn);
17643 BasicBlock *Error = createBasicBlock(BlockName + "_error", this->CurFn);
17644 BasicBlock *End = createBasicBlock(BlockName + "_end", this->CurFn);
17645
17646 Value *Ret = Builder.CreateExtractValue(Call, 0);
17647 Value *Succ = Builder.CreateTrunc(Ret, Builder.getInt1Ty());
17648 Builder.CreateCondBr(Succ, NoError, Error);
17649
17650 Builder.SetInsertPoint(NoError);
17651 for (int i = 0; i != 8; ++i) {
17652 Value *Extract = Builder.CreateExtractValue(Call, i + 1);
17653 Value *Ptr = Builder.CreateConstGEP1_32(Extract->getType(), Ops[0], i);
17654 Builder.CreateAlignedStore(Extract, Ptr, Align(16));
17655 }
17656 Builder.CreateBr(End);
17657
17658 Builder.SetInsertPoint(Error);
17659 for (int i = 0; i != 8; ++i) {
17660 Value *Out = Builder.CreateExtractValue(Call, i + 1);
17661 Constant *Zero = llvm::Constant::getNullValue(Out->getType());
17662 Value *Ptr = Builder.CreateConstGEP1_32(Out->getType(), Ops[0], i);
17663 Builder.CreateAlignedStore(Zero, Ptr, Align(16));
17664 }
17665 Builder.CreateBr(End);
17666
17667 Builder.SetInsertPoint(End);
17668 return Builder.CreateExtractValue(Call, 0);
17669 }
17670 case X86::BI__builtin_ia32_vfcmaddcph512_mask:
17671 IsConjFMA = true;
17672 [[fallthrough]];
17673 case X86::BI__builtin_ia32_vfmaddcph512_mask: {
17674 Intrinsic::ID IID = IsConjFMA
17675 ? Intrinsic::x86_avx512fp16_mask_vfcmadd_cph_512
17676 : Intrinsic::x86_avx512fp16_mask_vfmadd_cph_512;
17677 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
17678 return EmitX86Select(*this, Ops[3], Call, Ops[0]);
17679 }
17680 case X86::BI__builtin_ia32_vfcmaddcph256_round_mask:
17681 IsConjFMA = true;
17682    [[fallthrough]];
17683 case X86::BI__builtin_ia32_vfmaddcph256_round_mask: {
17684 Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx10_mask_vfcmaddcph256
17685 : Intrinsic::x86_avx10_mask_vfmaddcph256;
17686 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
17687 return EmitX86Select(*this, Ops[3], Call, Ops[0]);
17688 }
17689 case X86::BI__builtin_ia32_vfcmaddcsh_round_mask:
17690 IsConjFMA = true;
17691 [[fallthrough]];
17692 case X86::BI__builtin_ia32_vfmaddcsh_round_mask: {
17693 Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx512fp16_mask_vfcmadd_csh
17694 : Intrinsic::x86_avx512fp16_mask_vfmadd_csh;
17695 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
17696 Value *And = Builder.CreateAnd(Ops[3], llvm::ConstantInt::get(Int8Ty, 1));
17697 return EmitX86Select(*this, And, Call, Ops[0]);
17698 }
17699 case X86::BI__builtin_ia32_vfcmaddcsh_round_mask3:
17700 IsConjFMA = true;
17701 [[fallthrough]];
17702 case X86::BI__builtin_ia32_vfmaddcsh_round_mask3: {
17703 Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx512fp16_mask_vfcmadd_csh
17704 : Intrinsic::x86_avx512fp16_mask_vfmadd_csh;
17705 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
17706 static constexpr int Mask[] = {0, 5, 6, 7};
17707 return Builder.CreateShuffleVector(Call, Ops[2], Mask);
17708 }
17709 case X86::BI__builtin_ia32_prefetchi:
17710 return Builder.CreateCall(
17711 CGM.getIntrinsic(Intrinsic::prefetch, Ops[0]->getType()),
17712 {Ops[0], llvm::ConstantInt::get(Int32Ty, 0), Ops[1],
17713 llvm::ConstantInt::get(Int32Ty, 0)});
17714 }
17715}
17716
17717Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
17718 const CallExpr *E) {
17719 // Do not emit the builtin arguments in the arguments of a function call,
17720 // because the evaluation order of function arguments is not specified in C++.
17721 // This is important when testing to ensure the arguments are emitted in the
17722  // same order every time. E.g.:
17723 // Instead of:
17724 // return Builder.CreateFDiv(EmitScalarExpr(E->getArg(0)),
17725 // EmitScalarExpr(E->getArg(1)), "swdiv");
17726 // Use:
17727 // Value *Op0 = EmitScalarExpr(E->getArg(0));
17728 // Value *Op1 = EmitScalarExpr(E->getArg(1));
17729  // return Builder.CreateFDiv(Op0, Op1, "swdiv");
17730
17731 Intrinsic::ID ID = Intrinsic::not_intrinsic;
17732
17733#include "llvm/TargetParser/PPCTargetParser.def"
17734 auto GenAIXPPCBuiltinCpuExpr = [&](unsigned SupportMethod, unsigned FieldIdx,
17735 unsigned Mask, CmpInst::Predicate CompOp,
17736 unsigned OpValue) -> Value * {
17737 if (SupportMethod == BUILTIN_PPC_FALSE)
17738 return llvm::ConstantInt::getFalse(ConvertType(E->getType()));
17739
17740 if (SupportMethod == BUILTIN_PPC_TRUE)
17741 return llvm::ConstantInt::getTrue(ConvertType(E->getType()));
17742
17743 assert(SupportMethod <= SYS_CALL && "Invalid value for SupportMethod.");
17744
17745 llvm::Value *FieldValue = nullptr;
17746 if (SupportMethod == USE_SYS_CONF) {
17747 llvm::Type *STy = llvm::StructType::get(PPC_SYSTEMCONFIG_TYPE);
17748 llvm::Constant *SysConf =
17749 CGM.CreateRuntimeVariable(STy, "_system_configuration");
17750
17751 // Grab the appropriate field from _system_configuration.
17752 llvm::Value *Idxs[] = {ConstantInt::get(Int32Ty, 0),
17753 ConstantInt::get(Int32Ty, FieldIdx)};
17754
17755 FieldValue = Builder.CreateInBoundsGEP(STy, SysConf, Idxs);
17756      FieldValue = Builder.CreateAlignedLoad(Int32Ty, FieldValue,
17757                                             CharUnits::fromQuantity(4));
17758    } else if (SupportMethod == SYS_CALL) {
17759 llvm::FunctionType *FTy =
17760 llvm::FunctionType::get(Int64Ty, Int32Ty, false);
17761 llvm::FunctionCallee Func =
17762 CGM.CreateRuntimeFunction(FTy, "getsystemcfg");
17763
17764 FieldValue =
17765 Builder.CreateCall(Func, {ConstantInt::get(Int32Ty, FieldIdx)});
17766 }
17767 assert(FieldValue &&
17768 "SupportMethod value is not defined in PPCTargetParser.def.");
17769
17770 if (Mask)
17771 FieldValue = Builder.CreateAnd(FieldValue, Mask);
17772
17773 llvm::Type *ValueType = FieldValue->getType();
17774 bool IsValueType64Bit = ValueType->isIntegerTy(64);
17775 assert(
17776 (IsValueType64Bit || ValueType->isIntegerTy(32)) &&
17777 "Only 32/64-bit integers are supported in GenAIXPPCBuiltinCpuExpr().");
17778
17779 return Builder.CreateICmp(
17780 CompOp, FieldValue,
17781 ConstantInt::get(IsValueType64Bit ? Int64Ty : Int32Ty, OpValue));
17782 };
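  // Summary of GenAIXPPCBuiltinCpuExpr above: BUILTIN_PPC_TRUE/FALSE fold
  // directly to constants; USE_SYS_CONF loads a 32-bit field of the
  // _system_configuration struct; SYS_CALL queries getsystemcfg(). The selected
  // field is optionally masked and then compared against OpValue with the
  // supplied predicate.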
17783
17784 switch (BuiltinID) {
17785 default: return nullptr;
17786
17787 case Builtin::BI__builtin_cpu_is: {
17788 const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
17789 StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
17790 llvm::Triple Triple = getTarget().getTriple();
17791
17792 unsigned LinuxSupportMethod, LinuxIDValue, AIXSupportMethod, AIXIDValue;
17793 typedef std::tuple<unsigned, unsigned, unsigned, unsigned> CPUInfo;
17794
17795 std::tie(LinuxSupportMethod, LinuxIDValue, AIXSupportMethod, AIXIDValue) =
17796 static_cast<CPUInfo>(StringSwitch<CPUInfo>(CPUStr)
17797#define PPC_CPU(NAME, Linux_SUPPORT_METHOD, LinuxID, AIX_SUPPORT_METHOD, \
17798 AIXID) \
17799 .Case(NAME, {Linux_SUPPORT_METHOD, LinuxID, AIX_SUPPORT_METHOD, AIXID})
17800#include "llvm/TargetParser/PPCTargetParser.def"
17801 .Default({BUILTIN_PPC_UNSUPPORTED, 0,
17802 BUILTIN_PPC_UNSUPPORTED, 0}));
17803
17804 if (Triple.isOSAIX()) {
17805 assert((AIXSupportMethod != BUILTIN_PPC_UNSUPPORTED) &&
17806 "Invalid CPU name. Missed by SemaChecking?");
17807 return GenAIXPPCBuiltinCpuExpr(AIXSupportMethod, AIX_SYSCON_IMPL_IDX, 0,
17808 ICmpInst::ICMP_EQ, AIXIDValue);
17809 }
17810
17811 assert(Triple.isOSLinux() &&
17812 "__builtin_cpu_is() is only supported for AIX and Linux.");
17813
17814 assert((LinuxSupportMethod != BUILTIN_PPC_UNSUPPORTED) &&
17815 "Invalid CPU name. Missed by SemaChecking?");
17816
17817 if (LinuxSupportMethod == BUILTIN_PPC_FALSE)
17818 return llvm::ConstantInt::getFalse(ConvertType(E->getType()));
17819
17820 Value *Op0 = llvm::ConstantInt::get(Int32Ty, PPC_FAWORD_CPUID);
17821 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_fixed_addr_ld);
17822 Value *TheCall = Builder.CreateCall(F, {Op0}, "cpu_is");
17823 return Builder.CreateICmpEQ(TheCall,
17824 llvm::ConstantInt::get(Int32Ty, LinuxIDValue));
17825 }
17826 case Builtin::BI__builtin_cpu_supports: {
17827 llvm::Triple Triple = getTarget().getTriple();
17828 const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
17829 StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
17830 if (Triple.isOSAIX()) {
17831 unsigned SupportMethod, FieldIdx, Mask, Value;
17832 CmpInst::Predicate CompOp;
17833 typedef std::tuple<unsigned, unsigned, unsigned, CmpInst::Predicate,
17834 unsigned>
17835 CPUSupportType;
17836 std::tie(SupportMethod, FieldIdx, Mask, CompOp, Value) =
17837 static_cast<CPUSupportType>(StringSwitch<CPUSupportType>(CPUStr)
17838#define PPC_AIX_FEATURE(NAME, DESC, SUPPORT_METHOD, INDEX, MASK, COMP_OP, \
17839 VALUE) \
17840 .Case(NAME, {SUPPORT_METHOD, INDEX, MASK, COMP_OP, VALUE})
17841#include "llvm/TargetParser/PPCTargetParser.def"
17842 .Default({BUILTIN_PPC_FALSE, 0, 0,
17843 CmpInst::Predicate(), 0}));
17844 return GenAIXPPCBuiltinCpuExpr(SupportMethod, FieldIdx, Mask, CompOp,
17845 Value);
17846 }
17847
17848 assert(Triple.isOSLinux() &&
17849 "__builtin_cpu_supports() is only supported for AIX and Linux.");
17850 unsigned FeatureWord;
17851 unsigned BitMask;
17852 std::tie(FeatureWord, BitMask) =
17853 StringSwitch<std::pair<unsigned, unsigned>>(CPUStr)
17854#define PPC_LNX_FEATURE(Name, Description, EnumName, Bitmask, FA_WORD) \
17855 .Case(Name, {FA_WORD, Bitmask})
17856#include "llvm/TargetParser/PPCTargetParser.def"
17857 .Default({0, 0});
17858 if (!BitMask)
17859 return Builder.getFalse();
17860 Value *Op0 = llvm::ConstantInt::get(Int32Ty, FeatureWord);
17861 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_fixed_addr_ld);
17862 Value *TheCall = Builder.CreateCall(F, {Op0}, "cpu_supports");
17863 Value *Mask =
17864 Builder.CreateAnd(TheCall, llvm::ConstantInt::get(Int32Ty, BitMask));
17865 return Builder.CreateICmpNE(Mask, llvm::Constant::getNullValue(Int32Ty));
17866#undef PPC_FAWORD_HWCAP
17867#undef PPC_FAWORD_HWCAP2
17868#undef PPC_FAWORD_CPUID
17869 }
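  // Illustrative lowering sketch (feature name assumed for illustration): on
  // Linux, a query such as __builtin_cpu_supports("vsx") is expected to lower
  // to roughly
  //   %w = call i32 @llvm.ppc.fixed.addr.ld(i32 <feature word>)
  //   %m = and i32 %w, <feature bitmask>
  //   %r = icmp ne i32 %m, 0
  // while on AIX the same query goes through GenAIXPPCBuiltinCpuExpr, reading
  // _system_configuration or calling getsystemcfg() and comparing the result.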
17870
17871 // __builtin_ppc_get_timebase is GCC 4.8+'s PowerPC-specific name for what we
17872 // call __builtin_readcyclecounter.
17873 case PPC::BI__builtin_ppc_get_timebase:
17874 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::readcyclecounter));
17875
17876 // vec_ld, vec_xl_be, vec_lvsl, vec_lvsr
17877 case PPC::BI__builtin_altivec_lvx:
17878 case PPC::BI__builtin_altivec_lvxl:
17879 case PPC::BI__builtin_altivec_lvebx:
17880 case PPC::BI__builtin_altivec_lvehx:
17881 case PPC::BI__builtin_altivec_lvewx:
17882 case PPC::BI__builtin_altivec_lvsl:
17883 case PPC::BI__builtin_altivec_lvsr:
17884 case PPC::BI__builtin_vsx_lxvd2x:
17885 case PPC::BI__builtin_vsx_lxvw4x:
17886 case PPC::BI__builtin_vsx_lxvd2x_be:
17887 case PPC::BI__builtin_vsx_lxvw4x_be:
17888 case PPC::BI__builtin_vsx_lxvl:
17889 case PPC::BI__builtin_vsx_lxvll:
17890 {
17891 SmallVector<Value *, 2> Ops;
17892 Ops.push_back(EmitScalarExpr(E->getArg(0)));
17893 Ops.push_back(EmitScalarExpr(E->getArg(1)));
17894 if (!(BuiltinID == PPC::BI__builtin_vsx_lxvl ||
17895 BuiltinID == PPC::BI__builtin_vsx_lxvll)) {
17896 Ops[0] = Builder.CreateGEP(Int8Ty, Ops[1], Ops[0]);
17897 Ops.pop_back();
17898 }
17899
17900 switch (BuiltinID) {
17901 default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!");
17902 case PPC::BI__builtin_altivec_lvx:
17903 ID = Intrinsic::ppc_altivec_lvx;
17904 break;
17905 case PPC::BI__builtin_altivec_lvxl:
17906 ID = Intrinsic::ppc_altivec_lvxl;
17907 break;
17908 case PPC::BI__builtin_altivec_lvebx:
17909 ID = Intrinsic::ppc_altivec_lvebx;
17910 break;
17911 case PPC::BI__builtin_altivec_lvehx:
17912 ID = Intrinsic::ppc_altivec_lvehx;
17913 break;
17914 case PPC::BI__builtin_altivec_lvewx:
17915 ID = Intrinsic::ppc_altivec_lvewx;
17916 break;
17917 case PPC::BI__builtin_altivec_lvsl:
17918 ID = Intrinsic::ppc_altivec_lvsl;
17919 break;
17920 case PPC::BI__builtin_altivec_lvsr:
17921 ID = Intrinsic::ppc_altivec_lvsr;
17922 break;
17923 case PPC::BI__builtin_vsx_lxvd2x:
17924 ID = Intrinsic::ppc_vsx_lxvd2x;
17925 break;
17926 case PPC::BI__builtin_vsx_lxvw4x:
17927 ID = Intrinsic::ppc_vsx_lxvw4x;
17928 break;
17929 case PPC::BI__builtin_vsx_lxvd2x_be:
17930 ID = Intrinsic::ppc_vsx_lxvd2x_be;
17931 break;
17932 case PPC::BI__builtin_vsx_lxvw4x_be:
17933 ID = Intrinsic::ppc_vsx_lxvw4x_be;
17934 break;
17935 case PPC::BI__builtin_vsx_lxvl:
17936 ID = Intrinsic::ppc_vsx_lxvl;
17937 break;
17938 case PPC::BI__builtin_vsx_lxvll:
17939 ID = Intrinsic::ppc_vsx_lxvll;
17940 break;
17941 }
17942 llvm::Function *F = CGM.getIntrinsic(ID);
17943 return Builder.CreateCall(F, Ops, "");
17944 }
17945
17946 // vec_st, vec_xst_be
17947 case PPC::BI__builtin_altivec_stvx:
17948 case PPC::BI__builtin_altivec_stvxl:
17949 case PPC::BI__builtin_altivec_stvebx:
17950 case PPC::BI__builtin_altivec_stvehx:
17951 case PPC::BI__builtin_altivec_stvewx:
17952 case PPC::BI__builtin_vsx_stxvd2x:
17953 case PPC::BI__builtin_vsx_stxvw4x:
17954 case PPC::BI__builtin_vsx_stxvd2x_be:
17955 case PPC::BI__builtin_vsx_stxvw4x_be:
17956 case PPC::BI__builtin_vsx_stxvl:
17957 case PPC::BI__builtin_vsx_stxvll:
17958 {
17959 SmallVector<Value *, 3> Ops;
17960 Ops.push_back(EmitScalarExpr(E->getArg(0)));
17961 Ops.push_back(EmitScalarExpr(E->getArg(1)));
17962 Ops.push_back(EmitScalarExpr(E->getArg(2)));
17963 if (!(BuiltinID == PPC::BI__builtin_vsx_stxvl ||
17964 BuiltinID == PPC::BI__builtin_vsx_stxvll)) {
17965 Ops[1] = Builder.CreateGEP(Int8Ty, Ops[2], Ops[1]);
17966 Ops.pop_back();
17967 }
17968
17969 switch (BuiltinID) {
17970 default: llvm_unreachable("Unsupported st intrinsic!");
17971 case PPC::BI__builtin_altivec_stvx:
17972 ID = Intrinsic::ppc_altivec_stvx;
17973 break;
17974 case PPC::BI__builtin_altivec_stvxl:
17975 ID = Intrinsic::ppc_altivec_stvxl;
17976 break;
17977 case PPC::BI__builtin_altivec_stvebx:
17978 ID = Intrinsic::ppc_altivec_stvebx;
17979 break;
17980 case PPC::BI__builtin_altivec_stvehx:
17981 ID = Intrinsic::ppc_altivec_stvehx;
17982 break;
17983 case PPC::BI__builtin_altivec_stvewx:
17984 ID = Intrinsic::ppc_altivec_stvewx;
17985 break;
17986 case PPC::BI__builtin_vsx_stxvd2x:
17987 ID = Intrinsic::ppc_vsx_stxvd2x;
17988 break;
17989 case PPC::BI__builtin_vsx_stxvw4x:
17990 ID = Intrinsic::ppc_vsx_stxvw4x;
17991 break;
17992 case PPC::BI__builtin_vsx_stxvd2x_be:
17993 ID = Intrinsic::ppc_vsx_stxvd2x_be;
17994 break;
17995 case PPC::BI__builtin_vsx_stxvw4x_be:
17996 ID = Intrinsic::ppc_vsx_stxvw4x_be;
17997 break;
17998 case PPC::BI__builtin_vsx_stxvl:
17999 ID = Intrinsic::ppc_vsx_stxvl;
18000 break;
18001 case PPC::BI__builtin_vsx_stxvll:
18002 ID = Intrinsic::ppc_vsx_stxvll;
18003 break;
18004 }
18005 llvm::Function *F = CGM.getIntrinsic(ID);
18006 return Builder.CreateCall(F, Ops, "");
18007 }
18008 case PPC::BI__builtin_vsx_ldrmb: {
18009 // This essentially boils down to performing an unaligned VMX load sequence
18010 // so as to avoid crossing a page boundary, and then shuffling the elements
18011 // into the right side of the vector register.
18012 Value *Op0 = EmitScalarExpr(E->getArg(0));
18013 Value *Op1 = EmitScalarExpr(E->getArg(1));
18014 int64_t NumBytes = cast<ConstantInt>(Op1)->getZExtValue();
18015 llvm::Type *ResTy = ConvertType(E->getType());
18016 bool IsLE = getTarget().isLittleEndian();
18017
18018 // If the user wants the entire vector, just load the entire vector.
18019 if (NumBytes == 16) {
18020 Value *LD =
18021 Builder.CreateLoad(Address(Op0, ResTy, CharUnits::fromQuantity(1)));
18022 if (!IsLE)
18023 return LD;
18024
18025 // Reverse the bytes on LE.
18026 SmallVector<int, 16> RevMask;
18027 for (int Idx = 0; Idx < 16; Idx++)
18028 RevMask.push_back(15 - Idx);
18029 return Builder.CreateShuffleVector(LD, LD, RevMask);
18030 }
18031
18032 llvm::Function *Lvx = CGM.getIntrinsic(Intrinsic::ppc_altivec_lvx);
18033 llvm::Function *Lvs = CGM.getIntrinsic(IsLE ? Intrinsic::ppc_altivec_lvsr
18034 : Intrinsic::ppc_altivec_lvsl);
18035 llvm::Function *Vperm = CGM.getIntrinsic(Intrinsic::ppc_altivec_vperm);
18036 Value *HiMem = Builder.CreateGEP(
18037 Int8Ty, Op0, ConstantInt::get(Op1->getType(), NumBytes - 1));
18038 Value *LoLd = Builder.CreateCall(Lvx, Op0, "ld.lo");
18039 Value *HiLd = Builder.CreateCall(Lvx, HiMem, "ld.hi");
18040 Value *Mask1 = Builder.CreateCall(Lvs, Op0, "mask1");
18041
18042 Op0 = IsLE ? HiLd : LoLd;
18043 Op1 = IsLE ? LoLd : HiLd;
18044 Value *AllElts = Builder.CreateCall(Vperm, {Op0, Op1, Mask1}, "shuffle1");
18045 Constant *Zero = llvm::Constant::getNullValue(IsLE ? ResTy : AllElts->getType());
18046
18047 if (IsLE) {
18048 SmallVector<int, 16> Consts;
18049 for (int Idx = 0; Idx < 16; Idx++) {
18050 int Val = (NumBytes - Idx - 1 >= 0) ? (NumBytes - Idx - 1)
18051 : 16 - (NumBytes - Idx);
18052 Consts.push_back(Val);
18053 }
18054 return Builder.CreateShuffleVector(Builder.CreateBitCast(AllElts, ResTy),
18055 Zero, Consts);
18056 }
18057 SmallVector<Constant *, 16> Consts;
18058 for (int Idx = 0; Idx < 16; Idx++)
18059 Consts.push_back(Builder.getInt8(NumBytes + Idx));
18060 Value *Mask2 = ConstantVector::get(Consts);
18061 return Builder.CreateBitCast(
18062 Builder.CreateCall(Vperm, {Zero, AllElts, Mask2}, "shuffle2"), ResTy);
18063 }
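  // Sketch of the partial-load path above (NumBytes < 16): two lvx loads cover
  // [Op0, Op0 + NumBytes) without touching a following page, an lvsl/lvsr mask
  // splices them with vperm, and a final shuffle (LE) or second vperm (BE)
  // zero-pads the result so only the requested bytes occupy the right side of
  // the vector register.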
18064 case PPC::BI__builtin_vsx_strmb: {
18065 Value *Op0 = EmitScalarExpr(E->getArg(0));
18066 Value *Op1 = EmitScalarExpr(E->getArg(1));
18067 Value *Op2 = EmitScalarExpr(E->getArg(2));
18068 int64_t NumBytes = cast<ConstantInt>(Op1)->getZExtValue();
18069 bool IsLE = getTarget().isLittleEndian();
18070 auto StoreSubVec = [&](unsigned Width, unsigned Offset, unsigned EltNo) {
18071 // When storing the whole vector, simply store it on BE; on LE, reverse the
18072 // bytes and then store.
18073 if (Width == 16) {
18074 Value *StVec = Op2;
18075 if (IsLE) {
18076 SmallVector<int, 16> RevMask;
18077 for (int Idx = 0; Idx < 16; Idx++)
18078 RevMask.push_back(15 - Idx);
18079 StVec = Builder.CreateShuffleVector(Op2, Op2, RevMask);
18080 }
18081 return Builder.CreateStore(
18082 StVec, Address(Op0, Op2->getType(), CharUnits::fromQuantity(1)));
18083 }
18084 auto *ConvTy = Int64Ty;
18085 unsigned NumElts = 0;
18086 switch (Width) {
18087 default:
18088 llvm_unreachable("width for stores must be a power of 2");
18089 case 8:
18090 ConvTy = Int64Ty;
18091 NumElts = 2;
18092 break;
18093 case 4:
18094 ConvTy = Int32Ty;
18095 NumElts = 4;
18096 break;
18097 case 2:
18098 ConvTy = Int16Ty;
18099 NumElts = 8;
18100 break;
18101 case 1:
18102 ConvTy = Int8Ty;
18103 NumElts = 16;
18104 break;
18105 }
18106 Value *Vec = Builder.CreateBitCast(
18107 Op2, llvm::FixedVectorType::get(ConvTy, NumElts));
18108 Value *Ptr =
18109 Builder.CreateGEP(Int8Ty, Op0, ConstantInt::get(Int64Ty, Offset));
18110 Value *Elt = Builder.CreateExtractElement(Vec, EltNo);
18111 if (IsLE && Width > 1) {
18112 Function *F = CGM.getIntrinsic(Intrinsic::bswap, ConvTy);
18113 Elt = Builder.CreateCall(F, Elt);
18114 }
18115 return Builder.CreateStore(
18116 Elt, Address(Ptr, ConvTy, CharUnits::fromQuantity(1)));
18117 };
18118 unsigned Stored = 0;
18119 unsigned RemainingBytes = NumBytes;
18120 Value *Result;
18121 if (NumBytes == 16)
18122 return StoreSubVec(16, 0, 0);
18123 if (NumBytes >= 8) {
18124 Result = StoreSubVec(8, NumBytes - 8, IsLE ? 0 : 1);
18125 RemainingBytes -= 8;
18126 Stored += 8;
18127 }
18128 if (RemainingBytes >= 4) {
18129 Result = StoreSubVec(4, NumBytes - Stored - 4,
18130 IsLE ? (Stored >> 2) : 3 - (Stored >> 2));
18131 RemainingBytes -= 4;
18132 Stored += 4;
18133 }
18134 if (RemainingBytes >= 2) {
18135 Result = StoreSubVec(2, NumBytes - Stored - 2,
18136 IsLE ? (Stored >> 1) : 7 - (Stored >> 1));
18137 RemainingBytes -= 2;
18138 Stored += 2;
18139 }
18140 if (RemainingBytes)
18141 Result =
18142 StoreSubVec(1, NumBytes - Stored - 1, IsLE ? Stored : 15 - Stored);
18143 return Result;
18144 }
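  // StoreSubVec decomposition above: a 16-byte request becomes a single vector
  // store; otherwise the requested bytes are peeled off in 8/4/2/1-byte chunks,
  // each extracted from a suitably retyped copy of the source vector,
  // byte-swapped on LE, and stored at a decreasing offset.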
18145 // Square root
18146 case PPC::BI__builtin_vsx_xvsqrtsp:
18147 case PPC::BI__builtin_vsx_xvsqrtdp: {
18148 llvm::Type *ResultType = ConvertType(E->getType());
18149 Value *X = EmitScalarExpr(E->getArg(0));
18150 if (Builder.getIsFPConstrained()) {
18151 llvm::Function *F = CGM.getIntrinsic(
18152 Intrinsic::experimental_constrained_sqrt, ResultType);
18153 return Builder.CreateConstrainedFPCall(F, X);
18154 } else {
18155 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
18156 return Builder.CreateCall(F, X);
18157 }
18158 }
18159 // Count leading zeros
18160 case PPC::BI__builtin_altivec_vclzb:
18161 case PPC::BI__builtin_altivec_vclzh:
18162 case PPC::BI__builtin_altivec_vclzw:
18163 case PPC::BI__builtin_altivec_vclzd: {
18164 llvm::Type *ResultType = ConvertType(E->getType());
18165 Value *X = EmitScalarExpr(E->getArg(0));
18166 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
18167 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
18168 return Builder.CreateCall(F, {X, Undef});
18169 }
18170 case PPC::BI__builtin_altivec_vctzb:
18171 case PPC::BI__builtin_altivec_vctzh:
18172 case PPC::BI__builtin_altivec_vctzw:
18173 case PPC::BI__builtin_altivec_vctzd: {
18174 llvm::Type *ResultType = ConvertType(E->getType());
18175 Value *X = EmitScalarExpr(E->getArg(0));
18176 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
18177 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
18178 return Builder.CreateCall(F, {X, Undef});
18179 }
18180 case PPC::BI__builtin_altivec_vinsd:
18181 case PPC::BI__builtin_altivec_vinsw:
18182 case PPC::BI__builtin_altivec_vinsd_elt:
18183 case PPC::BI__builtin_altivec_vinsw_elt: {
18184 llvm::Type *ResultType = ConvertType(E->getType());
18185 Value *Op0 = EmitScalarExpr(E->getArg(0));
18186 Value *Op1 = EmitScalarExpr(E->getArg(1));
18187 Value *Op2 = EmitScalarExpr(E->getArg(2));
18188
18189 bool IsUnaligned = (BuiltinID == PPC::BI__builtin_altivec_vinsw ||
18190 BuiltinID == PPC::BI__builtin_altivec_vinsd);
18191
18192 bool Is32bit = (BuiltinID == PPC::BI__builtin_altivec_vinsw ||
18193 BuiltinID == PPC::BI__builtin_altivec_vinsw_elt);
18194
18195 // The third argument must be a compile time constant.
18196 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
18197 assert(ArgCI &&
18198 "Third Arg to vinsw/vinsd intrinsic must be a constant integer!");
18199
18200 // The valid range for the third argument depends on the input type and
18201 // the builtin called.
18202 int ValidMaxValue = 0;
18203 if (IsUnaligned)
18204 ValidMaxValue = (Is32bit) ? 12 : 8;
18205 else
18206 ValidMaxValue = (Is32bit) ? 3 : 1;
18207
18208 // Get value of third argument.
18209 int64_t ConstArg = ArgCI->getSExtValue();
18210
18211 // Compose range checking error message.
18212 std::string RangeErrMsg = IsUnaligned ? "byte" : "element";
18213 RangeErrMsg += " number " + llvm::to_string(ConstArg);
18214 RangeErrMsg += " is outside of the valid range [0, ";
18215 RangeErrMsg += llvm::to_string(ValidMaxValue) + "]";
18216
18217 // Issue error if third argument is not within the valid range.
18218 if (ConstArg < 0 || ConstArg > ValidMaxValue)
18219 CGM.Error(E->getExprLoc(), RangeErrMsg);
18220
18221 // The input to vec_replace_elt is an element index; convert it to a byte index.
18222 if (!IsUnaligned) {
18223 ConstArg *= Is32bit ? 4 : 8;
18224 // Fix the constant according to endianness.
18225 if (getTarget().isLittleEndian())
18226 ConstArg = (Is32bit ? 12 : 8) - ConstArg;
18227 }
18228
18229 ID = Is32bit ? Intrinsic::ppc_altivec_vinsw : Intrinsic::ppc_altivec_vinsd;
18230 Op2 = ConstantInt::getSigned(Int32Ty, ConstArg);
18231 // Casting input to vector int as per intrinsic definition.
18232 Op0 =
18233 Is32bit
18234 ? Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int32Ty, 4))
18235 : Builder.CreateBitCast(Op0,
18236 llvm::FixedVectorType::get(Int64Ty, 2));
18237 return Builder.CreateBitCast(
18238 Builder.CreateCall(CGM.getIntrinsic(ID), {Op0, Op1, Op2}), ResultType);
18239 }
18240 case PPC::BI__builtin_altivec_vadduqm:
18241 case PPC::BI__builtin_altivec_vsubuqm: {
18242 Value *Op0 = EmitScalarExpr(E->getArg(0));
18243 Value *Op1 = EmitScalarExpr(E->getArg(1));
18244 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
18245 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int128Ty, 1));
18246 Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int128Ty, 1));
18247 if (BuiltinID == PPC::BI__builtin_altivec_vadduqm)
18248 return Builder.CreateAdd(Op0, Op1, "vadduqm");
18249 else
18250 return Builder.CreateSub(Op0, Op1, "vsubuqm");
18251 }
18252 case PPC::BI__builtin_altivec_vaddcuq_c:
18253 case PPC::BI__builtin_altivec_vsubcuq_c: {
18254 SmallVector<Value *, 2> Ops;
18255 Value *Op0 = EmitScalarExpr(E->getArg(0));
18256 Value *Op1 = EmitScalarExpr(E->getArg(1));
18257 llvm::Type *V1I128Ty = llvm::FixedVectorType::get(
18258 llvm::IntegerType::get(getLLVMContext(), 128), 1);
18259 Ops.push_back(Builder.CreateBitCast(Op0, V1I128Ty));
18260 Ops.push_back(Builder.CreateBitCast(Op1, V1I128Ty));
18261 ID = (BuiltinID == PPC::BI__builtin_altivec_vaddcuq_c)
18262 ? Intrinsic::ppc_altivec_vaddcuq
18263 : Intrinsic::ppc_altivec_vsubcuq;
18264 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops, "");
18265 }
18266 case PPC::BI__builtin_altivec_vaddeuqm_c:
18267 case PPC::BI__builtin_altivec_vaddecuq_c:
18268 case PPC::BI__builtin_altivec_vsubeuqm_c:
18269 case PPC::BI__builtin_altivec_vsubecuq_c: {
18270 SmallVector<Value *, 4> Ops;
18271 Value *Op0 = EmitScalarExpr(E->getArg(0));
18272 Value *Op1 = EmitScalarExpr(E->getArg(1));
18273 Value *Op2 = EmitScalarExpr(E->getArg(2));
18274 llvm::Type *V1I128Ty = llvm::FixedVectorType::get(
18275 llvm::IntegerType::get(getLLVMContext(), 128), 1);
18276 Ops.push_back(Builder.CreateBitCast(Op0, V1I128Ty));
18277 Ops.push_back(Builder.CreateBitCast(Op1, V1I128Ty));
18278 Ops.push_back(Builder.CreateBitCast(Op2, V1I128Ty));
18279 switch (BuiltinID) {
18280 default:
18281 llvm_unreachable("Unsupported intrinsic!");
18282 case PPC::BI__builtin_altivec_vaddeuqm_c:
18283 ID = Intrinsic::ppc_altivec_vaddeuqm;
18284 break;
18285 case PPC::BI__builtin_altivec_vaddecuq_c:
18286 ID = Intrinsic::ppc_altivec_vaddecuq;
18287 break;
18288 case PPC::BI__builtin_altivec_vsubeuqm_c:
18289 ID = Intrinsic::ppc_altivec_vsubeuqm;
18290 break;
18291 case PPC::BI__builtin_altivec_vsubecuq_c:
18292 ID = Intrinsic::ppc_altivec_vsubecuq;
18293 break;
18294 }
18295 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops, "");
18296 }
18297 case PPC::BI__builtin_ppc_rldimi:
18298 case PPC::BI__builtin_ppc_rlwimi: {
18299 Value *Op0 = EmitScalarExpr(E->getArg(0));
18300 Value *Op1 = EmitScalarExpr(E->getArg(1));
18301 Value *Op2 = EmitScalarExpr(E->getArg(2));
18302 Value *Op3 = EmitScalarExpr(E->getArg(3));
18303 // rldimi is a 64-bit instruction, so on 32-bit targets expand the intrinsic
18304 // before isel to leverage the peephole optimizer and avoid legalization effort.
18305 if (BuiltinID == PPC::BI__builtin_ppc_rldimi &&
18306 !getTarget().getTriple().isPPC64()) {
18307 Function *F = CGM.getIntrinsic(Intrinsic::fshl, Op0->getType());
18308 Op2 = Builder.CreateZExt(Op2, Int64Ty);
18309 Value *Shift = Builder.CreateCall(F, {Op0, Op0, Op2});
18310 return Builder.CreateOr(Builder.CreateAnd(Shift, Op3),
18311 Builder.CreateAnd(Op1, Builder.CreateNot(Op3)));
18312 }
18313 return Builder.CreateCall(
18314 CGM.getIntrinsic(BuiltinID == PPC::BI__builtin_ppc_rldimi
18315 ? Intrinsic::ppc_rldimi
18316 : Intrinsic::ppc_rlwimi),
18317 {Op0, Op1, Op2, Op3});
18318 }
18319 case PPC::BI__builtin_ppc_rlwnm: {
18320 Value *Op0 = EmitScalarExpr(E->getArg(0));
18321 Value *Op1 = EmitScalarExpr(E->getArg(1));
18322 Value *Op2 = EmitScalarExpr(E->getArg(2));
18323 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_rlwnm),
18324 {Op0, Op1, Op2});
18325 }
18326 case PPC::BI__builtin_ppc_poppar4:
18327 case PPC::BI__builtin_ppc_poppar8: {
18328 Value *Op0 = EmitScalarExpr(E->getArg(0));
18329 llvm::Type *ArgType = Op0->getType();
18330 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
18331 Value *Tmp = Builder.CreateCall(F, Op0);
18332
18333 llvm::Type *ResultType = ConvertType(E->getType());
18334 Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
18335 if (Result->getType() != ResultType)
18336 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
18337 "cast");
18338 return Result;
18339 }
18340 case PPC::BI__builtin_ppc_cmpb: {
18341 Value *Op0 = EmitScalarExpr(E->getArg(0));
18342 Value *Op1 = EmitScalarExpr(E->getArg(1));
18343 if (getTarget().getTriple().isPPC64()) {
18344 Function *F =
18345 CGM.getIntrinsic(Intrinsic::ppc_cmpb, {Int64Ty, Int64Ty, Int64Ty});
18346 return Builder.CreateCall(F, {Op0, Op1}, "cmpb");
18347 }
18348 // For 32-bit targets, emit code like the following:
18349 // %conv = trunc i64 %a to i32
18350 // %conv1 = trunc i64 %b to i32
18351 // %shr = lshr i64 %a, 32
18352 // %conv2 = trunc i64 %shr to i32
18353 // %shr3 = lshr i64 %b, 32
18354 // %conv4 = trunc i64 %shr3 to i32
18355 // %0 = tail call i32 @llvm.ppc.cmpb32(i32 %conv, i32 %conv1)
18356 // %conv5 = zext i32 %0 to i64
18357 // %1 = tail call i32 @llvm.ppc.cmpb32(i32 %conv2, i32 %conv4)
18358 // %conv614 = zext i32 %1 to i64
18359 // %shl = shl nuw i64 %conv614, 32
18360 // %or = or i64 %shl, %conv5
18361 // ret i64 %or
18362 Function *F =
18363 CGM.getIntrinsic(Intrinsic::ppc_cmpb, {Int32Ty, Int32Ty, Int32Ty});
18364 Value *ArgOneLo = Builder.CreateTrunc(Op0, Int32Ty);
18365 Value *ArgTwoLo = Builder.CreateTrunc(Op1, Int32Ty);
18366 Constant *ShiftAmt = ConstantInt::get(Int64Ty, 32);
18367 Value *ArgOneHi =
18368 Builder.CreateTrunc(Builder.CreateLShr(Op0, ShiftAmt), Int32Ty);
18369 Value *ArgTwoHi =
18370 Builder.CreateTrunc(Builder.CreateLShr(Op1, ShiftAmt), Int32Ty);
18371 Value *ResLo = Builder.CreateZExt(
18372 Builder.CreateCall(F, {ArgOneLo, ArgTwoLo}, "cmpb"), Int64Ty);
18373 Value *ResHiShift = Builder.CreateZExt(
18374 Builder.CreateCall(F, {ArgOneHi, ArgTwoHi}, "cmpb"), Int64Ty);
18375 Value *ResHi = Builder.CreateShl(ResHiShift, ShiftAmt);
18376 return Builder.CreateOr(ResLo, ResHi);
18377 }
18378 // Copy sign
18379 case PPC::BI__builtin_vsx_xvcpsgnsp:
18380 case PPC::BI__builtin_vsx_xvcpsgndp: {
18381 llvm::Type *ResultType = ConvertType(E->getType());
18382 Value *X = EmitScalarExpr(E->getArg(0));
18383 Value *Y = EmitScalarExpr(E->getArg(1));
18384 ID = Intrinsic::copysign;
18385 llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
18386 return Builder.CreateCall(F, {X, Y});
18387 }
18388 // Rounding/truncation
18389 case PPC::BI__builtin_vsx_xvrspip:
18390 case PPC::BI__builtin_vsx_xvrdpip:
18391 case PPC::BI__builtin_vsx_xvrdpim:
18392 case PPC::BI__builtin_vsx_xvrspim:
18393 case PPC::BI__builtin_vsx_xvrdpi:
18394 case PPC::BI__builtin_vsx_xvrspi:
18395 case PPC::BI__builtin_vsx_xvrdpic:
18396 case PPC::BI__builtin_vsx_xvrspic:
18397 case PPC::BI__builtin_vsx_xvrdpiz:
18398 case PPC::BI__builtin_vsx_xvrspiz: {
18399 llvm::Type *ResultType = ConvertType(E->getType());
18400 Value *X = EmitScalarExpr(E->getArg(0));
18401 if (BuiltinID == PPC::BI__builtin_vsx_xvrdpim ||
18402 BuiltinID == PPC::BI__builtin_vsx_xvrspim)
18403 ID = Builder.getIsFPConstrained()
18404 ? Intrinsic::experimental_constrained_floor
18405 : Intrinsic::floor;
18406 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpi ||
18407 BuiltinID == PPC::BI__builtin_vsx_xvrspi)
18408 ID = Builder.getIsFPConstrained()
18409 ? Intrinsic::experimental_constrained_round
18410 : Intrinsic::round;
18411 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpic ||
18412 BuiltinID == PPC::BI__builtin_vsx_xvrspic)
18413 ID = Builder.getIsFPConstrained()
18414 ? Intrinsic::experimental_constrained_rint
18415 : Intrinsic::rint;
18416 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpip ||
18417 BuiltinID == PPC::BI__builtin_vsx_xvrspip)
18418 ID = Builder.getIsFPConstrained()
18419 ? Intrinsic::experimental_constrained_ceil
18420 : Intrinsic::ceil;
18421 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpiz ||
18422 BuiltinID == PPC::BI__builtin_vsx_xvrspiz)
18423 ID = Builder.getIsFPConstrained()
18424 ? Intrinsic::experimental_constrained_trunc
18425 : Intrinsic::trunc;
18426 llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
18427 return Builder.getIsFPConstrained() ? Builder.CreateConstrainedFPCall(F, X)
18428 : Builder.CreateCall(F, X);
18429 }
18430
18431 // Absolute value
18432 case PPC::BI__builtin_vsx_xvabsdp:
18433 case PPC::BI__builtin_vsx_xvabssp: {
18434 llvm::Type *ResultType = ConvertType(E->getType());
18435 Value *X = EmitScalarExpr(E->getArg(0));
18436 llvm::Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
18437 return Builder.CreateCall(F, X);
18438 }
18439
18440 // Fastmath by default
18441 case PPC::BI__builtin_ppc_recipdivf:
18442 case PPC::BI__builtin_ppc_recipdivd:
18443 case PPC::BI__builtin_ppc_rsqrtf:
18444 case PPC::BI__builtin_ppc_rsqrtd: {
18445 FastMathFlags FMF = Builder.getFastMathFlags();
18446 Builder.getFastMathFlags().setFast();
18447 llvm::Type *ResultType = ConvertType(E->getType());
18448 Value *X = EmitScalarExpr(E->getArg(0));
18449
18450 if (BuiltinID == PPC::BI__builtin_ppc_recipdivf ||
18451 BuiltinID == PPC::BI__builtin_ppc_recipdivd) {
18452 Value *Y = EmitScalarExpr(E->getArg(1));
18453 Value *FDiv = Builder.CreateFDiv(X, Y, "recipdiv");
18454 Builder.getFastMathFlags() &= (FMF);
18455 return FDiv;
18456 }
18457 auto *One = ConstantFP::get(ResultType, 1.0);
18458 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
18459 Value *FDiv = Builder.CreateFDiv(One, Builder.CreateCall(F, X), "rsqrt");
18460 Builder.getFastMathFlags() &= (FMF);
18461 return FDiv;
18462 }
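  // The recipdiv/rsqrt cases above temporarily force fast-math flags so the
  // plain fdiv (or 1.0 / sqrt(x)) can be selected as a hardware estimate
  // sequence, then restore the caller's flags from the saved FMF.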
18463 case PPC::BI__builtin_ppc_alignx: {
18464 Value *Op0 = EmitScalarExpr(E->getArg(0));
18465 Value *Op1 = EmitScalarExpr(E->getArg(1));
18466 ConstantInt *AlignmentCI = cast<ConstantInt>(Op0);
18467 if (AlignmentCI->getValue().ugt(llvm::Value::MaximumAlignment))
18468 AlignmentCI = ConstantInt::get(AlignmentCI->getIntegerType(),
18469 llvm::Value::MaximumAlignment);
18470
18471 emitAlignmentAssumption(Op1, E->getArg(1),
18472 /*The expr loc is sufficient.*/ SourceLocation(),
18473 AlignmentCI, nullptr);
18474 return Op1;
18475 }
18476 case PPC::BI__builtin_ppc_rdlam: {
18477 Value *Op0 = EmitScalarExpr(E->getArg(0));
18478 Value *Op1 = EmitScalarExpr(E->getArg(1));
18479 Value *Op2 = EmitScalarExpr(E->getArg(2));
18480 llvm::Type *Ty = Op0->getType();
18481 Value *ShiftAmt = Builder.CreateIntCast(Op1, Ty, false);
18482 Function *F = CGM.getIntrinsic(Intrinsic::fshl, Ty);
18483 Value *Rotate = Builder.CreateCall(F, {Op0, Op0, ShiftAmt});
18484 return Builder.CreateAnd(Rotate, Op2);
18485 }
18486 case PPC::BI__builtin_ppc_load2r: {
18487 Function *F = CGM.getIntrinsic(Intrinsic::ppc_load2r);
18488 Value *Op0 = EmitScalarExpr(E->getArg(0));
18489 Value *LoadIntrinsic = Builder.CreateCall(F, {Op0});
18490 return Builder.CreateTrunc(LoadIntrinsic, Int16Ty);
18491 }
18492 // FMA variations
18493 case PPC::BI__builtin_ppc_fnmsub:
18494 case PPC::BI__builtin_ppc_fnmsubs:
18495 case PPC::BI__builtin_vsx_xvmaddadp:
18496 case PPC::BI__builtin_vsx_xvmaddasp:
18497 case PPC::BI__builtin_vsx_xvnmaddadp:
18498 case PPC::BI__builtin_vsx_xvnmaddasp:
18499 case PPC::BI__builtin_vsx_xvmsubadp:
18500 case PPC::BI__builtin_vsx_xvmsubasp:
18501 case PPC::BI__builtin_vsx_xvnmsubadp:
18502 case PPC::BI__builtin_vsx_xvnmsubasp: {
18503 llvm::Type *ResultType = ConvertType(E->getType());
18504 Value *X = EmitScalarExpr(E->getArg(0));
18505 Value *Y = EmitScalarExpr(E->getArg(1));
18506 Value *Z = EmitScalarExpr(E->getArg(2));
18507 llvm::Function *F;
18508 if (Builder.getIsFPConstrained())
18509 F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
18510 else
18511 F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
18512 switch (BuiltinID) {
18513 case PPC::BI__builtin_vsx_xvmaddadp:
18514 case PPC::BI__builtin_vsx_xvmaddasp:
18515 if (Builder.getIsFPConstrained())
18516 return Builder.CreateConstrainedFPCall(F, {X, Y, Z});
18517 else
18518 return Builder.CreateCall(F, {X, Y, Z});
18519 case PPC::BI__builtin_vsx_xvnmaddadp:
18520 case PPC::BI__builtin_vsx_xvnmaddasp:
18521 if (Builder.getIsFPConstrained())
18522 return Builder.CreateFNeg(
18523 Builder.CreateConstrainedFPCall(F, {X, Y, Z}), "neg");
18524 else
18525 return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, Z}), "neg");
18526 case PPC::BI__builtin_vsx_xvmsubadp:
18527 case PPC::BI__builtin_vsx_xvmsubasp:
18528 if (Builder.getIsFPConstrained())
18529 return Builder.CreateConstrainedFPCall(
18530 F, {X, Y, Builder.CreateFNeg(Z, "neg")});
18531 else
18532 return Builder.CreateCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")});
18533 case PPC::BI__builtin_ppc_fnmsub:
18534 case PPC::BI__builtin_ppc_fnmsubs:
18535 case PPC::BI__builtin_vsx_xvnmsubadp:
18536 case PPC::BI__builtin_vsx_xvnmsubasp:
18537 if (Builder.getIsFPConstrained())
18538 return Builder.CreateFNeg(
18539 Builder.CreateConstrainedFPCall(
18540 F, {X, Y, Builder.CreateFNeg(Z, "neg")}),
18541 "neg");
18542 else
18543 return Builder.CreateCall(
18544 CGM.getIntrinsic(Intrinsic::ppc_fnmsub, ResultType), {X, Y, Z});
18545 }
18546 llvm_unreachable("Unknown FMA operation");
18547 return nullptr; // Suppress no-return warning
18548 }
18549
18550 case PPC::BI__builtin_vsx_insertword: {
18551 Value *Op0 = EmitScalarExpr(E->getArg(0));
18552 Value *Op1 = EmitScalarExpr(E->getArg(1));
18553 Value *Op2 = EmitScalarExpr(E->getArg(2));
18554 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxinsertw);
18555
18556 // The third argument is a compile-time constant int. It must be clamped
18557 // to the range [0, 12].
18558 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
18559 assert(ArgCI &&
18560 "Third arg to xxinsertw intrinsic must be constant integer");
18561 const int64_t MaxIndex = 12;
18562 int64_t Index = std::clamp(ArgCI->getSExtValue(), (int64_t)0, MaxIndex);
18563
18564 // The builtin semantics don't exactly match the xxinsertw instruction's
18565 // semantics (which ppc_vsx_xxinsertw follows). The builtin extracts the
18566 // word from the first argument, and inserts it in the second argument. The
18567 // instruction extracts the word from its second input register and inserts
18568 // it into its first input register, so swap the first and second arguments.
18569 std::swap(Op0, Op1);
18570
18571 // Need to cast the second argument from a vector of unsigned int to a
18572 // vector of long long.
18573 Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int64Ty, 2));
18574
18575 if (getTarget().isLittleEndian()) {
18576 // Reverse the double words in the vector we will extract from.
18577 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2));
18578 Op0 = Builder.CreateShuffleVector(Op0, Op0, ArrayRef<int>{1, 0});
18579
18580 // Reverse the index.
18581 Index = MaxIndex - Index;
18582 }
18583
18584 // Intrinsic expects the first arg to be a vector of int.
18585 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int32Ty, 4));
18586 Op2 = ConstantInt::getSigned(Int32Ty, Index);
18587 return Builder.CreateCall(F, {Op0, Op1, Op2});
18588 }
18589
18590 case PPC::BI__builtin_vsx_extractuword: {
18591 Value *Op0 = EmitScalarExpr(E->getArg(0));
18592 Value *Op1 = EmitScalarExpr(E->getArg(1));
18593 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxextractuw);
18594
18595 // Intrinsic expects the first argument to be a vector of doublewords.
18596 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2));
18597
18598 // The second argument is a compile time constant int that needs to
18599 // be clamped to the range [0, 12].
18600 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op1);
18601 assert(ArgCI &&
18602 "Second Arg to xxextractuw intrinsic must be a constant integer!");
18603 const int64_t MaxIndex = 12;
18604 int64_t Index = std::clamp(ArgCI->getSExtValue(), (int64_t)0, MaxIndex);
18605
18606 if (getTarget().isLittleEndian()) {
18607 // Reverse the index.
18608 Index = MaxIndex - Index;
18609 Op1 = ConstantInt::getSigned(Int32Ty, Index);
18610
18611 // Emit the call, then reverse the double words of the results vector.
18612 Value *Call = Builder.CreateCall(F, {Op0, Op1});
18613
18614 Value *ShuffleCall =
18615 Builder.CreateShuffleVector(Call, Call, ArrayRef<int>{1, 0});
18616 return ShuffleCall;
18617 } else {
18618 Op1 = ConstantInt::getSigned(Int32Ty, Index);
18619 return Builder.CreateCall(F, {Op0, Op1});
18620 }
18621 }
18622
18623 case PPC::BI__builtin_vsx_xxpermdi: {
18624 Value *Op0 = EmitScalarExpr(E->getArg(0));
18625 Value *Op1 = EmitScalarExpr(E->getArg(1));
18626 Value *Op2 = EmitScalarExpr(E->getArg(2));
18627 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
18628 assert(ArgCI && "Third arg must be constant integer!");
18629
18630 unsigned Index = ArgCI->getZExtValue();
18631 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2));
18632 Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int64Ty, 2));
18633
18634 // Account for endianness by treating this as just a shuffle. So we use the
18635 // same indices for both LE and BE in order to produce expected results in
18636 // both cases.
18637 int ElemIdx0 = (Index & 2) >> 1;
18638 int ElemIdx1 = 2 + (Index & 1);
18639
18640 int ShuffleElts[2] = {ElemIdx0, ElemIdx1};
18641 Value *ShuffleCall = Builder.CreateShuffleVector(Op0, Op1, ShuffleElts);
18642 QualType BIRetType = E->getType();
18643 auto RetTy = ConvertType(BIRetType);
18644 return Builder.CreateBitCast(ShuffleCall, RetTy);
18645 }
18646
18647 case PPC::BI__builtin_vsx_xxsldwi: {
18648 Value *Op0 = EmitScalarExpr(E->getArg(0));
18649 Value *Op1 = EmitScalarExpr(E->getArg(1));
18650 Value *Op2 = EmitScalarExpr(E->getArg(2));
18651 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
18652 assert(ArgCI && "Third argument must be a compile time constant");
18653 unsigned Index = ArgCI->getZExtValue() & 0x3;
18654 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int32Ty, 4));
18655 Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int32Ty, 4));
18656
18657 // Create a shuffle mask
18658 int ElemIdx0;
18659 int ElemIdx1;
18660 int ElemIdx2;
18661 int ElemIdx3;
18662 if (getTarget().isLittleEndian()) {
18663 // Little endian element N comes from element 8+N-Index of the
18664 // concatenated wide vector (of course, using modulo arithmetic on
18665 // the total number of elements).
18666 ElemIdx0 = (8 - Index) % 8;
18667 ElemIdx1 = (9 - Index) % 8;
18668 ElemIdx2 = (10 - Index) % 8;
18669 ElemIdx3 = (11 - Index) % 8;
18670 } else {
18671 // Big endian ElemIdx<N> = Index + N
18672 ElemIdx0 = Index;
18673 ElemIdx1 = Index + 1;
18674 ElemIdx2 = Index + 2;
18675 ElemIdx3 = Index + 3;
18676 }
18677
18678 int ShuffleElts[4] = {ElemIdx0, ElemIdx1, ElemIdx2, ElemIdx3};
18679 Value *ShuffleCall = Builder.CreateShuffleVector(Op0, Op1, ShuffleElts);
18680 QualType BIRetType = E->getType();
18681 auto RetTy = ConvertType(BIRetType);
18682 return Builder.CreateBitCast(ShuffleCall, RetTy);
18683 }
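  // Index handling above: xxpermdi and xxsldwi are modeled as plain vector
  // shuffles over the concatenated inputs, with the index arithmetic arranged
  // so the same architected result is produced on both endiannesses.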
18684
18685 case PPC::BI__builtin_pack_vector_int128: {
18686 Value *Op0 = EmitScalarExpr(E->getArg(0));
18687 Value *Op1 = EmitScalarExpr(E->getArg(1));
18688 bool isLittleEndian = getTarget().isLittleEndian();
18689 Value *PoisonValue =
18690 llvm::PoisonValue::get(llvm::FixedVectorType::get(Op0->getType(), 2));
18691 Value *Res = Builder.CreateInsertElement(
18692 PoisonValue, Op0, (uint64_t)(isLittleEndian ? 1 : 0));
18693 Res = Builder.CreateInsertElement(Res, Op1,
18694 (uint64_t)(isLittleEndian ? 0 : 1));
18695 return Builder.CreateBitCast(Res, ConvertType(E->getType()));
18696 }
18697
18698 case PPC::BI__builtin_unpack_vector_int128: {
18699 Value *Op0 = EmitScalarExpr(E->getArg(0));
18700 Value *Op1 = EmitScalarExpr(E->getArg(1));
18701 ConstantInt *Index = cast<ConstantInt>(Op1);
18702 Value *Unpacked = Builder.CreateBitCast(
18703 Op0, llvm::FixedVectorType::get(ConvertType(E->getType()), 2));
18704
18705 if (getTarget().isLittleEndian())
18706 Index =
18707 ConstantInt::get(Index->getIntegerType(), 1 - Index->getZExtValue());
18708
18709 return Builder.CreateExtractElement(Unpacked, Index);
18710 }
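  // pack/unpack_vector_int128 above model the 128-bit value roughly as a
  // <2 x i64> vector; the element index is flipped on little-endian targets so
  // the same builtin arguments yield the same architected value on both
  // endiannesses.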
18711
18712 case PPC::BI__builtin_ppc_sthcx: {
18713 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_sthcx);
18714 Value *Op0 = EmitScalarExpr(E->getArg(0));
18715 Value *Op1 = Builder.CreateSExt(EmitScalarExpr(E->getArg(1)), Int32Ty);
18716 return Builder.CreateCall(F, {Op0, Op1});
18717 }
18718
18719 // The PPC MMA builtins take a pointer to a __vector_quad as an argument.
18720 // Some of the MMA instructions accumulate their result into an existing
18721 // accumulator whereas the others generate a new accumulator. So we need to
18722 // use custom code generation to expand a builtin call with a pointer into a
18723 // load (if the corresponding instruction accumulates its result), followed by
18724 // the call to the intrinsic and a store of the result.
18725#define CUSTOM_BUILTIN(Name, Intr, Types, Accumulate, Feature) \
18726 case PPC::BI__builtin_##Name:
18727#include "clang/Basic/BuiltinsPPC.def"
18728 {
18729 SmallVector<Value *, 4> Ops;
18730 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)
18731 if (E->getArg(i)->getType()->isArrayType())
18732 Ops.push_back(
18733 EmitArrayToPointerDecay(E->getArg(i)).emitRawPointer(*this));
18734 else
18735 Ops.push_back(EmitScalarExpr(E->getArg(i)));
18736 // The first argument of these builtins is a pointer used to store their
18737 // result. However, the llvm intrinsics return their result in multiple
18738 // return values. So, here we emit code extracting these values from the
18739 // intrinsic results and storing them using that pointer.
18740 if (BuiltinID == PPC::BI__builtin_mma_disassemble_acc ||
18741 BuiltinID == PPC::BI__builtin_vsx_disassemble_pair ||
18742 BuiltinID == PPC::BI__builtin_mma_disassemble_pair) {
18743 unsigned NumVecs = 2;
18744 auto Intrinsic = Intrinsic::ppc_vsx_disassemble_pair;
18745 if (BuiltinID == PPC::BI__builtin_mma_disassemble_acc) {
18746 NumVecs = 4;
18747 Intrinsic = Intrinsic::ppc_mma_disassemble_acc;
18748 }
18749 llvm::Function *F = CGM.getIntrinsic(Intrinsic);
18750 Address Addr = EmitPointerWithAlignment(E->getArg(1));
18751 Value *Vec = Builder.CreateLoad(Addr);
18752 Value *Call = Builder.CreateCall(F, {Vec});
18753 llvm::Type *VTy = llvm::FixedVectorType::get(Int8Ty, 16);
18754 Value *Ptr = Ops[0];
18755 for (unsigned i=0; i<NumVecs; i++) {
18756 Value *Vec = Builder.CreateExtractValue(Call, i);
18757 llvm::ConstantInt* Index = llvm::ConstantInt::get(IntTy, i);
18758 Value *GEP = Builder.CreateInBoundsGEP(VTy, Ptr, Index);
18759 Builder.CreateAlignedStore(Vec, GEP, MaybeAlign(16));
18760 }
18761 return Call;
18762 }
18763 if (BuiltinID == PPC::BI__builtin_vsx_build_pair ||
18764 BuiltinID == PPC::BI__builtin_mma_build_acc) {
18765 // Reverse the order of the operands for LE, so the
18766 // same builtin call can be used on both LE and BE
18767 // without the need for the programmer to swap operands.
18768 // The operands are reversed starting from the second argument;
18769 // the first operand is the pointer to the pair/accumulator
18770 // that is being built.
18771 if (getTarget().isLittleEndian())
18772 std::reverse(Ops.begin() + 1, Ops.end());
18773 }
18774 bool Accumulate;
18775 switch (BuiltinID) {
18776 #define CUSTOM_BUILTIN(Name, Intr, Types, Acc, Feature) \
18777 case PPC::BI__builtin_##Name: \
18778 ID = Intrinsic::ppc_##Intr; \
18779 Accumulate = Acc; \
18780 break;
18781 #include "clang/Basic/BuiltinsPPC.def"
18782 }
18783 if (BuiltinID == PPC::BI__builtin_vsx_lxvp ||
18784 BuiltinID == PPC::BI__builtin_vsx_stxvp ||
18785 BuiltinID == PPC::BI__builtin_mma_lxvp ||
18786 BuiltinID == PPC::BI__builtin_mma_stxvp) {
18787 if (BuiltinID == PPC::BI__builtin_vsx_lxvp ||
18788 BuiltinID == PPC::BI__builtin_mma_lxvp) {
18789 Ops[0] = Builder.CreateGEP(Int8Ty, Ops[1], Ops[0]);
18790 } else {
18791 Ops[1] = Builder.CreateGEP(Int8Ty, Ops[2], Ops[1]);
18792 }
18793 Ops.pop_back();
18794 llvm::Function *F = CGM.getIntrinsic(ID);
18795 return Builder.CreateCall(F, Ops, "");
18796 }
18797 SmallVector<Value*, 4> CallOps;
18798 if (Accumulate) {
18799 Address Addr = EmitPointerWithAlignment(E->getArg(0));
18800 Value *Acc = Builder.CreateLoad(Addr);
18801 CallOps.push_back(Acc);
18802 }
18803 for (unsigned i=1; i<Ops.size(); i++)
18804 CallOps.push_back(Ops[i]);
18805 llvm::Function *F = CGM.getIntrinsic(ID);
18806 Value *Call = Builder.CreateCall(F, CallOps);
18807 return Builder.CreateAlignedStore(Call, Ops[0], MaybeAlign());
18808 }
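  // MMA expansion sketch (builtin name assumed for illustration): a call such
  // as __builtin_mma_xvf64gerpp(&acc, pair, vec) is expanded into a load of the
  // accumulator through the pointer, a call to the corresponding ppc_mma
  // intrinsic with the loaded value and the remaining operands, and a store of
  // the result back through the pointer; non-accumulating forms skip the load.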
18809
18810 case PPC::BI__builtin_ppc_compare_and_swap:
18811 case PPC::BI__builtin_ppc_compare_and_swaplp: {
18812 Address Addr = EmitPointerWithAlignment(E->getArg(0));
18813 Address OldValAddr = EmitPointerWithAlignment(E->getArg(1));
18814 Value *OldVal = Builder.CreateLoad(OldValAddr);
18815 QualType AtomicTy = E->getArg(0)->getType()->getPointeeType();
18816 LValue LV = MakeAddrLValue(Addr, AtomicTy);
18817 Value *Op2 = EmitScalarExpr(E->getArg(2));
18818 auto Pair = EmitAtomicCompareExchange(
18819 LV, RValue::get(OldVal), RValue::get(Op2), E->getExprLoc(),
18820 llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Monotonic, true);
18821 // Unlike c11's atomic_compare_exchange, according to
18822 // https://www.ibm.com/docs/en/xl-c-and-cpp-aix/16.1?topic=functions-compare-swap-compare-swaplp
18823 // > In either case, the contents of the memory location specified by addr
18824 // > are copied into the memory location specified by old_val_addr.
18825 // But it does not specify whether the store to OldValAddr is atomic or
18826 // which ordering to use. Following XL's codegen, treat it as a normal
18827 // store.
18828 Value *LoadedVal = Pair.first.getScalarVal();
18829 Builder.CreateStore(LoadedVal, OldValAddr);
18830 return Builder.CreateZExt(Pair.second, Builder.getInt32Ty());
18831 }
18832 case PPC::BI__builtin_ppc_fetch_and_add:
18833 case PPC::BI__builtin_ppc_fetch_and_addlp: {
18834 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
18835 llvm::AtomicOrdering::Monotonic);
18836 }
18837 case PPC::BI__builtin_ppc_fetch_and_and:
18838 case PPC::BI__builtin_ppc_fetch_and_andlp: {
18839 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
18840 llvm::AtomicOrdering::Monotonic);
18841 }
18842
18843 case PPC::BI__builtin_ppc_fetch_and_or:
18844 case PPC::BI__builtin_ppc_fetch_and_orlp: {
18845 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
18846 llvm::AtomicOrdering::Monotonic);
18847 }
18848 case PPC::BI__builtin_ppc_fetch_and_swap:
18849 case PPC::BI__builtin_ppc_fetch_and_swaplp: {
18850 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
18851 llvm::AtomicOrdering::Monotonic);
18852 }
18853 case PPC::BI__builtin_ppc_ldarx:
18854 case PPC::BI__builtin_ppc_lwarx:
18855 case PPC::BI__builtin_ppc_lharx:
18856 case PPC::BI__builtin_ppc_lbarx:
18857 return emitPPCLoadReserveIntrinsic(*this, BuiltinID, E);
18858 case PPC::BI__builtin_ppc_mfspr: {
18859 Value *Op0 = EmitScalarExpr(E->getArg(0));
18860 llvm::Type *RetType = CGM.getDataLayout().getTypeSizeInBits(VoidPtrTy) == 32
18861 ? Int32Ty
18862 : Int64Ty;
18863 Function *F = CGM.getIntrinsic(Intrinsic::ppc_mfspr, RetType);
18864 return Builder.CreateCall(F, {Op0});
18865 }
18866 case PPC::BI__builtin_ppc_mtspr: {
18867 Value *Op0 = EmitScalarExpr(E->getArg(0));
18868 Value *Op1 = EmitScalarExpr(E->getArg(1));
18869 llvm::Type *RetType = CGM.getDataLayout().getTypeSizeInBits(VoidPtrTy) == 32
18870 ? Int32Ty
18871 : Int64Ty;
18872 Function *F = CGM.getIntrinsic(Intrinsic::ppc_mtspr, RetType);
18873 return Builder.CreateCall(F, {Op0, Op1});
18874 }
18875 case PPC::BI__builtin_ppc_popcntb: {
18876 Value *ArgValue = EmitScalarExpr(E->getArg(0));
18877 llvm::Type *ArgType = ArgValue->getType();
18878 Function *F = CGM.getIntrinsic(Intrinsic::ppc_popcntb, {ArgType, ArgType});
18879 return Builder.CreateCall(F, {ArgValue}, "popcntb");
18880 }
18881 case PPC::BI__builtin_ppc_mtfsf: {
18882 // The builtin takes a uint32 that needs to be converted to an
18883 // f64 before being passed to the intrinsic.
18884 Value *Op0 = EmitScalarExpr(E->getArg(0));
18885 Value *Op1 = EmitScalarExpr(E->getArg(1));
18886 Value *Cast = Builder.CreateUIToFP(Op1, DoubleTy);
18887 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_mtfsf);
18888 return Builder.CreateCall(F, {Op0, Cast}, "");
18889 }
18890
18891 case PPC::BI__builtin_ppc_swdiv_nochk:
18892 case PPC::BI__builtin_ppc_swdivs_nochk: {
18893 Value *Op0 = EmitScalarExpr(E->getArg(0));
18894 Value *Op1 = EmitScalarExpr(E->getArg(1));
18895 FastMathFlags FMF = Builder.getFastMathFlags();
18896 Builder.getFastMathFlags().setFast();
18897 Value *FDiv = Builder.CreateFDiv(Op0, Op1, "swdiv_nochk");
18898 Builder.getFastMathFlags() &= (FMF);
18899 return FDiv;
18900 }
18901 case PPC::BI__builtin_ppc_fric:
18902 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
18903 *this, E, Intrinsic::rint,
18904 Intrinsic::experimental_constrained_rint))
18905 .getScalarVal();
18906 case PPC::BI__builtin_ppc_frim:
18907 case PPC::BI__builtin_ppc_frims:
18908 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
18909 *this, E, Intrinsic::floor,
18910 Intrinsic::experimental_constrained_floor))
18911 .getScalarVal();
18912 case PPC::BI__builtin_ppc_frin:
18913 case PPC::BI__builtin_ppc_frins:
18914 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
18915 *this, E, Intrinsic::round,
18916 Intrinsic::experimental_constrained_round))
18917 .getScalarVal();
18918 case PPC::BI__builtin_ppc_frip:
18919 case PPC::BI__builtin_ppc_frips:
18920 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
18921 *this, E, Intrinsic::ceil,
18922 Intrinsic::experimental_constrained_ceil))
18923 .getScalarVal();
18924 case PPC::BI__builtin_ppc_friz:
18925 case PPC::BI__builtin_ppc_frizs:
18926 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
18927 *this, E, Intrinsic::trunc,
18928 Intrinsic::experimental_constrained_trunc))
18929 .getScalarVal();
18930 case PPC::BI__builtin_ppc_fsqrt:
18931 case PPC::BI__builtin_ppc_fsqrts:
18932 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
18933 *this, E, Intrinsic::sqrt,
18934 Intrinsic::experimental_constrained_sqrt))
18935 .getScalarVal();
18936 case PPC::BI__builtin_ppc_test_data_class: {
18937 Value *Op0 = EmitScalarExpr(E->getArg(0));
18938 Value *Op1 = EmitScalarExpr(E->getArg(1));
18939 return Builder.CreateCall(
18940 CGM.getIntrinsic(Intrinsic::ppc_test_data_class, Op0->getType()),
18941 {Op0, Op1}, "test_data_class");
18942 }
18943 case PPC::BI__builtin_ppc_maxfe: {
18944 Value *Op0 = EmitScalarExpr(E->getArg(0));
18945 Value *Op1 = EmitScalarExpr(E->getArg(1));
18946 Value *Op2 = EmitScalarExpr(E->getArg(2));
18947 Value *Op3 = EmitScalarExpr(E->getArg(3));
18948 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfe),
18949 {Op0, Op1, Op2, Op3});
18950 }
18951 case PPC::BI__builtin_ppc_maxfl: {
18952 Value *Op0 = EmitScalarExpr(E->getArg(0));
18953 Value *Op1 = EmitScalarExpr(E->getArg(1));
18954 Value *Op2 = EmitScalarExpr(E->getArg(2));
18955 Value *Op3 = EmitScalarExpr(E->getArg(3));
18956 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfl),
18957 {Op0, Op1, Op2, Op3});
18958 }
18959 case PPC::BI__builtin_ppc_maxfs: {
18960 Value *Op0 = EmitScalarExpr(E->getArg(0));
18961 Value *Op1 = EmitScalarExpr(E->getArg(1));
18962 Value *Op2 = EmitScalarExpr(E->getArg(2));
18963 Value *Op3 = EmitScalarExpr(E->getArg(3));
18964 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfs),
18965 {Op0, Op1, Op2, Op3});
18966 }
18967 case PPC::BI__builtin_ppc_minfe: {
18968 Value *Op0 = EmitScalarExpr(E->getArg(0));
18969 Value *Op1 = EmitScalarExpr(E->getArg(1));
18970 Value *Op2 = EmitScalarExpr(E->getArg(2));
18971 Value *Op3 = EmitScalarExpr(E->getArg(3));
18972 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfe),
18973 {Op0, Op1, Op2, Op3});
18974 }
18975 case PPC::BI__builtin_ppc_minfl: {
18976 Value *Op0 = EmitScalarExpr(E->getArg(0));
18977 Value *Op1 = EmitScalarExpr(E->getArg(1));
18978 Value *Op2 = EmitScalarExpr(E->getArg(2));
18979 Value *Op3 = EmitScalarExpr(E->getArg(3));
18980 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfl),
18981 {Op0, Op1, Op2, Op3});
18982 }
18983 case PPC::BI__builtin_ppc_minfs: {
18984 Value *Op0 = EmitScalarExpr(E->getArg(0));
18985 Value *Op1 = EmitScalarExpr(E->getArg(1));
18986 Value *Op2 = EmitScalarExpr(E->getArg(2));
18987 Value *Op3 = EmitScalarExpr(E->getArg(3));
18988 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfs),
18989 {Op0, Op1, Op2, Op3});
18990 }
18991 case PPC::BI__builtin_ppc_swdiv:
18992 case PPC::BI__builtin_ppc_swdivs: {
18993 Value *Op0 = EmitScalarExpr(E->getArg(0));
18994 Value *Op1 = EmitScalarExpr(E->getArg(1));
18995 return Builder.CreateFDiv(Op0, Op1, "swdiv");
18996 }
18997 case PPC::BI__builtin_ppc_set_fpscr_rn:
18998 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_setrnd),
18999 {EmitScalarExpr(E->getArg(0))});
19000 case PPC::BI__builtin_ppc_mffs:
19001 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_readflm));
19002 }
19003}
19004
19005namespace {
19006 // If \p E is not a null pointer, insert an address space cast to match the
19007 // return type of \p E if necessary.
19008Value *EmitAMDGPUDispatchPtr(CodeGenFunction &CGF,
19009 const CallExpr *E = nullptr) {
19010 auto *F = CGF.CGM.getIntrinsic(Intrinsic::amdgcn_dispatch_ptr);
19011 auto *Call = CGF.Builder.CreateCall(F);
19012 Call->addRetAttr(
19013 Attribute::getWithDereferenceableBytes(Call->getContext(), 64));
19014 Call->addRetAttr(Attribute::getWithAlignment(Call->getContext(), Align(4)));
19015 if (!E)
19016 return Call;
19017 QualType BuiltinRetType = E->getType();
19018 auto *RetTy = cast<llvm::PointerType>(CGF.ConvertType(BuiltinRetType));
19019 if (RetTy == Call->getType())
19020 return Call;
19021 return CGF.Builder.CreateAddrSpaceCast(Call, RetTy);
19022}
19023
19024Value *EmitAMDGPUImplicitArgPtr(CodeGenFunction &CGF) {
19025 auto *F = CGF.CGM.getIntrinsic(Intrinsic::amdgcn_implicitarg_ptr);
19026 auto *Call = CGF.Builder.CreateCall(F);
19027 Call->addRetAttr(
19028 Attribute::getWithDereferenceableBytes(Call->getContext(), 256));
19029 Call->addRetAttr(Attribute::getWithAlignment(Call->getContext(), Align(8)));
19030 return Call;
19031}
19032
19033// \p Index is 0, 1, and 2 for x, y, and z dimension, respectively.
19034/// Emit code based on Code Object ABI version.
19035/// COV_4 : Emit code to use dispatch ptr
19036/// COV_5+ : Emit code to use implicitarg ptr
19037/// COV_NONE : Emit code to load a global variable "__oclc_ABI_version"
19038/// and use its value for COV_4 or COV_5+ approach. It is used for
19039/// compiling device libraries in an ABI-agnostic way.
19040///
19041 /// Note: "__oclc_ABI_version" is supposed to be emitted and initialized by
19042/// clang during compilation of user code.
19043Value *EmitAMDGPUWorkGroupSize(CodeGenFunction &CGF, unsigned Index) {
19044 llvm::LoadInst *LD;
19045
19046 auto Cov = CGF.getTarget().getTargetOpts().CodeObjectVersion;
19047
19048 if (Cov == CodeObjectVersionKind::COV_None) {
19049 StringRef Name = "__oclc_ABI_version";
19050 auto *ABIVersionC = CGF.CGM.getModule().getNamedGlobal(Name);
19051 if (!ABIVersionC)
19052 ABIVersionC = new llvm::GlobalVariable(
19053 CGF.CGM.getModule(), CGF.Int32Ty, false,
19054 llvm::GlobalValue::ExternalLinkage, nullptr, Name, nullptr,
19055 llvm::GlobalVariable::NotThreadLocal,
19056 CGF.CGM.getContext().getTargetAddressSpace(LangAS::opencl_constant));
19057
19058 // This load will be eliminated by the IPSCCP because it is constant
19059 // weak_odr without externally_initialized. Either changing it to weak or
19060 // adding externally_initialized will keep the load.
19061 Value *ABIVersion = CGF.Builder.CreateAlignedLoad(CGF.Int32Ty, ABIVersionC,
19062 CGF.CGM.getIntAlign());
19063
19064 Value *IsCOV5 = CGF.Builder.CreateICmpSGE(
19065 ABIVersion,
19066 llvm::ConstantInt::get(CGF.Int32Ty, CodeObjectVersionKind::COV_5));
19067
19068 // Indexing the implicit kernarg segment.
19069 Value *ImplicitGEP = CGF.Builder.CreateConstGEP1_32(
19070 CGF.Int8Ty, EmitAMDGPUImplicitArgPtr(CGF), 12 + Index * 2);
19071
19072 // Indexing the HSA kernel_dispatch_packet struct.
19073 Value *DispatchGEP = CGF.Builder.CreateConstGEP1_32(
19074 CGF.Int8Ty, EmitAMDGPUDispatchPtr(CGF), 4 + Index * 2);
19075
19076 auto Result = CGF.Builder.CreateSelect(IsCOV5, ImplicitGEP, DispatchGEP);
19077 LD = CGF.Builder.CreateLoad(
19078 Address(Result, CGF.Int16Ty, CharUnits::fromQuantity(2)));
19079 } else {
19080 Value *GEP = nullptr;
19081 if (Cov >= CodeObjectVersionKind::COV_5) {
19082 // Indexing the implicit kernarg segment.
19083 GEP = CGF.Builder.CreateConstGEP1_32(
19084 CGF.Int8Ty, EmitAMDGPUImplicitArgPtr(CGF), 12 + Index * 2);
19085 } else {
19086 // Indexing the HSA kernel_dispatch_packet struct.
19087 GEP = CGF.Builder.CreateConstGEP1_32(
19088 CGF.Int8Ty, EmitAMDGPUDispatchPtr(CGF), 4 + Index * 2);
19089 }
19090 LD = CGF.Builder.CreateLoad(
19091 Address(GEP, CGF.Int16Ty, CharUnits::fromQuantity(2)));
19092 }
19093
19094 llvm::MDBuilder MDHelper(CGF.getLLVMContext());
19095 llvm::MDNode *RNode = MDHelper.createRange(APInt(16, 1),
19096 APInt(16, CGF.getTarget().getMaxOpenCLWorkGroupSize() + 1));
19097 LD->setMetadata(llvm::LLVMContext::MD_range, RNode);
19098 LD->setMetadata(llvm::LLVMContext::MD_noundef,
19099 llvm::MDNode::get(CGF.getLLVMContext(), {}));
19100 LD->setMetadata(llvm::LLVMContext::MD_invariant_load,
19101 llvm::MDNode::get(CGF.getLLVMContext(), {}));
19102 return LD;
19103}
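// Summary of the workgroup-size lowering above: with COV_5+ the 16-bit size
// for dimension Index is loaded from the implicit kernarg segment at offset
// 12 + Index * 2; with COV_4 it comes from the HSA kernel_dispatch_packet at
// offset 4 + Index * 2; COV_NONE selects between the two at run time based on
// __oclc_ABI_version. The load carries range, noundef and invariant metadata.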
19104
19105// \p Index is 0, 1, and 2 for x, y, and z dimension, respectively.
19106Value *EmitAMDGPUGridSize(CodeGenFunction &CGF, unsigned Index) {
19107 const unsigned XOffset = 12;
19108 auto *DP = EmitAMDGPUDispatchPtr(CGF);
19109 // Indexing the HSA kernel_dispatch_packet struct.
19110 auto *Offset = llvm::ConstantInt::get(CGF.Int32Ty, XOffset + Index * 4);
19111 auto *GEP = CGF.Builder.CreateGEP(CGF.Int8Ty, DP, Offset);
19112 auto *LD = CGF.Builder.CreateLoad(
19113 Address(GEP, CGF.Int32Ty, CharUnits::fromQuantity(4)));
19114
19115 llvm::MDBuilder MDB(CGF.getLLVMContext());
19116
19117 // Known non-zero.
19118 LD->setMetadata(llvm::LLVMContext::MD_range,
19119 MDB.createRange(APInt(32, 1), APInt::getZero(32)));
19120 LD->setMetadata(llvm::LLVMContext::MD_invariant_load,
19121 llvm::MDNode::get(CGF.getLLVMContext(), {}));
19122 return LD;
19123}
19124} // namespace
19125
19126// For processing memory ordering and memory scope arguments of various
19127// amdgcn builtins.
19128 // \p Order takes a C++11-compatible memory-ordering specifier and converts
19129 // it into LLVM's memory ordering specifier using the atomic C ABI, and writes
19130 // it to \p AO. \p Scope takes a const char * and converts it into an AMDGCN
19131// specific SyncScopeID and writes it to \p SSID.
19132 void CodeGenFunction::ProcessOrderScopeAMDGCN(Value *Order, Value *Scope,
19133 llvm::AtomicOrdering &AO,
19134 llvm::SyncScope::ID &SSID) {
19135 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
19136
19137 // Map C11/C++11 memory ordering to LLVM memory ordering
19138 assert(llvm::isValidAtomicOrderingCABI(ord));
19139 switch (static_cast<llvm::AtomicOrderingCABI>(ord)) {
19140 case llvm::AtomicOrderingCABI::acquire:
19141 case llvm::AtomicOrderingCABI::consume:
19142 AO = llvm::AtomicOrdering::Acquire;
19143 break;
19144 case llvm::AtomicOrderingCABI::release:
19145 AO = llvm::AtomicOrdering::Release;
19146 break;
19147 case llvm::AtomicOrderingCABI::acq_rel:
19148 AO = llvm::AtomicOrdering::AcquireRelease;
19149 break;
19150 case llvm::AtomicOrderingCABI::seq_cst:
19151 AO = llvm::AtomicOrdering::SequentiallyConsistent;
19152 break;
19153 case llvm::AtomicOrderingCABI::relaxed:
19154 AO = llvm::AtomicOrdering::Monotonic;
19155 break;
19156 }
19157
19158 // Some of the atomic builtins take the scope as a string name.
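  // e.g. __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "workgroup").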
19159 StringRef scp;
19160 if (llvm::getConstantStringInfo(Scope, scp)) {
19161 SSID = getLLVMContext().getOrInsertSyncScopeID(scp);
19162 return;
19163 }
19164
19165 // Older builtins had an enum argument for the memory scope.
19166 int scope = cast<llvm::ConstantInt>(Scope)->getZExtValue();
19167 switch (scope) {
19168 case 0: // __MEMORY_SCOPE_SYSTEM
19169 SSID = llvm::SyncScope::System;
19170 break;
19171 case 1: // __MEMORY_SCOPE_DEVICE
19172 SSID = getLLVMContext().getOrInsertSyncScopeID("agent");
19173 break;
19174 case 2: // __MEMORY_SCOPE_WRKGRP
19175 SSID = getLLVMContext().getOrInsertSyncScopeID("workgroup");
19176 break;
19177 case 3: // __MEMORY_SCOPE_WVFRNT
19178 SSID = getLLVMContext().getOrInsertSyncScopeID("wavefront");
19179 break;
19180 case 4: // __MEMORY_SCOPE_SINGLE
19181 SSID = llvm::SyncScope::SingleThread;
19182 break;
19183 default:
19184 SSID = llvm::SyncScope::System;
19185 break;
19186 }
19187}
19188
19189llvm::Value *CodeGenFunction::EmitScalarOrConstFoldImmArg(unsigned ICEArguments,
19190 unsigned Idx,
19191 const CallExpr *E) {
19192 llvm::Value *Arg = nullptr;
19193 if ((ICEArguments & (1 << Idx)) == 0) {
19194 Arg = EmitScalarExpr(E->getArg(Idx));
19195 } else {
19196 // If this is required to be a constant, constant fold it so that we
19197 // know that the generated intrinsic gets a ConstantInt.
19198 std::optional<llvm::APSInt> Result =
19199 E->getArg(Idx)->getIntegerConstantExpr(getContext());
19200 assert(Result && "Expected argument to be a constant");
19201 Arg = llvm::ConstantInt::get(getLLVMContext(), *Result);
19202 }
19203 return Arg;
19204}
19205
19206// Return the dot product intrinsic that corresponds to the QT scalar type
19207static Intrinsic::ID getDotProductIntrinsic(CGHLSLRuntime &RT, QualType QT) {
19208 if (QT->isFloatingType())
19209 return RT.getFDotIntrinsic();
19210 if (QT->isSignedIntegerType())
19211 return RT.getSDotIntrinsic();
19212 assert(QT->isUnsignedIntegerType());
19213 return RT.getUDotIntrinsic();
19214}
19215
19216static Intrinsic::ID getFirstBitHighIntrinsic(CGHLSLRuntime &RT, QualType QT) {
19217  if (QT->hasSignedIntegerRepresentation()) {
19218    return RT.getFirstBitSHighIntrinsic();
19219 }
19220
19221  assert(QT->hasUnsignedIntegerRepresentation());
19222  return RT.getFirstBitUHighIntrinsic();
19223}
19224
19225// Return the wave active sum intrinsic that corresponds to the QT scalar type
19226static Intrinsic::ID getWaveActiveSumIntrinsic(llvm::Triple::ArchType Arch,
19227 CGHLSLRuntime &RT, QualType QT) {
19228 switch (Arch) {
19229 case llvm::Triple::spirv:
19230 return llvm::Intrinsic::spv_wave_reduce_sum;
19231 case llvm::Triple::dxil: {
19232 if (QT->isUnsignedIntegerType())
19233 return llvm::Intrinsic::dx_wave_reduce_usum;
19234 return llvm::Intrinsic::dx_wave_reduce_sum;
19235 }
19236 default:
19237 llvm_unreachable("Intrinsic WaveActiveSum"
19238 " not supported by target architecture");
19239 }
19240}
19241
19242Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
19243                                            const CallExpr *E,
19244 ReturnValueSlot ReturnValue) {
19245 if (!getLangOpts().HLSL)
19246 return nullptr;
19247
19248 switch (BuiltinID) {
19249 case Builtin::BI__builtin_hlsl_resource_getpointer: {
19250 Value *HandleOp = EmitScalarExpr(E->getArg(0));
19251 Value *IndexOp = EmitScalarExpr(E->getArg(1));
19252
19253 // TODO: Map to an hlsl_device address space.
19254 llvm::Type *RetTy = llvm::PointerType::getUnqual(getLLVMContext());
19255
19256 return Builder.CreateIntrinsic(
19257 RetTy, CGM.getHLSLRuntime().getCreateResourceGetPointerIntrinsic(),
19258 ArrayRef<Value *>{HandleOp, IndexOp});
19259 }
19260 case Builtin::BI__builtin_hlsl_all: {
19261 Value *Op0 = EmitScalarExpr(E->getArg(0));
19262 return Builder.CreateIntrinsic(
19263 /*ReturnType=*/llvm::Type::getInt1Ty(getLLVMContext()),
19264 CGM.getHLSLRuntime().getAllIntrinsic(), ArrayRef<Value *>{Op0}, nullptr,
19265 "hlsl.all");
19266 }
19267 case Builtin::BI__builtin_hlsl_any: {
19268 Value *Op0 = EmitScalarExpr(E->getArg(0));
19269 return Builder.CreateIntrinsic(
19270 /*ReturnType=*/llvm::Type::getInt1Ty(getLLVMContext()),
19271 CGM.getHLSLRuntime().getAnyIntrinsic(), ArrayRef<Value *>{Op0}, nullptr,
19272 "hlsl.any");
19273 }
19274 case Builtin::BI__builtin_hlsl_asdouble:
19275 return handleAsDoubleBuiltin(*this, E);
19276 case Builtin::BI__builtin_hlsl_elementwise_clamp: {
19277 Value *OpX = EmitScalarExpr(E->getArg(0));
19278 Value *OpMin = EmitScalarExpr(E->getArg(1));
19279 Value *OpMax = EmitScalarExpr(E->getArg(2));
19280
19281 QualType Ty = E->getArg(0)->getType();
19282 if (auto *VecTy = Ty->getAs<VectorType>())
19283 Ty = VecTy->getElementType();
19284
19285 Intrinsic::ID Intr;
19286 if (Ty->isFloatingType()) {
19287 Intr = CGM.getHLSLRuntime().getNClampIntrinsic();
19288 } else if (Ty->isUnsignedIntegerType()) {
19289 Intr = CGM.getHLSLRuntime().getUClampIntrinsic();
19290 } else {
19291 assert(Ty->isSignedIntegerType());
19292 Intr = CGM.getHLSLRuntime().getSClampIntrinsic();
19293 }
19294 return Builder.CreateIntrinsic(
19295 /*ReturnType=*/OpX->getType(), Intr,
19296 ArrayRef<Value *>{OpX, OpMin, OpMax}, nullptr, "hlsl.clamp");
19297 }
19298 case Builtin::BI__builtin_hlsl_cross: {
19299 Value *Op0 = EmitScalarExpr(E->getArg(0));
19300 Value *Op1 = EmitScalarExpr(E->getArg(1));
19301 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
19302 E->getArg(1)->getType()->hasFloatingRepresentation() &&
19303 "cross operands must have a float representation");
19304 // make sure each vector has exactly 3 elements
19305 assert(
19306 E->getArg(0)->getType()->castAs<VectorType>()->getNumElements() == 3 &&
19307 E->getArg(1)->getType()->castAs<VectorType>()->getNumElements() == 3 &&
19308 "input vectors must have 3 elements each");
19309 return Builder.CreateIntrinsic(
19310 /*ReturnType=*/Op0->getType(), CGM.getHLSLRuntime().getCrossIntrinsic(),
19311 ArrayRef<Value *>{Op0, Op1}, nullptr, "hlsl.cross");
19312 }
19313 case Builtin::BI__builtin_hlsl_dot: {
19314 Value *Op0 = EmitScalarExpr(E->getArg(0));
19315 Value *Op1 = EmitScalarExpr(E->getArg(1));
19316 llvm::Type *T0 = Op0->getType();
19317 llvm::Type *T1 = Op1->getType();
19318
19319 // If the arguments are scalars, just emit a multiply
19320 if (!T0->isVectorTy() && !T1->isVectorTy()) {
19321 if (T0->isFloatingPointTy())
19322 return Builder.CreateFMul(Op0, Op1, "hlsl.dot");
19323
19324 if (T0->isIntegerTy())
19325 return Builder.CreateMul(Op0, Op1, "hlsl.dot");
19326
19327 llvm_unreachable(
19328 "Scalar dot product is only supported on ints and floats.");
19329 }
19330 // For vectors, validate types and emit the appropriate intrinsic
19331
19332 // A VectorSplat should have happened
19333 assert(T0->isVectorTy() && T1->isVectorTy() &&
19334 "Dot product of vector and scalar is not supported.");
19335
19336 auto *VecTy0 = E->getArg(0)->getType()->getAs<VectorType>();
19337 [[maybe_unused]] auto *VecTy1 =
19338 E->getArg(1)->getType()->getAs<VectorType>();
19339
19340 assert(VecTy0->getElementType() == VecTy1->getElementType() &&
19341           "Dot product of vectors needs the same element types.");
19342
19343 assert(VecTy0->getNumElements() == VecTy1->getNumElements() &&
19344 "Dot product requires vectors to be of the same size.");
19345
19346 return Builder.CreateIntrinsic(
19347 /*ReturnType=*/T0->getScalarType(),
19348 getDotProductIntrinsic(CGM.getHLSLRuntime(), VecTy0->getElementType()),
19349 ArrayRef<Value *>{Op0, Op1}, nullptr, "hlsl.dot");
19350 }
19351 case Builtin::BI__builtin_hlsl_dot4add_i8packed: {
19352 Value *A = EmitScalarExpr(E->getArg(0));
19353 Value *B = EmitScalarExpr(E->getArg(1));
19354 Value *C = EmitScalarExpr(E->getArg(2));
19355
19356 Intrinsic::ID ID = CGM.getHLSLRuntime().getDot4AddI8PackedIntrinsic();
19357 return Builder.CreateIntrinsic(
19358 /*ReturnType=*/C->getType(), ID, ArrayRef<Value *>{A, B, C}, nullptr,
19359 "hlsl.dot4add.i8packed");
19360 }
19361 case Builtin::BI__builtin_hlsl_dot4add_u8packed: {
19362 Value *A = EmitScalarExpr(E->getArg(0));
19363 Value *B = EmitScalarExpr(E->getArg(1));
19364 Value *C = EmitScalarExpr(E->getArg(2));
19365
19366 Intrinsic::ID ID = CGM.getHLSLRuntime().getDot4AddU8PackedIntrinsic();
19367 return Builder.CreateIntrinsic(
19368 /*ReturnType=*/C->getType(), ID, ArrayRef<Value *>{A, B, C}, nullptr,
19369 "hlsl.dot4add.u8packed");
19370 }
19371 case Builtin::BI__builtin_hlsl_elementwise_firstbithigh: {
19372 Value *X = EmitScalarExpr(E->getArg(0));
19373
19374 return Builder.CreateIntrinsic(
19375 /*ReturnType=*/ConvertType(E->getType()),
19376        getFirstBitHighIntrinsic(CGM.getHLSLRuntime(), E->getArg(0)->getType()),
19377        ArrayRef<Value *>{X}, nullptr, "hlsl.firstbithigh");
19378 }
19379 case Builtin::BI__builtin_hlsl_elementwise_firstbitlow: {
19380 Value *X = EmitScalarExpr(E->getArg(0));
19381
19382 return Builder.CreateIntrinsic(
19383 /*ReturnType=*/ConvertType(E->getType()),
19384 CGM.getHLSLRuntime().getFirstBitLowIntrinsic(), ArrayRef<Value *>{X},
19385 nullptr, "hlsl.firstbitlow");
19386 }
19387 case Builtin::BI__builtin_hlsl_lerp: {
19388 Value *X = EmitScalarExpr(E->getArg(0));
19389 Value *Y = EmitScalarExpr(E->getArg(1));
19390 Value *S = EmitScalarExpr(E->getArg(2));
19391 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
19392 llvm_unreachable("lerp operand must have a float representation");
19393 return Builder.CreateIntrinsic(
19394 /*ReturnType=*/X->getType(), CGM.getHLSLRuntime().getLerpIntrinsic(),
19395 ArrayRef<Value *>{X, Y, S}, nullptr, "hlsl.lerp");
19396 }
19397 case Builtin::BI__builtin_hlsl_normalize: {
19398 Value *X = EmitScalarExpr(E->getArg(0));
19399
19400 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
19401 "normalize operand must have a float representation");
19402
19403 return Builder.CreateIntrinsic(
19404 /*ReturnType=*/X->getType(),
19405 CGM.getHLSLRuntime().getNormalizeIntrinsic(), ArrayRef<Value *>{X},
19406 nullptr, "hlsl.normalize");
19407 }
19408 case Builtin::BI__builtin_hlsl_elementwise_degrees: {
19409 Value *X = EmitScalarExpr(E->getArg(0));
19410
19411 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
19412 "degree operand must have a float representation");
19413
19414 return Builder.CreateIntrinsic(
19415 /*ReturnType=*/X->getType(), CGM.getHLSLRuntime().getDegreesIntrinsic(),
19416 ArrayRef<Value *>{X}, nullptr, "hlsl.degrees");
19417 }
19418 case Builtin::BI__builtin_hlsl_elementwise_frac: {
19419 Value *Op0 = EmitScalarExpr(E->getArg(0));
19420 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
19421 llvm_unreachable("frac operand must have a float representation");
19422 return Builder.CreateIntrinsic(
19423 /*ReturnType=*/Op0->getType(), CGM.getHLSLRuntime().getFracIntrinsic(),
19424 ArrayRef<Value *>{Op0}, nullptr, "hlsl.frac");
19425}
19426case Builtin::BI__builtin_hlsl_elementwise_isinf: {
19427 Value *Op0 = EmitScalarExpr(E->getArg(0));
19428 llvm::Type *Xty = Op0->getType();
19429 llvm::Type *retType = llvm::Type::getInt1Ty(this->getLLVMContext());
19430 if (Xty->isVectorTy()) {
19431 auto *XVecTy = E->getArg(0)->getType()->getAs<VectorType>();
19432 retType = llvm::VectorType::get(
19433 retType, ElementCount::getFixed(XVecTy->getNumElements()));
19434 }
19435 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
19436 llvm_unreachable("isinf operand must have a float representation");
19437 return Builder.CreateIntrinsic(retType, Intrinsic::dx_isinf,
19438 ArrayRef<Value *>{Op0}, nullptr, "dx.isinf");
19439 }
19440 case Builtin::BI__builtin_hlsl_mad: {
19441 Value *M = EmitScalarExpr(E->getArg(0));
19442 Value *A = EmitScalarExpr(E->getArg(1));
19443 Value *B = EmitScalarExpr(E->getArg(2));
19444 if (E->getArg(0)->getType()->hasFloatingRepresentation())
19445 return Builder.CreateIntrinsic(
19446 /*ReturnType*/ M->getType(), Intrinsic::fmuladd,
19447 ArrayRef<Value *>{M, A, B}, nullptr, "hlsl.fmad");
19448
19449 if (E->getArg(0)->getType()->hasSignedIntegerRepresentation()) {
19450 if (CGM.getTarget().getTriple().getArch() == llvm::Triple::dxil)
19451 return Builder.CreateIntrinsic(
19452 /*ReturnType*/ M->getType(), Intrinsic::dx_imad,
19453 ArrayRef<Value *>{M, A, B}, nullptr, "dx.imad");
19454
19455 Value *Mul = Builder.CreateNSWMul(M, A);
19456 return Builder.CreateNSWAdd(Mul, B);
19457 }
19458 assert(E->getArg(0)->getType()->hasUnsignedIntegerRepresentation());
19459 if (CGM.getTarget().getTriple().getArch() == llvm::Triple::dxil)
19460 return Builder.CreateIntrinsic(
19461 /*ReturnType=*/M->getType(), Intrinsic::dx_umad,
19462 ArrayRef<Value *>{M, A, B}, nullptr, "dx.umad");
19463
19464 Value *Mul = Builder.CreateNUWMul(M, A);
19465 return Builder.CreateNUWAdd(Mul, B);
19466 }
19467 case Builtin::BI__builtin_hlsl_elementwise_rcp: {
19468 Value *Op0 = EmitScalarExpr(E->getArg(0));
19469 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
19470 llvm_unreachable("rcp operand must have a float representation");
19471 llvm::Type *Ty = Op0->getType();
19472 llvm::Type *EltTy = Ty->getScalarType();
19473 Constant *One = Ty->isVectorTy()
19474 ? ConstantVector::getSplat(
19475 ElementCount::getFixed(
19476 cast<FixedVectorType>(Ty)->getNumElements()),
19477 ConstantFP::get(EltTy, 1.0))
19478 : ConstantFP::get(EltTy, 1.0);
19479 return Builder.CreateFDiv(One, Op0, "hlsl.rcp");
19480 }
19481 case Builtin::BI__builtin_hlsl_elementwise_rsqrt: {
19482 Value *Op0 = EmitScalarExpr(E->getArg(0));
19483 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
19484 llvm_unreachable("rsqrt operand must have a float representation");
19485 return Builder.CreateIntrinsic(
19486 /*ReturnType=*/Op0->getType(), CGM.getHLSLRuntime().getRsqrtIntrinsic(),
19487 ArrayRef<Value *>{Op0}, nullptr, "hlsl.rsqrt");
19488 }
19489 case Builtin::BI__builtin_hlsl_elementwise_saturate: {
19490 Value *Op0 = EmitScalarExpr(E->getArg(0));
19491 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
19492 "saturate operand must have a float representation");
19493 return Builder.CreateIntrinsic(
19494 /*ReturnType=*/Op0->getType(),
19495 CGM.getHLSLRuntime().getSaturateIntrinsic(), ArrayRef<Value *>{Op0},
19496 nullptr, "hlsl.saturate");
19497 }
19498 case Builtin::BI__builtin_hlsl_select: {
19499 Value *OpCond = EmitScalarExpr(E->getArg(0));
19500 RValue RValTrue = EmitAnyExpr(E->getArg(1));
19501 Value *OpTrue =
19502 RValTrue.isScalar()
19503 ? RValTrue.getScalarVal()
19504 : RValTrue.getAggregatePointer(E->getArg(1)->getType(), *this);
19505 RValue RValFalse = EmitAnyExpr(E->getArg(2));
19506 Value *OpFalse =
19507 RValFalse.isScalar()
19508 ? RValFalse.getScalarVal()
19509 : RValFalse.getAggregatePointer(E->getArg(2)->getType(), *this);
19510
19511 Value *SelectVal =
19512 Builder.CreateSelect(OpCond, OpTrue, OpFalse, "hlsl.select");
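    // For aggregate operands the select is performed on their addresses, and
    // the result is stored to the caller-provided return value slot below.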
19513 if (!RValTrue.isScalar())
19514 Builder.CreateStore(SelectVal, ReturnValue.getAddress(),
19515 ReturnValue.isVolatile());
19516
19517 return SelectVal;
19518 }
19519 case Builtin::BI__builtin_hlsl_step: {
19520 Value *Op0 = EmitScalarExpr(E->getArg(0));
19521 Value *Op1 = EmitScalarExpr(E->getArg(1));
19522 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
19523 E->getArg(1)->getType()->hasFloatingRepresentation() &&
19524 "step operands must have a float representation");
19525 return Builder.CreateIntrinsic(
19526 /*ReturnType=*/Op0->getType(), CGM.getHLSLRuntime().getStepIntrinsic(),
19527 ArrayRef<Value *>{Op0, Op1}, nullptr, "hlsl.step");
19528 }
19529 case Builtin::BI__builtin_hlsl_wave_active_all_true: {
19530 Value *Op = EmitScalarExpr(E->getArg(0));
19531 assert(Op->getType()->isIntegerTy(1) &&
19532 "Intrinsic WaveActiveAllTrue operand must be a bool");
19533
19534 Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveAllTrueIntrinsic();
19535 return EmitRuntimeCall(
19536 Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID), {Op});
19537 }
19538 case Builtin::BI__builtin_hlsl_wave_active_any_true: {
19539 Value *Op = EmitScalarExpr(E->getArg(0));
19540 assert(Op->getType()->isIntegerTy(1) &&
19541 "Intrinsic WaveActiveAnyTrue operand must be a bool");
19542
19543 Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveAnyTrueIntrinsic();
19544 return EmitRuntimeCall(
19545 Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID), {Op});
19546 }
19547 case Builtin::BI__builtin_hlsl_wave_active_count_bits: {
19548 Value *OpExpr = EmitScalarExpr(E->getArg(0));
19549 Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveCountBitsIntrinsic();
19550 return EmitRuntimeCall(
19551 Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID),
19552 ArrayRef{OpExpr});
19553 }
19554 case Builtin::BI__builtin_hlsl_wave_active_sum: {
19555     // Due to the use of variadic arguments, explicitly retrieve the argument.
19556 Value *OpExpr = EmitScalarExpr(E->getArg(0));
19557 llvm::FunctionType *FT = llvm::FunctionType::get(
19558 OpExpr->getType(), ArrayRef{OpExpr->getType()}, false);
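    // The intrinsic is declared by its overloaded name via CreateRuntimeFunction
    // so the call site can be marked convergent.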
19559 Intrinsic::ID IID = getWaveActiveSumIntrinsic(
19560 getTarget().getTriple().getArch(), CGM.getHLSLRuntime(),
19561 E->getArg(0)->getType());
19562
19563 // Get overloaded name
19564 std::string Name =
19565 Intrinsic::getName(IID, ArrayRef{OpExpr->getType()}, &CGM.getModule());
19566 return EmitRuntimeCall(CGM.CreateRuntimeFunction(FT, Name, {},
19567 /*Local=*/false,
19568 /*AssumeConvergent=*/true),
19569 ArrayRef{OpExpr}, "hlsl.wave.active.sum");
19570 }
19571 case Builtin::BI__builtin_hlsl_wave_get_lane_index: {
19572     // There is no SPIR-V intrinsic for this; it is a SPIR-V built-in defined
19573     // in SPIRVBuiltins.td. So we emit the DirectX intrinsic directly for DXIL
19574     // and call the demangled built-in by name for SPIR-V.
19575 switch (CGM.getTarget().getTriple().getArch()) {
19576 case llvm::Triple::dxil:
19577 return EmitRuntimeCall(Intrinsic::getOrInsertDeclaration(
19578 &CGM.getModule(), Intrinsic::dx_wave_getlaneindex));
19579 case llvm::Triple::spirv:
19580    return EmitRuntimeCall(CGM.CreateRuntimeFunction(
19581        llvm::FunctionType::get(IntTy, {}, false),
19582 "__hlsl_wave_get_lane_index", {}, false, true));
19583 default:
19584 llvm_unreachable(
19585 "Intrinsic WaveGetLaneIndex not supported by target architecture");
19586 }
19587 }
19588 case Builtin::BI__builtin_hlsl_wave_is_first_lane: {
19589 Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveIsFirstLaneIntrinsic();
19590 return EmitRuntimeCall(
19591 Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID));
19592 }
19593 case Builtin::BI__builtin_hlsl_wave_read_lane_at: {
19594     // Due to the use of variadic arguments we must explicitly retrieve them and
19595 // create our function type.
19596 Value *OpExpr = EmitScalarExpr(E->getArg(0));
19597 Value *OpIndex = EmitScalarExpr(E->getArg(1));
19598 llvm::FunctionType *FT = llvm::FunctionType::get(
19599 OpExpr->getType(), ArrayRef{OpExpr->getType(), OpIndex->getType()},
19600 false);
19601
19602 // Get overloaded name
19603 std::string Name =
19604 Intrinsic::getName(CGM.getHLSLRuntime().getWaveReadLaneAtIntrinsic(),
19605 ArrayRef{OpExpr->getType()}, &CGM.getModule());
19606 return EmitRuntimeCall(CGM.CreateRuntimeFunction(FT, Name, {},
19607 /*Local=*/false,
19608 /*AssumeConvergent=*/true),
19609 ArrayRef{OpExpr, OpIndex}, "hlsl.wave.readlane");
19610 }
19611 case Builtin::BI__builtin_hlsl_elementwise_sign: {
19612 auto *Arg0 = E->getArg(0);
19613 Value *Op0 = EmitScalarExpr(Arg0);
19614 llvm::Type *Xty = Op0->getType();
19615 llvm::Type *retType = llvm::Type::getInt32Ty(this->getLLVMContext());
19616 if (Xty->isVectorTy()) {
19617 auto *XVecTy = Arg0->getType()->getAs<VectorType>();
19618 retType = llvm::VectorType::get(
19619 retType, ElementCount::getFixed(XVecTy->getNumElements()));
19620 }
19621 assert((Arg0->getType()->hasFloatingRepresentation() ||
19622 Arg0->getType()->hasIntegerRepresentation()) &&
19623 "sign operand must have a float or int representation");
19624
19625    if (Arg0->getType()->hasUnsignedIntegerRepresentation()) {
19626      Value *Cmp = Builder.CreateICmpEQ(Op0, ConstantInt::get(Xty, 0));
19627 return Builder.CreateSelect(Cmp, ConstantInt::get(retType, 0),
19628 ConstantInt::get(retType, 1), "hlsl.sign");
19629 }
19630
19631 return Builder.CreateIntrinsic(
19632 retType, CGM.getHLSLRuntime().getSignIntrinsic(),
19633 ArrayRef<Value *>{Op0}, nullptr, "hlsl.sign");
19634 }
19635 case Builtin::BI__builtin_hlsl_elementwise_radians: {
19636 Value *Op0 = EmitScalarExpr(E->getArg(0));
19637 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
19638 "radians operand must have a float representation");
19639 return Builder.CreateIntrinsic(
19640 /*ReturnType=*/Op0->getType(),
19641 CGM.getHLSLRuntime().getRadiansIntrinsic(), ArrayRef<Value *>{Op0},
19642 nullptr, "hlsl.radians");
19643 }
19644 case Builtin::BI__builtin_hlsl_buffer_update_counter: {
19645 Value *ResHandle = EmitScalarExpr(E->getArg(0));
19646 Value *Offset = EmitScalarExpr(E->getArg(1));
19647 Value *OffsetI8 = Builder.CreateIntCast(Offset, Int8Ty, true);
19648 return Builder.CreateIntrinsic(
19649 /*ReturnType=*/Offset->getType(),
19650 CGM.getHLSLRuntime().getBufferUpdateCounterIntrinsic(),
19651 ArrayRef<Value *>{ResHandle, OffsetI8}, nullptr);
19652 }
19653 case Builtin::BI__builtin_hlsl_elementwise_splitdouble: {
19654
19655 assert((E->getArg(0)->getType()->hasFloatingRepresentation() &&
19656 E->getArg(1)->getType()->hasUnsignedIntegerRepresentation() &&
19657 E->getArg(2)->getType()->hasUnsignedIntegerRepresentation()) &&
19658            "splitdouble operand types mismatch");
19659 return handleHlslSplitdouble(E, this);
19660 }
19661 case Builtin::BI__builtin_hlsl_elementwise_clip:
19662 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
19663           "clip operand must have a float representation");
19664 return handleHlslClip(E, this);
19665 case Builtin::BI__builtin_hlsl_group_memory_barrier_with_group_sync: {
19666 Intrinsic::ID ID =
19667 CGM.getHLSLRuntime().getGroupMemoryBarrierWithGroupSyncIntrinsic();
19668 return EmitRuntimeCall(
19669 Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID));
19670 }
19671 }
19672 return nullptr;
19673}
19674
19675void CodeGenFunction::AddAMDGPUFenceAddressSpaceMMRA(llvm::Instruction *Inst,
19676 const CallExpr *E) {
19677 constexpr const char *Tag = "amdgpu-as";
19678
19679 LLVMContext &Ctx = Inst->getContext();
19680  SmallVector<MMRAMetadata::TagT, 3> MMRAs;
19681  for (unsigned K = 2; K < E->getNumArgs(); ++K) {
19682 llvm::Value *V = EmitScalarExpr(E->getArg(K));
19683 StringRef AS;
19684 if (llvm::getConstantStringInfo(V, AS)) {
19685 MMRAs.push_back({Tag, AS});
19686 // TODO: Delete the resulting unused constant?
19687 continue;
19688 }
19689 CGM.Error(E->getExprLoc(),
19690 "expected an address space name as a string literal");
19691 }
19692
19693 llvm::sort(MMRAs);
19694 MMRAs.erase(llvm::unique(MMRAs), MMRAs.end());
19695 Inst->setMetadata(LLVMContext::MD_mmra, MMRAMetadata::getMD(Ctx, MMRAs));
19696}
19697
19698Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
19699                                              const CallExpr *E) {
19700 llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent;
19701 llvm::SyncScope::ID SSID;
19702 switch (BuiltinID) {
19703 case AMDGPU::BI__builtin_amdgcn_div_scale:
19704 case AMDGPU::BI__builtin_amdgcn_div_scalef: {
19705     // Translate from the intrinsic's struct return to the builtin's out
19706 // argument.
19707
19708 Address FlagOutPtr = EmitPointerWithAlignment(E->getArg(3));
19709
19710 llvm::Value *X = EmitScalarExpr(E->getArg(0));
19711 llvm::Value *Y = EmitScalarExpr(E->getArg(1));
19712 llvm::Value *Z = EmitScalarExpr(E->getArg(2));
19713
19714 llvm::Function *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale,
19715 X->getType());
19716
19717 llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z});
19718
19719 llvm::Value *Result = Builder.CreateExtractValue(Tmp, 0);
19720 llvm::Value *Flag = Builder.CreateExtractValue(Tmp, 1);
19721
19722 llvm::Type *RealFlagType = FlagOutPtr.getElementType();
19723
19724 llvm::Value *FlagExt = Builder.CreateZExt(Flag, RealFlagType);
19725 Builder.CreateStore(FlagExt, FlagOutPtr);
19726 return Result;
19727 }
19728 case AMDGPU::BI__builtin_amdgcn_div_fmas:
19729 case AMDGPU::BI__builtin_amdgcn_div_fmasf: {
19730 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
19731 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
19732 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
19733 llvm::Value *Src3 = EmitScalarExpr(E->getArg(3));
19734
19735 llvm::Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas,
19736 Src0->getType());
19737 llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3);
19738 return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool});
19739 }
19740
19741 case AMDGPU::BI__builtin_amdgcn_ds_swizzle:
19742 return emitBuiltinWithOneOverloadedType<2>(*this, E,
19743 Intrinsic::amdgcn_ds_swizzle);
19744 case AMDGPU::BI__builtin_amdgcn_mov_dpp8:
19745 case AMDGPU::BI__builtin_amdgcn_mov_dpp:
19746 case AMDGPU::BI__builtin_amdgcn_update_dpp: {
19747    llvm::SmallVector<llvm::Value *, 6> Args;
19748    // Find out if any arguments are required to be integer constant
19749 // expressions.
19750 unsigned ICEArguments = 0;
19751    ASTContext::GetBuiltinTypeError Error;
19752    getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
19753 assert(Error == ASTContext::GE_None && "Should not codegen an error");
19754 llvm::Type *DataTy = ConvertType(E->getArg(0)->getType());
19755 unsigned Size = DataTy->getPrimitiveSizeInBits();
19756 llvm::Type *IntTy =
19757 llvm::IntegerType::get(Builder.getContext(), std::max(Size, 32u));
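    // The DPP intrinsics operate on at least 32-bit values, so narrower payloads
    // are zero-extended before the call and the result is truncated back to
    // DataTy afterwards.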
19758 Function *F =
19759 CGM.getIntrinsic(BuiltinID == AMDGPU::BI__builtin_amdgcn_mov_dpp8
19760 ? Intrinsic::amdgcn_mov_dpp8
19761 : Intrinsic::amdgcn_update_dpp,
19762 IntTy);
19763 assert(E->getNumArgs() == 5 || E->getNumArgs() == 6 ||
19764 E->getNumArgs() == 2);
19765 bool InsertOld = BuiltinID == AMDGPU::BI__builtin_amdgcn_mov_dpp;
19766 if (InsertOld)
19767 Args.push_back(llvm::PoisonValue::get(IntTy));
19768 for (unsigned I = 0; I != E->getNumArgs(); ++I) {
19769 llvm::Value *V = EmitScalarOrConstFoldImmArg(ICEArguments, I, E);
19770 if (I < (BuiltinID == AMDGPU::BI__builtin_amdgcn_update_dpp ? 2u : 1u) &&
19771 Size < 32) {
19772 if (!DataTy->isIntegerTy())
19773 V = Builder.CreateBitCast(
19774 V, llvm::IntegerType::get(Builder.getContext(), Size));
19775 V = Builder.CreateZExtOrBitCast(V, IntTy);
19776 }
19777 llvm::Type *ExpTy =
19778 F->getFunctionType()->getFunctionParamType(I + InsertOld);
19779 Args.push_back(Builder.CreateTruncOrBitCast(V, ExpTy));
19780 }
19781 Value *V = Builder.CreateCall(F, Args);
19782 if (Size < 32 && !DataTy->isIntegerTy())
19783 V = Builder.CreateTrunc(
19784 V, llvm::IntegerType::get(Builder.getContext(), Size));
19785 return Builder.CreateTruncOrBitCast(V, DataTy);
19786 }
19787 case AMDGPU::BI__builtin_amdgcn_permlane16:
19788 case AMDGPU::BI__builtin_amdgcn_permlanex16:
19789 return emitBuiltinWithOneOverloadedType<6>(
19790 *this, E,
19791 BuiltinID == AMDGPU::BI__builtin_amdgcn_permlane16
19792 ? Intrinsic::amdgcn_permlane16
19793 : Intrinsic::amdgcn_permlanex16);
19794 case AMDGPU::BI__builtin_amdgcn_permlane64:
19795 return emitBuiltinWithOneOverloadedType<1>(*this, E,
19796 Intrinsic::amdgcn_permlane64);
19797 case AMDGPU::BI__builtin_amdgcn_readlane:
19798 return emitBuiltinWithOneOverloadedType<2>(*this, E,
19799 Intrinsic::amdgcn_readlane);
19800 case AMDGPU::BI__builtin_amdgcn_readfirstlane:
19801 return emitBuiltinWithOneOverloadedType<1>(*this, E,
19802 Intrinsic::amdgcn_readfirstlane);
19803 case AMDGPU::BI__builtin_amdgcn_div_fixup:
19804 case AMDGPU::BI__builtin_amdgcn_div_fixupf:
19805 case AMDGPU::BI__builtin_amdgcn_div_fixuph:
19806 return emitBuiltinWithOneOverloadedType<3>(*this, E,
19807 Intrinsic::amdgcn_div_fixup);
19808 case AMDGPU::BI__builtin_amdgcn_trig_preop:
19809 case AMDGPU::BI__builtin_amdgcn_trig_preopf:
19810 return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_trig_preop);
19811 case AMDGPU::BI__builtin_amdgcn_rcp:
19812 case AMDGPU::BI__builtin_amdgcn_rcpf:
19813 case AMDGPU::BI__builtin_amdgcn_rcph:
19814 return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_rcp);
19815 case AMDGPU::BI__builtin_amdgcn_sqrt:
19816 case AMDGPU::BI__builtin_amdgcn_sqrtf:
19817 case AMDGPU::BI__builtin_amdgcn_sqrth:
19818 return emitBuiltinWithOneOverloadedType<1>(*this, E,
19819 Intrinsic::amdgcn_sqrt);
19820 case AMDGPU::BI__builtin_amdgcn_rsq:
19821 case AMDGPU::BI__builtin_amdgcn_rsqf:
19822 case AMDGPU::BI__builtin_amdgcn_rsqh:
19823 return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_rsq);
19824 case AMDGPU::BI__builtin_amdgcn_rsq_clamp:
19825 case AMDGPU::BI__builtin_amdgcn_rsq_clampf:
19826 return emitBuiltinWithOneOverloadedType<1>(*this, E,
19827 Intrinsic::amdgcn_rsq_clamp);
19828 case AMDGPU::BI__builtin_amdgcn_sinf:
19829 case AMDGPU::BI__builtin_amdgcn_sinh:
19830 return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_sin);
19831 case AMDGPU::BI__builtin_amdgcn_cosf:
19832 case AMDGPU::BI__builtin_amdgcn_cosh:
19833 return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_cos);
19834 case AMDGPU::BI__builtin_amdgcn_dispatch_ptr:
19835 return EmitAMDGPUDispatchPtr(*this, E);
19836 case AMDGPU::BI__builtin_amdgcn_logf:
19837 return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_log);
19838 case AMDGPU::BI__builtin_amdgcn_exp2f:
19839 return emitBuiltinWithOneOverloadedType<1>(*this, E,
19840 Intrinsic::amdgcn_exp2);
19841 case AMDGPU::BI__builtin_amdgcn_log_clampf:
19842 return emitBuiltinWithOneOverloadedType<1>(*this, E,
19843 Intrinsic::amdgcn_log_clamp);
19844 case AMDGPU::BI__builtin_amdgcn_ldexp:
19845 case AMDGPU::BI__builtin_amdgcn_ldexpf: {
19846 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
19847 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
19848 llvm::Function *F =
19849 CGM.getIntrinsic(Intrinsic::ldexp, {Src0->getType(), Src1->getType()});
19850 return Builder.CreateCall(F, {Src0, Src1});
19851 }
19852 case AMDGPU::BI__builtin_amdgcn_ldexph: {
19853 // The raw instruction has a different behavior for out of bounds exponent
19854     // values (implicit truncation instead of saturating to short_min/short_max).
19855 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
19856 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
19857 llvm::Function *F =
19858 CGM.getIntrinsic(Intrinsic::ldexp, {Src0->getType(), Int16Ty});
19859 return Builder.CreateCall(F, {Src0, Builder.CreateTrunc(Src1, Int16Ty)});
19860 }
19861 case AMDGPU::BI__builtin_amdgcn_frexp_mant:
19862 case AMDGPU::BI__builtin_amdgcn_frexp_mantf:
19863 case AMDGPU::BI__builtin_amdgcn_frexp_manth:
19864 return emitBuiltinWithOneOverloadedType<1>(*this, E,
19865 Intrinsic::amdgcn_frexp_mant);
19866 case AMDGPU::BI__builtin_amdgcn_frexp_exp:
19867 case AMDGPU::BI__builtin_amdgcn_frexp_expf: {
19868 Value *Src0 = EmitScalarExpr(E->getArg(0));
19869 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
19870 { Builder.getInt32Ty(), Src0->getType() });
19871 return Builder.CreateCall(F, Src0);
19872 }
19873 case AMDGPU::BI__builtin_amdgcn_frexp_exph: {
19874 Value *Src0 = EmitScalarExpr(E->getArg(0));
19875 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
19876 { Builder.getInt16Ty(), Src0->getType() });
19877 return Builder.CreateCall(F, Src0);
19878 }
19879 case AMDGPU::BI__builtin_amdgcn_fract:
19880 case AMDGPU::BI__builtin_amdgcn_fractf:
19881 case AMDGPU::BI__builtin_amdgcn_fracth:
19882 return emitBuiltinWithOneOverloadedType<1>(*this, E,
19883 Intrinsic::amdgcn_fract);
19884 case AMDGPU::BI__builtin_amdgcn_lerp:
19885 return emitBuiltinWithOneOverloadedType<3>(*this, E,
19886 Intrinsic::amdgcn_lerp);
19887 case AMDGPU::BI__builtin_amdgcn_ubfe:
19888 return emitBuiltinWithOneOverloadedType<3>(*this, E,
19889 Intrinsic::amdgcn_ubfe);
19890 case AMDGPU::BI__builtin_amdgcn_sbfe:
19891 return emitBuiltinWithOneOverloadedType<3>(*this, E,
19892 Intrinsic::amdgcn_sbfe);
19893 case AMDGPU::BI__builtin_amdgcn_ballot_w32:
19894 case AMDGPU::BI__builtin_amdgcn_ballot_w64: {
19895 llvm::Type *ResultType = ConvertType(E->getType());
19896 llvm::Value *Src = EmitScalarExpr(E->getArg(0));
19897 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, { ResultType });
19898 return Builder.CreateCall(F, { Src });
19899 }
19900 case AMDGPU::BI__builtin_amdgcn_uicmp:
19901 case AMDGPU::BI__builtin_amdgcn_uicmpl:
19902 case AMDGPU::BI__builtin_amdgcn_sicmp:
19903 case AMDGPU::BI__builtin_amdgcn_sicmpl: {
19904 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
19905 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
19906 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
19907
19908 // FIXME-GFX10: How should 32 bit mask be handled?
19909 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_icmp,
19910 { Builder.getInt64Ty(), Src0->getType() });
19911 return Builder.CreateCall(F, { Src0, Src1, Src2 });
19912 }
19913 case AMDGPU::BI__builtin_amdgcn_fcmp:
19914 case AMDGPU::BI__builtin_amdgcn_fcmpf: {
19915 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
19916 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
19917 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
19918
19919 // FIXME-GFX10: How should 32 bit mask be handled?
19920 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_fcmp,
19921 { Builder.getInt64Ty(), Src0->getType() });
19922 return Builder.CreateCall(F, { Src0, Src1, Src2 });
19923 }
19924 case AMDGPU::BI__builtin_amdgcn_class:
19925 case AMDGPU::BI__builtin_amdgcn_classf:
19926 case AMDGPU::BI__builtin_amdgcn_classh:
19927 return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class);
19928 case AMDGPU::BI__builtin_amdgcn_fmed3f:
19929 case AMDGPU::BI__builtin_amdgcn_fmed3h:
19930 return emitBuiltinWithOneOverloadedType<3>(*this, E,
19931 Intrinsic::amdgcn_fmed3);
19932 case AMDGPU::BI__builtin_amdgcn_ds_append:
19933 case AMDGPU::BI__builtin_amdgcn_ds_consume: {
19934 Intrinsic::ID Intrin = BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_append ?
19935 Intrinsic::amdgcn_ds_append : Intrinsic::amdgcn_ds_consume;
19936 Value *Src0 = EmitScalarExpr(E->getArg(0));
19937 Function *F = CGM.getIntrinsic(Intrin, { Src0->getType() });
19938 return Builder.CreateCall(F, { Src0, Builder.getFalse() });
19939 }
19940 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32:
19941 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32:
19942 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16:
19943 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4f16:
19944 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4bf16:
19945 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8i16:
19946 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8f16:
19947 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8bf16:
19948 case AMDGPU::BI__builtin_amdgcn_ds_read_tr4_b64_v2i32:
19949 case AMDGPU::BI__builtin_amdgcn_ds_read_tr8_b64_v2i32:
19950 case AMDGPU::BI__builtin_amdgcn_ds_read_tr6_b96_v3i32:
19951 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4f16:
19952 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4bf16:
19953 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4i16: {
19954 Intrinsic::ID IID;
19955 switch (BuiltinID) {
19956 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32:
19957 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32:
19958 IID = Intrinsic::amdgcn_global_load_tr_b64;
19959 break;
19960 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16:
19961 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4f16:
19962 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4bf16:
19963 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8i16:
19964 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8f16:
19965 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8bf16:
19966 IID = Intrinsic::amdgcn_global_load_tr_b128;
19967 break;
19968 case AMDGPU::BI__builtin_amdgcn_ds_read_tr4_b64_v2i32:
19969 IID = Intrinsic::amdgcn_ds_read_tr4_b64;
19970 break;
19971 case AMDGPU::BI__builtin_amdgcn_ds_read_tr8_b64_v2i32:
19972 IID = Intrinsic::amdgcn_ds_read_tr8_b64;
19973 break;
19974 case AMDGPU::BI__builtin_amdgcn_ds_read_tr6_b96_v3i32:
19975 IID = Intrinsic::amdgcn_ds_read_tr6_b96;
19976 break;
19977 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4i16:
19978 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4f16:
19979 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4bf16:
19980 IID = Intrinsic::amdgcn_ds_read_tr16_b64;
19981 break;
19982 }
19983 llvm::Type *LoadTy = ConvertType(E->getType());
19984 llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
19985 llvm::Function *F = CGM.getIntrinsic(IID, {LoadTy});
19986 return Builder.CreateCall(F, {Addr});
19987 }
19988 case AMDGPU::BI__builtin_amdgcn_get_fpenv: {
19989 Function *F = CGM.getIntrinsic(Intrinsic::get_fpenv,
19990 {llvm::Type::getInt64Ty(getLLVMContext())});
19991 return Builder.CreateCall(F);
19992 }
19993 case AMDGPU::BI__builtin_amdgcn_set_fpenv: {
19994 Function *F = CGM.getIntrinsic(Intrinsic::set_fpenv,
19995 {llvm::Type::getInt64Ty(getLLVMContext())});
19996 llvm::Value *Env = EmitScalarExpr(E->getArg(0));
19997 return Builder.CreateCall(F, {Env});
19998 }
19999 case AMDGPU::BI__builtin_amdgcn_read_exec:
20000 return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, false);
20001 case AMDGPU::BI__builtin_amdgcn_read_exec_lo:
20002 return EmitAMDGCNBallotForExec(*this, E, Int32Ty, Int32Ty, false);
20003 case AMDGPU::BI__builtin_amdgcn_read_exec_hi:
20004 return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, true);
20005 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray:
20006 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_h:
20007 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_l:
20008 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_lh: {
20009 llvm::Value *NodePtr = EmitScalarExpr(E->getArg(0));
20010 llvm::Value *RayExtent = EmitScalarExpr(E->getArg(1));
20011 llvm::Value *RayOrigin = EmitScalarExpr(E->getArg(2));
20012 llvm::Value *RayDir = EmitScalarExpr(E->getArg(3));
20013 llvm::Value *RayInverseDir = EmitScalarExpr(E->getArg(4));
20014 llvm::Value *TextureDescr = EmitScalarExpr(E->getArg(5));
20015
20016 // The builtins take these arguments as vec4 where the last element is
20017 // ignored. The intrinsic takes them as vec3.
20018 RayOrigin = Builder.CreateShuffleVector(RayOrigin, RayOrigin,
20019 ArrayRef<int>{0, 1, 2});
20020 RayDir =
20021 Builder.CreateShuffleVector(RayDir, RayDir, ArrayRef<int>{0, 1, 2});
20022 RayInverseDir = Builder.CreateShuffleVector(RayInverseDir, RayInverseDir,
20023 ArrayRef<int>{0, 1, 2});
20024
20025 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_image_bvh_intersect_ray,
20026 {NodePtr->getType(), RayDir->getType()});
20027 return Builder.CreateCall(F, {NodePtr, RayExtent, RayOrigin, RayDir,
20028 RayInverseDir, TextureDescr});
20029 }
20030
20031 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_rtn: {
20032    SmallVector<Value *, 4> Args;
20033    for (int i = 0, e = E->getNumArgs(); i != e; ++i)
20034 Args.push_back(EmitScalarExpr(E->getArg(i)));
20035
20036 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ds_bvh_stack_rtn);
20037 Value *Call = Builder.CreateCall(F, Args);
20038 Value *Rtn = Builder.CreateExtractValue(Call, 0);
20039 Value *A = Builder.CreateExtractValue(Call, 1);
20040 llvm::Type *RetTy = ConvertType(E->getType());
20041 Value *I0 = Builder.CreateInsertElement(PoisonValue::get(RetTy), Rtn,
20042 (uint64_t)0);
20043 return Builder.CreateInsertElement(I0, A, 1);
20044 }
20045 case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_16x16x128_f8f6f4:
20046 case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4: {
20047 llvm::FixedVectorType *VT = FixedVectorType::get(Builder.getInt32Ty(), 8);
20048    Function *F = CGM.getIntrinsic(
20049        BuiltinID == AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4
20050 ? Intrinsic::amdgcn_mfma_scale_f32_32x32x64_f8f6f4
20051 : Intrinsic::amdgcn_mfma_scale_f32_16x16x128_f8f6f4,
20052 {VT, VT});
20053
20054    SmallVector<Value *, 9> Args;
20055    for (unsigned I = 0, N = E->getNumArgs(); I != N; ++I)
20056 Args.push_back(EmitScalarExpr(E->getArg(I)));
20057 return Builder.CreateCall(F, Args);
20058 }
20059 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:
20060 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32:
20061 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:
20062 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64:
20063 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:
20064 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32:
20065 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64:
20066 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64:
20067 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32:
20068 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64:
20069 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32:
20070 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64:
20071 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32:
20072 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64:
20073 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32:
20074 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64:
20075 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12:
20076 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64_gfx12:
20077 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12:
20078 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64_gfx12:
20079 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32_gfx12:
20080 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64_gfx12:
20081 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32_gfx12:
20082 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64_gfx12:
20083 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12:
20084 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12:
20085 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32_gfx12:
20086 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64_gfx12:
20087 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12:
20088 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12:
20089 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12:
20090 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12:
20091 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12:
20092 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12:
20093 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12:
20094 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12:
20095 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12:
20096 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12:
20097 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32:
20098 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w64:
20099 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32:
20100 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64:
20101 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w32:
20102 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w64:
20103 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32:
20104 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64:
20105 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32:
20106 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64:
20107 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32:
20108 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64:
20109 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32:
20110 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64:
20111 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32:
20112 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64:
20113 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32:
20114 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64:
20115 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32:
20116 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64:
20117 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32:
20118 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64: {
20119
20120 // These operations perform a matrix multiplication and accumulation of
20121 // the form:
20122 // D = A * B + C
20123 // We need to specify one type for matrices AB and one for matrices CD.
20124 // Sparse matrix operations can have different types for A and B as well as
20125 // an additional type for sparsity index.
20126 // Destination type should be put before types used for source operands.
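    // e.g. {2, 0} below means call operand 2 supplies the C/D (accumulator)
    // type and operand 0 supplies the A/B type for intrinsic overloading.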
20127 SmallVector<unsigned, 2> ArgsForMatchingMatrixTypes;
20128 // On GFX12, the intrinsics with 16-bit accumulator use a packed layout.
20129 // There is no need for the variable opsel argument, so always set it to
20130 // "false".
20131 bool AppendFalseForOpselArg = false;
20132 unsigned BuiltinWMMAOp;
20133
20134 switch (BuiltinID) {
20135 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32:
20136 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64:
20137 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32_gfx12:
20138 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64_gfx12:
20139 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
20140 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_f16;
20141 break;
20142 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32:
20143 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64:
20144 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32_gfx12:
20145 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64_gfx12:
20146 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
20147 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf16;
20148 break;
20149 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12:
20150 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64_gfx12:
20151 AppendFalseForOpselArg = true;
20152 [[fallthrough]];
20153 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:
20154 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64:
20155 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
20156 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16;
20157 break;
20158 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12:
20159 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64_gfx12:
20160 AppendFalseForOpselArg = true;
20161 [[fallthrough]];
20162 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:
20163 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:
20164 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
20165 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16;
20166 break;
20167 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32:
20168 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64:
20169 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
20170 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16_tied;
20171 break;
20172 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32:
20173 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64:
20174 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
20175 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16_tied;
20176 break;
20177 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32:
20178 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64:
20179 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32_gfx12:
20180 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64_gfx12:
20181 ArgsForMatchingMatrixTypes = {4, 1}; // CD, AB
20182 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu8;
20183 break;
20184 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32:
20185 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64:
20186 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12:
20187 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12:
20188 ArgsForMatchingMatrixTypes = {4, 1}; // CD, AB
20189 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu4;
20190 break;
20191 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12:
20192 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12:
20193 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
20194 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_fp8;
20195 break;
20196 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12:
20197 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12:
20198 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
20199 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_bf8;
20200 break;
20201 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12:
20202 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12:
20203 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
20204 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_fp8;
20205 break;
20206 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12:
20207 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12:
20208 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
20209 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_bf8;
20210 break;
20211 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12:
20212 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12:
20213 ArgsForMatchingMatrixTypes = {4, 1}; // CD, AB
20214 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x32_iu4;
20215 break;
20216 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32:
20217 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w64:
20218 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
20219 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_f16;
20220 break;
20221 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32:
20222 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64:
20223 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
20224 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf16;
20225 break;
20226 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w32:
20227 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w64:
20228 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
20229 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x32_f16;
20230 break;
20231 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32:
20232 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64:
20233 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
20234 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_bf16_16x16x32_bf16;
20235 break;
20236 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32:
20237 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64:
20238 ArgsForMatchingMatrixTypes = {4, 1, 3, 5}; // CD, A, B, Index
20239 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x32_iu8;
20240 break;
20241 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32:
20242 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64:
20243 ArgsForMatchingMatrixTypes = {4, 1, 3, 5}; // CD, A, B, Index
20244 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x32_iu4;
20245 break;
20246 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32:
20247 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64:
20248 ArgsForMatchingMatrixTypes = {4, 1, 3, 5}; // CD, A, B, Index
20249 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x64_iu4;
20250 break;
20251 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32:
20252 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64:
20253 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
20254 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_fp8;
20255 break;
20256 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32:
20257 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64:
20258 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
20259 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_bf8;
20260 break;
20261 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32:
20262 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64:
20263 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
20264 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_fp8;
20265 break;
20266 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32:
20267 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64:
20268 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
20269 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_bf8;
20270 break;
20271 }
20272
20273    SmallVector<Value *, 6> Args;
20274    for (int i = 0, e = E->getNumArgs(); i != e; ++i)
20275 Args.push_back(EmitScalarExpr(E->getArg(i)));
20276 if (AppendFalseForOpselArg)
20277 Args.push_back(Builder.getFalse());
20278
20279    SmallVector<llvm::Type *, 6> ArgTypes;
20280    for (auto ArgIdx : ArgsForMatchingMatrixTypes)
20281 ArgTypes.push_back(Args[ArgIdx]->getType());
20282
20283 Function *F = CGM.getIntrinsic(BuiltinWMMAOp, ArgTypes);
20284 return Builder.CreateCall(F, Args);
20285 }
20286
20287 // amdgcn workitem
20288 case AMDGPU::BI__builtin_amdgcn_workitem_id_x:
20289 return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_x, 0, 1024);
20290 case AMDGPU::BI__builtin_amdgcn_workitem_id_y:
20291 return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_y, 0, 1024);
20292 case AMDGPU::BI__builtin_amdgcn_workitem_id_z:
20293 return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_z, 0, 1024);
20294
20295 // amdgcn workgroup size
20296 case AMDGPU::BI__builtin_amdgcn_workgroup_size_x:
20297 return EmitAMDGPUWorkGroupSize(*this, 0);
20298 case AMDGPU::BI__builtin_amdgcn_workgroup_size_y:
20299 return EmitAMDGPUWorkGroupSize(*this, 1);
20300 case AMDGPU::BI__builtin_amdgcn_workgroup_size_z:
20301 return EmitAMDGPUWorkGroupSize(*this, 2);
20302
20303 // amdgcn grid size
20304 case AMDGPU::BI__builtin_amdgcn_grid_size_x:
20305 return EmitAMDGPUGridSize(*this, 0);
20306 case AMDGPU::BI__builtin_amdgcn_grid_size_y:
20307 return EmitAMDGPUGridSize(*this, 1);
20308 case AMDGPU::BI__builtin_amdgcn_grid_size_z:
20309 return EmitAMDGPUGridSize(*this, 2);
20310
20311 // r600 intrinsics
20312 case AMDGPU::BI__builtin_r600_recipsqrt_ieee:
20313 case AMDGPU::BI__builtin_r600_recipsqrt_ieeef:
20314 return emitBuiltinWithOneOverloadedType<1>(*this, E,
20315 Intrinsic::r600_recipsqrt_ieee);
20316 case AMDGPU::BI__builtin_r600_read_tidig_x:
20317 return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_x, 0, 1024);
20318 case AMDGPU::BI__builtin_r600_read_tidig_y:
20319 return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_y, 0, 1024);
20320 case AMDGPU::BI__builtin_r600_read_tidig_z:
20321 return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_z, 0, 1024);
20322 case AMDGPU::BI__builtin_amdgcn_alignbit: {
20323 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
20324 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
20325 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
20326 Function *F = CGM.getIntrinsic(Intrinsic::fshr, Src0->getType());
20327 return Builder.CreateCall(F, { Src0, Src1, Src2 });
20328 }
20329 case AMDGPU::BI__builtin_amdgcn_fence: {
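    // Arguments beyond the ordering and scope, if any, name address spaces the
    // fence applies to; they are attached as MMRA metadata below.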
20330    ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(0)),
20331                            EmitScalarExpr(E->getArg(1)), AO, SSID);
20332 FenceInst *Fence = Builder.CreateFence(AO, SSID);
20333 if (E->getNumArgs() > 2)
20334      AddAMDGPUFenceAddressSpaceMMRA(Fence, E);
20335    return Fence;
20336 }
20337 case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
20338 case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
20339 case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
20340 case AMDGPU::BI__builtin_amdgcn_atomic_dec64:
20341 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
20342 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
20343 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16:
20344 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16:
20345 case AMDGPU::BI__builtin_amdgcn_ds_faddf:
20346 case AMDGPU::BI__builtin_amdgcn_ds_fminf:
20347 case AMDGPU::BI__builtin_amdgcn_ds_fmaxf:
20348 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
20349 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
20350 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
20351 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
20352 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
20353 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
20354 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
20355 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16:
20356 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
20357 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
20358 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
20359 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: {
20360 llvm::AtomicRMWInst::BinOp BinOp;
20361 switch (BuiltinID) {
20362 case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
20363 case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
20364 BinOp = llvm::AtomicRMWInst::UIncWrap;
20365 break;
20366 case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
20367 case AMDGPU::BI__builtin_amdgcn_atomic_dec64:
20368 BinOp = llvm::AtomicRMWInst::UDecWrap;
20369 break;
20370 case AMDGPU::BI__builtin_amdgcn_ds_faddf:
20371 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
20372 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
20373 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16:
20374 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16:
20375 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
20376 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
20377 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
20378 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
20379 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
20380 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
20381 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
20382 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16:
20383 BinOp = llvm::AtomicRMWInst::FAdd;
20384 break;
20385 case AMDGPU::BI__builtin_amdgcn_ds_fminf:
20386 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
20387 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
20388 BinOp = llvm::AtomicRMWInst::FMin;
20389 break;
20390 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
20391 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64:
20392 case AMDGPU::BI__builtin_amdgcn_ds_fmaxf:
20393 BinOp = llvm::AtomicRMWInst::FMax;
20394 break;
20395 }
20396
20397 Address Ptr = CheckAtomicAlignment(*this, E);
20398 Value *Val = EmitScalarExpr(E->getArg(1));
20399 llvm::Type *OrigTy = Val->getType();
20400 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
20401
20402 bool Volatile;
20403
20404 if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_faddf ||
20405 BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_fminf ||
20406 BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_fmaxf) {
20407 // __builtin_amdgcn_ds_faddf/fminf/fmaxf has an explicit volatile argument
20408 Volatile =
20409 cast<ConstantInt>(EmitScalarExpr(E->getArg(4)))->getZExtValue();
20410 } else {
20411 // Infer volatile from the passed type.
20412 Volatile =
20413 PtrTy.castAs<PointerType>()->getPointeeType().isVolatileQualified();
20414 }
20415
20416 if (E->getNumArgs() >= 4) {
20417 // Some of the builtins have explicit ordering and scope arguments.
20418 ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(2)),
20419 EmitScalarExpr(E->getArg(3)), AO, SSID);
20420 } else {
20421 // Most of the builtins do not have syncscope/order arguments. For DS
20422 // atomics the scope doesn't really matter, as they implicitly operate at
20423 // workgroup scope.
20424 //
20425 // The global/flat cases need to use agent scope to consistently produce
20426 // the native instruction instead of a cmpxchg expansion.
20427 SSID = getLLVMContext().getOrInsertSyncScopeID("agent");
20428 AO = AtomicOrdering::Monotonic;
20429
20430 // The v2bf16 builtin uses i16 instead of a natural bfloat type.
20431 if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16 ||
20432 BuiltinID == AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16 ||
20433 BuiltinID == AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16) {
20434 llvm::Type *V2BF16Ty = FixedVectorType::get(
20435 llvm::Type::getBFloatTy(Builder.getContext()), 2);
20436 Val = Builder.CreateBitCast(Val, V2BF16Ty);
20437 }
20438 }
20439
20440 llvm::AtomicRMWInst *RMW =
20441 Builder.CreateAtomicRMW(BinOp, Ptr, Val, AO, SSID);
20442 if (Volatile)
20443 RMW->setVolatile(true);
20444
20445 unsigned AddrSpace = Ptr.getType()->getAddressSpace();
20446 if (AddrSpace != llvm::AMDGPUAS::LOCAL_ADDRESS) {
20447 // Most targets require "amdgpu.no.fine.grained.memory" to emit the native
20448 // instruction for flat and global operations.
20449 llvm::MDTuple *EmptyMD = MDNode::get(getLLVMContext(), {});
20450 RMW->setMetadata("amdgpu.no.fine.grained.memory", EmptyMD);
20451
20452 // Most targets require "amdgpu.ignore.denormal.mode" to emit the native
20453 // instruction, but this only matters for float fadd.
20454 if (BinOp == llvm::AtomicRMWInst::FAdd && Val->getType()->isFloatTy())
20455 RMW->setMetadata("amdgpu.ignore.denormal.mode", EmptyMD);
20456 }
20457
20458 return Builder.CreateBitCast(RMW, OrigTy);
20459 }
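  // For example, with the defaults above a call like
  //   __builtin_amdgcn_global_atomic_fadd_f32(p, v)
  // lowers roughly to
  //   atomicrmw fadd ptr addrspace(1) %p, float %v syncscope("agent") monotonic,
  //       !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0
  // since the float fadd case attaches both metadata nodes on non-LDS pointers.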
20460 case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtn:
20461 case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtnl: {
20462 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
20463 llvm::Type *ResultType = ConvertType(E->getType());
20464 // s_sendmsg_rtn is mangled using return type only.
20465 Function *F =
20466 CGM.getIntrinsic(Intrinsic::amdgcn_s_sendmsg_rtn, {ResultType});
20467 return Builder.CreateCall(F, {Arg});
20468 }
20469 case AMDGPU::BI__builtin_amdgcn_permlane16_swap:
20470 case AMDGPU::BI__builtin_amdgcn_permlane32_swap: {
20471 // Because builtin types are limited, and the intrinsic uses a struct/pair
20472 // output, marshal the pair-of-i32 to <2 x i32>.
20473 Value *VDstOld = EmitScalarExpr(E->getArg(0));
20474 Value *VSrcOld = EmitScalarExpr(E->getArg(1));
20475 Value *FI = EmitScalarExpr(E->getArg(2));
20476 Value *BoundCtrl = EmitScalarExpr(E->getArg(3));
20477 Function *F =
20478 CGM.getIntrinsic(BuiltinID == AMDGPU::BI__builtin_amdgcn_permlane16_swap
20479 ? Intrinsic::amdgcn_permlane16_swap
20480 : Intrinsic::amdgcn_permlane32_swap);
20481 llvm::CallInst *Call =
20482 Builder.CreateCall(F, {VDstOld, VSrcOld, FI, BoundCtrl});
20483
20484 llvm::Value *Elt0 = Builder.CreateExtractValue(Call, 0);
20485 llvm::Value *Elt1 = Builder.CreateExtractValue(Call, 1);
20486
20487 llvm::Type *ResultType = ConvertType(E->getType());
20488
20489 llvm::Value *Insert0 = Builder.CreateInsertElement(
20490 llvm::PoisonValue::get(ResultType), Elt0, UINT64_C(0));
20491 llvm::Value *AsVector =
20492 Builder.CreateInsertElement(Insert0, Elt1, UINT64_C(1));
20493 return AsVector;
20494 }
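  // For example, __builtin_amdgcn_permlane16_swap(old, src, fi, bc) calls
  // llvm.amdgcn.permlane16.swap, which returns an {i32, i32} pair; the two
  // extracted members are repacked into the <2 x i32> value the builtin returns.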
20495 case AMDGPU::BI__builtin_amdgcn_bitop3_b32:
20496 case AMDGPU::BI__builtin_amdgcn_bitop3_b16:
20497 return emitBuiltinWithOneOverloadedType<4>(*this, E,
20498 Intrinsic::amdgcn_bitop3);
20499 case AMDGPU::BI__builtin_amdgcn_make_buffer_rsrc:
20500 return emitBuiltinWithOneOverloadedType<4>(
20501 *this, E, Intrinsic::amdgcn_make_buffer_rsrc);
20502 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b8:
20503 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b16:
20504 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b32:
20505 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b64:
20506 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b96:
20507 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b128:
20508 return emitBuiltinWithOneOverloadedType<5>(
20509 *this, E, Intrinsic::amdgcn_raw_ptr_buffer_store);
20510 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b8:
20511 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b16:
20512 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b32:
20513 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b64:
20514 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b96:
20515 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b128: {
20516 llvm::Type *RetTy = nullptr;
20517 switch (BuiltinID) {
20518 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b8:
20519 RetTy = Int8Ty;
20520 break;
20521 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b16:
20522 RetTy = Int16Ty;
20523 break;
20524 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b32:
20525 RetTy = Int32Ty;
20526 break;
20527 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b64:
20528 RetTy = llvm::FixedVectorType::get(Int32Ty, /*NumElements=*/2);
20529 break;
20530 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b96:
20531 RetTy = llvm::FixedVectorType::get(Int32Ty, /*NumElements=*/3);
20532 break;
20533 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b128:
20534 RetTy = llvm::FixedVectorType::get(Int32Ty, /*NumElements=*/4);
20535 break;
20536 }
20537 Function *F =
20538 CGM.getIntrinsic(Intrinsic::amdgcn_raw_ptr_buffer_load, RetTy);
20539 return Builder.CreateCall(
20540 F, {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)),
20541 EmitScalarExpr(E->getArg(2)), EmitScalarExpr(E->getArg(3))});
20542 }
20543 case AMDGPU::BI__builtin_amdgcn_s_prefetch_data:
20544 return emitBuiltinWithOneOverloadedType<2>(
20545 *this, E, Intrinsic::amdgcn_s_prefetch_data);
20546 default:
20547 return nullptr;
20548 }
20549}
20550
20551 Value *CodeGenFunction::EmitSPIRVBuiltinExpr(unsigned BuiltinID,
20552 const CallExpr *E) {
20553 switch (BuiltinID) {
20554 case SPIRV::BI__builtin_spirv_distance: {
20555 Value *X = EmitScalarExpr(E->getArg(0));
20556 Value *Y = EmitScalarExpr(E->getArg(1));
20557 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
20558 E->getArg(1)->getType()->hasFloatingRepresentation() &&
20559 "Distance operands must have a float representation");
20560 assert(E->getArg(0)->getType()->isVectorType() &&
20561 E->getArg(1)->getType()->isVectorType() &&
20562 "Distance operands must be a vector");
20563 return Builder.CreateIntrinsic(
20564 /*ReturnType=*/X->getType()->getScalarType(), Intrinsic::spv_distance,
20565 ArrayRef<Value *>{X, Y}, nullptr, "spv.distance");
20566 }
20567 case SPIRV::BI__builtin_spirv_length: {
20568 Value *X = EmitScalarExpr(E->getArg(0));
20569 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
20570 "length operand must have a float representation");
20571 assert(E->getArg(0)->getType()->isVectorType() &&
20572 "length operand must be a vector");
20573 return Builder.CreateIntrinsic(
20574 /*ReturnType=*/X->getType()->getScalarType(), Intrinsic::spv_length,
20575 ArrayRef<Value *>{X}, nullptr, "spv.length");
20576 }
20577 }
20578 return nullptr;
20579}
20580
20581/// Handle a SystemZ function in which the final argument is a pointer
20582/// to an int that receives the post-instruction CC value. At the LLVM level
20583/// this is represented as a function that returns a {result, cc} pair.
20584 static Value *EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF,
20585 unsigned IntrinsicID,
20586 const CallExpr *E) {
20587 unsigned NumArgs = E->getNumArgs() - 1;
20588 SmallVector<Value *, 8> Args(NumArgs);
20589 for (unsigned I = 0; I < NumArgs; ++I)
20590 Args[I] = CGF.EmitScalarExpr(E->getArg(I));
20591 Address CCPtr = CGF.EmitPointerWithAlignment(E->getArg(NumArgs));
20592 Function *F = CGF.CGM.getIntrinsic(IntrinsicID);
20593 Value *Call = CGF.Builder.CreateCall(F, Args);
20594 Value *CC = CGF.Builder.CreateExtractValue(Call, 1);
20595 CGF.Builder.CreateStore(CC, CCPtr);
20596 return CGF.Builder.CreateExtractValue(Call, 0);
20597}
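// For example, a call like __builtin_s390_vceqbs(a, b, &cc) is emitted as a
// call to llvm.s390.vceqbs returning a {result, cc} pair; element 1 (the CC)
// is stored through the trailing pointer argument and element 0 (the result)
// is returned to the caller.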
20598
20599 Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
20600 const CallExpr *E) {
20601 switch (BuiltinID) {
20602 case SystemZ::BI__builtin_tbegin: {
20603 Value *TDB = EmitScalarExpr(E->getArg(0));
20604 Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
20605 Function *F = CGM.getIntrinsic(Intrinsic::s390_tbegin);
20606 return Builder.CreateCall(F, {TDB, Control});
20607 }
20608 case SystemZ::BI__builtin_tbegin_nofloat: {
20609 Value *TDB = EmitScalarExpr(E->getArg(0));
20610 Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
20611 Function *F = CGM.getIntrinsic(Intrinsic::s390_tbegin_nofloat);
20612 return Builder.CreateCall(F, {TDB, Control});
20613 }
20614 case SystemZ::BI__builtin_tbeginc: {
20615 Value *TDB = llvm::ConstantPointerNull::get(Int8PtrTy);
20616 Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff08);
20617 Function *F = CGM.getIntrinsic(Intrinsic::s390_tbeginc);
20618 return Builder.CreateCall(F, {TDB, Control});
20619 }
20620 case SystemZ::BI__builtin_tabort: {
20621 Value *Data = EmitScalarExpr(E->getArg(0));
20622 Function *F = CGM.getIntrinsic(Intrinsic::s390_tabort);
20623 return Builder.CreateCall(F, Builder.CreateSExt(Data, Int64Ty, "tabort"));
20624 }
20625 case SystemZ::BI__builtin_non_tx_store: {
20626 Value *Address = EmitScalarExpr(E->getArg(0));
20627 Value *Data = EmitScalarExpr(E->getArg(1));
20628 Function *F = CGM.getIntrinsic(Intrinsic::s390_ntstg);
20629 return Builder.CreateCall(F, {Data, Address});
20630 }
20631
20632 // Vector builtins. Note that most vector builtins are mapped automatically
20633 // to target-specific LLVM intrinsics. The ones handled specially here can
20634 // be represented via standard LLVM IR, which is preferable to enable common
20635 // LLVM optimizations.
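  // For example, __builtin_s390_vclzb on a 16-byte vector is emitted roughly as
  //   call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %x, i1 false)
  // so the generic ctlz/cttz/fshl/sqrt/fma optimizations all apply.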
20636
20637 case SystemZ::BI__builtin_s390_vclzb:
20638 case SystemZ::BI__builtin_s390_vclzh:
20639 case SystemZ::BI__builtin_s390_vclzf:
20640 case SystemZ::BI__builtin_s390_vclzg:
20641 case SystemZ::BI__builtin_s390_vclzq: {
20642 llvm::Type *ResultType = ConvertType(E->getType());
20643 Value *X = EmitScalarExpr(E->getArg(0));
20644 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
20645 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
20646 return Builder.CreateCall(F, {X, Undef});
20647 }
20648
20649 case SystemZ::BI__builtin_s390_vctzb:
20650 case SystemZ::BI__builtin_s390_vctzh:
20651 case SystemZ::BI__builtin_s390_vctzf:
20652 case SystemZ::BI__builtin_s390_vctzg:
20653 case SystemZ::BI__builtin_s390_vctzq: {
20654 llvm::Type *ResultType = ConvertType(E->getType());
20655 Value *X = EmitScalarExpr(E->getArg(0));
20656 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
20657 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
20658 return Builder.CreateCall(F, {X, Undef});
20659 }
20660
20661 case SystemZ::BI__builtin_s390_verllb:
20662 case SystemZ::BI__builtin_s390_verllh:
20663 case SystemZ::BI__builtin_s390_verllf:
20664 case SystemZ::BI__builtin_s390_verllg: {
20665 llvm::Type *ResultType = ConvertType(E->getType());
20666 llvm::Value *Src = EmitScalarExpr(E->getArg(0));
20667 llvm::Value *Amt = EmitScalarExpr(E->getArg(1));
20668 // Splat scalar rotate amount to vector type.
20669 unsigned NumElts = cast<llvm::FixedVectorType>(ResultType)->getNumElements();
20670 Amt = Builder.CreateIntCast(Amt, ResultType->getScalarType(), false);
20671 Amt = Builder.CreateVectorSplat(NumElts, Amt);
20672 Function *F = CGM.getIntrinsic(Intrinsic::fshl, ResultType);
20673 return Builder.CreateCall(F, { Src, Src, Amt });
20674 }
20675
20676 case SystemZ::BI__builtin_s390_verllvb:
20677 case SystemZ::BI__builtin_s390_verllvh:
20678 case SystemZ::BI__builtin_s390_verllvf:
20679 case SystemZ::BI__builtin_s390_verllvg: {
20680 llvm::Type *ResultType = ConvertType(E->getType());
20681 llvm::Value *Src = EmitScalarExpr(E->getArg(0));
20682 llvm::Value *Amt = EmitScalarExpr(E->getArg(1));
20683 Function *F = CGM.getIntrinsic(Intrinsic::fshl, ResultType);
20684 return Builder.CreateCall(F, { Src, Src, Amt });
20685 }
20686
20687 case SystemZ::BI__builtin_s390_vfsqsb:
20688 case SystemZ::BI__builtin_s390_vfsqdb: {
20689 llvm::Type *ResultType = ConvertType(E->getType());
20690 Value *X = EmitScalarExpr(E->getArg(0));
20691 if (Builder.getIsFPConstrained()) {
20692 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt, ResultType);
20693 return Builder.CreateConstrainedFPCall(F, { X });
20694 } else {
20695 Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
20696 return Builder.CreateCall(F, X);
20697 }
20698 }
20699 case SystemZ::BI__builtin_s390_vfmasb:
20700 case SystemZ::BI__builtin_s390_vfmadb: {
20701 llvm::Type *ResultType = ConvertType(E->getType());
20702 Value *X = EmitScalarExpr(E->getArg(0));
20703 Value *Y = EmitScalarExpr(E->getArg(1));
20704 Value *Z = EmitScalarExpr(E->getArg(2));
20705 if (Builder.getIsFPConstrained()) {
20706 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
20707 return Builder.CreateConstrainedFPCall(F, {X, Y, Z});
20708 } else {
20709 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
20710 return Builder.CreateCall(F, {X, Y, Z});
20711 }
20712 }
20713 case SystemZ::BI__builtin_s390_vfmssb:
20714 case SystemZ::BI__builtin_s390_vfmsdb: {
20715 llvm::Type *ResultType = ConvertType(E->getType());
20716 Value *X = EmitScalarExpr(E->getArg(0));
20717 Value *Y = EmitScalarExpr(E->getArg(1));
20718 Value *Z = EmitScalarExpr(E->getArg(2));
20719 if (Builder.getIsFPConstrained()) {
20720 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
20721 return Builder.CreateConstrainedFPCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")});
20722 } else {
20723 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
20724 return Builder.CreateCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")});
20725 }
20726 }
20727 case SystemZ::BI__builtin_s390_vfnmasb:
20728 case SystemZ::BI__builtin_s390_vfnmadb: {
20729 llvm::Type *ResultType = ConvertType(E->getType());
20730 Value *X = EmitScalarExpr(E->getArg(0));
20731 Value *Y = EmitScalarExpr(E->getArg(1));
20732 Value *Z = EmitScalarExpr(E->getArg(2));
20733 if (Builder.getIsFPConstrained()) {
20734 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
20735 return Builder.CreateFNeg(Builder.CreateConstrainedFPCall(F, {X, Y, Z}), "neg");
20736 } else {
20737 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
20738 return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, Z}), "neg");
20739 }
20740 }
20741 case SystemZ::BI__builtin_s390_vfnmssb:
20742 case SystemZ::BI__builtin_s390_vfnmsdb: {
20743 llvm::Type *ResultType = ConvertType(E->getType());
20744 Value *X = EmitScalarExpr(E->getArg(0));
20745 Value *Y = EmitScalarExpr(E->getArg(1));
20746 Value *Z = EmitScalarExpr(E->getArg(2));
20747 if (Builder.getIsFPConstrained()) {
20748 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
20749 Value *NegZ = Builder.CreateFNeg(Z, "sub");
20750 return Builder.CreateFNeg(Builder.CreateConstrainedFPCall(F, {X, Y, NegZ}));
20751 } else {
20752 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
20753 Value *NegZ = Builder.CreateFNeg(Z, "neg");
20754 return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, NegZ}));
20755 }
20756 }
20757 case SystemZ::BI__builtin_s390_vflpsb:
20758 case SystemZ::BI__builtin_s390_vflpdb: {
20759 llvm::Type *ResultType = ConvertType(E->getType());
20760 Value *X = EmitScalarExpr(E->getArg(0));
20761 Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
20762 return Builder.CreateCall(F, X);
20763 }
20764 case SystemZ::BI__builtin_s390_vflnsb:
20765 case SystemZ::BI__builtin_s390_vflndb: {
20766 llvm::Type *ResultType = ConvertType(E->getType());
20767 Value *X = EmitScalarExpr(E->getArg(0));
20768 Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
20769 return Builder.CreateFNeg(Builder.CreateCall(F, X), "neg");
20770 }
20771 case SystemZ::BI__builtin_s390_vfisb:
20772 case SystemZ::BI__builtin_s390_vfidb: {
20773 llvm::Type *ResultType = ConvertType(E->getType());
20774 Value *X = EmitScalarExpr(E->getArg(0));
20775 // Constant-fold the M4 and M5 mask arguments.
20776 llvm::APSInt M4 = *E->getArg(1)->getIntegerConstantExpr(getContext());
20777 llvm::APSInt M5 = *E->getArg(2)->getIntegerConstantExpr(getContext());
20778 // Check whether this instance can be represented via a LLVM standard
20779 // intrinsic. We only support some combinations of M4 and M5.
20780 Intrinsic::ID ID = Intrinsic::not_intrinsic;
20781 Intrinsic::ID CI;
20782 switch (M4.getZExtValue()) {
20783 default: break;
20784 case 0: // IEEE-inexact exception allowed
20785 switch (M5.getZExtValue()) {
20786 default: break;
20787 case 0: ID = Intrinsic::rint;
20788 CI = Intrinsic::experimental_constrained_rint; break;
20789 }
20790 break;
20791 case 4: // IEEE-inexact exception suppressed
20792 switch (M5.getZExtValue()) {
20793 default: break;
20794 case 0: ID = Intrinsic::nearbyint;
20795 CI = Intrinsic::experimental_constrained_nearbyint; break;
20796 case 1: ID = Intrinsic::round;
20797 CI = Intrinsic::experimental_constrained_round; break;
20798 case 5: ID = Intrinsic::trunc;
20799 CI = Intrinsic::experimental_constrained_trunc; break;
20800 case 6: ID = Intrinsic::ceil;
20801 CI = Intrinsic::experimental_constrained_ceil; break;
20802 case 7: ID = Intrinsic::floor;
20803 CI = Intrinsic::experimental_constrained_floor; break;
20804 }
20805 break;
20806 }
20807 if (ID != Intrinsic::not_intrinsic) {
20808 if (Builder.getIsFPConstrained()) {
20809 Function *F = CGM.getIntrinsic(CI, ResultType);
20810 return Builder.CreateConstrainedFPCall(F, X);
20811 } else {
20812 Function *F = CGM.getIntrinsic(ID, ResultType);
20813 return Builder.CreateCall(F, X);
20814 }
20815 }
20816 switch (BuiltinID) { // FIXME: constrained version?
20817 case SystemZ::BI__builtin_s390_vfisb: ID = Intrinsic::s390_vfisb; break;
20818 case SystemZ::BI__builtin_s390_vfidb: ID = Intrinsic::s390_vfidb; break;
20819 default: llvm_unreachable("Unknown BuiltinID");
20820 }
20821 Function *F = CGM.getIntrinsic(ID);
20822 Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
20823 Value *M5Value = llvm::ConstantInt::get(getLLVMContext(), M5);
20824 return Builder.CreateCall(F, {X, M4Value, M5Value});
20825 }
20826 case SystemZ::BI__builtin_s390_vfmaxsb:
20827 case SystemZ::BI__builtin_s390_vfmaxdb: {
20828 llvm::Type *ResultType = ConvertType(E->getType());
20829 Value *X = EmitScalarExpr(E->getArg(0));
20830 Value *Y = EmitScalarExpr(E->getArg(1));
20831 // Constant-fold the M4 mask argument.
20832 llvm::APSInt M4 = *E->getArg(2)->getIntegerConstantExpr(getContext());
20833 // Check whether this instance can be represented via a LLVM standard
20834 // intrinsic. We only support some values of M4.
20835 Intrinsic::ID ID = Intrinsic::not_intrinsic;
20836 Intrinsic::ID CI;
20837 switch (M4.getZExtValue()) {
20838 default: break;
20839 case 4: ID = Intrinsic::maxnum;
20840 CI = Intrinsic::experimental_constrained_maxnum; break;
20841 }
20842 if (ID != Intrinsic::not_intrinsic) {
20843 if (Builder.getIsFPConstrained()) {
20844 Function *F = CGM.getIntrinsic(CI, ResultType);
20845 return Builder.CreateConstrainedFPCall(F, {X, Y});
20846 } else {
20847 Function *F = CGM.getIntrinsic(ID, ResultType);
20848 return Builder.CreateCall(F, {X, Y});
20849 }
20850 }
20851 switch (BuiltinID) {
20852 case SystemZ::BI__builtin_s390_vfmaxsb: ID = Intrinsic::s390_vfmaxsb; break;
20853 case SystemZ::BI__builtin_s390_vfmaxdb: ID = Intrinsic::s390_vfmaxdb; break;
20854 default: llvm_unreachable("Unknown BuiltinID");
20855 }
20856 Function *F = CGM.getIntrinsic(ID);
20857 Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
20858 return Builder.CreateCall(F, {X, Y, M4Value});
20859 }
20860 case SystemZ::BI__builtin_s390_vfminsb:
20861 case SystemZ::BI__builtin_s390_vfmindb: {
20862 llvm::Type *ResultType = ConvertType(E->getType());
20863 Value *X = EmitScalarExpr(E->getArg(0));
20864 Value *Y = EmitScalarExpr(E->getArg(1));
20865 // Constant-fold the M4 mask argument.
20866 llvm::APSInt M4 = *E->getArg(2)->getIntegerConstantExpr(getContext());
20867 // Check whether this instance can be represented via a LLVM standard
20868 // intrinsic. We only support some values of M4.
20869 Intrinsic::ID ID = Intrinsic::not_intrinsic;
20870 Intrinsic::ID CI;
20871 switch (M4.getZExtValue()) {
20872 default: break;
20873 case 4: ID = Intrinsic::minnum;
20874 CI = Intrinsic::experimental_constrained_minnum; break;
20875 }
20876 if (ID != Intrinsic::not_intrinsic) {
20877 if (Builder.getIsFPConstrained()) {
20878 Function *F = CGM.getIntrinsic(CI, ResultType);
20879 return Builder.CreateConstrainedFPCall(F, {X, Y});
20880 } else {
20881 Function *F = CGM.getIntrinsic(ID, ResultType);
20882 return Builder.CreateCall(F, {X, Y});
20883 }
20884 }
20885 switch (BuiltinID) {
20886 case SystemZ::BI__builtin_s390_vfminsb: ID = Intrinsic::s390_vfminsb; break;
20887 case SystemZ::BI__builtin_s390_vfmindb: ID = Intrinsic::s390_vfmindb; break;
20888 default: llvm_unreachable("Unknown BuiltinID");
20889 }
20890 Function *F = CGM.getIntrinsic(ID);
20891 Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
20892 return Builder.CreateCall(F, {X, Y, M4Value});
20893 }
20894
20895 case SystemZ::BI__builtin_s390_vlbrh:
20896 case SystemZ::BI__builtin_s390_vlbrf:
20897 case SystemZ::BI__builtin_s390_vlbrg:
20898 case SystemZ::BI__builtin_s390_vlbrq: {
20899 llvm::Type *ResultType = ConvertType(E->getType());
20900 Value *X = EmitScalarExpr(E->getArg(0));
20901 Function *F = CGM.getIntrinsic(Intrinsic::bswap, ResultType);
20902 return Builder.CreateCall(F, X);
20903 }
20904
20905 // Vector intrinsics that output the post-instruction CC value.
20906
20907#define INTRINSIC_WITH_CC(NAME) \
20908 case SystemZ::BI__builtin_##NAME: \
20909 return EmitSystemZIntrinsicWithCC(*this, Intrinsic::NAME, E)
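// For example, INTRINSIC_WITH_CC(s390_vpkshs) expands to
//   case SystemZ::BI__builtin_s390_vpkshs:
//     return EmitSystemZIntrinsicWithCC(*this, Intrinsic::s390_vpkshs, E);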
20910
20911 INTRINSIC_WITH_CC(s390_vpkshs);
20912 INTRINSIC_WITH_CC(s390_vpksfs);
20913 INTRINSIC_WITH_CC(s390_vpksgs);
20914
20915 INTRINSIC_WITH_CC(s390_vpklshs);
20916 INTRINSIC_WITH_CC(s390_vpklsfs);
20917 INTRINSIC_WITH_CC(s390_vpklsgs);
20918
20919 INTRINSIC_WITH_CC(s390_vceqbs);
20920 INTRINSIC_WITH_CC(s390_vceqhs);
20921 INTRINSIC_WITH_CC(s390_vceqfs);
20922 INTRINSIC_WITH_CC(s390_vceqgs);
20923 INTRINSIC_WITH_CC(s390_vceqqs);
20924
20925 INTRINSIC_WITH_CC(s390_vchbs);
20926 INTRINSIC_WITH_CC(s390_vchhs);
20927 INTRINSIC_WITH_CC(s390_vchfs);
20928 INTRINSIC_WITH_CC(s390_vchgs);
20929 INTRINSIC_WITH_CC(s390_vchqs);
20930
20931 INTRINSIC_WITH_CC(s390_vchlbs);
20932 INTRINSIC_WITH_CC(s390_vchlhs);
20933 INTRINSIC_WITH_CC(s390_vchlfs);
20934 INTRINSIC_WITH_CC(s390_vchlgs);
20935 INTRINSIC_WITH_CC(s390_vchlqs);
20936
20937 INTRINSIC_WITH_CC(s390_vfaebs);
20938 INTRINSIC_WITH_CC(s390_vfaehs);
20939 INTRINSIC_WITH_CC(s390_vfaefs);
20940
20941 INTRINSIC_WITH_CC(s390_vfaezbs);
20942 INTRINSIC_WITH_CC(s390_vfaezhs);
20943 INTRINSIC_WITH_CC(s390_vfaezfs);
20944
20945 INTRINSIC_WITH_CC(s390_vfeebs);
20946 INTRINSIC_WITH_CC(s390_vfeehs);
20947 INTRINSIC_WITH_CC(s390_vfeefs);
20948
20949 INTRINSIC_WITH_CC(s390_vfeezbs);
20950 INTRINSIC_WITH_CC(s390_vfeezhs);
20951 INTRINSIC_WITH_CC(s390_vfeezfs);
20952
20953 INTRINSIC_WITH_CC(s390_vfenebs);
20954 INTRINSIC_WITH_CC(s390_vfenehs);
20955 INTRINSIC_WITH_CC(s390_vfenefs);
20956
20957 INTRINSIC_WITH_CC(s390_vfenezbs);
20958 INTRINSIC_WITH_CC(s390_vfenezhs);
20959 INTRINSIC_WITH_CC(s390_vfenezfs);
20960
20961 INTRINSIC_WITH_CC(s390_vistrbs);
20962 INTRINSIC_WITH_CC(s390_vistrhs);
20963 INTRINSIC_WITH_CC(s390_vistrfs);
20964
20965 INTRINSIC_WITH_CC(s390_vstrcbs);
20966 INTRINSIC_WITH_CC(s390_vstrchs);
20967 INTRINSIC_WITH_CC(s390_vstrcfs);
20968
20969 INTRINSIC_WITH_CC(s390_vstrczbs);
20970 INTRINSIC_WITH_CC(s390_vstrczhs);
20971 INTRINSIC_WITH_CC(s390_vstrczfs);
20972
20973 INTRINSIC_WITH_CC(s390_vfcesbs);
20974 INTRINSIC_WITH_CC(s390_vfcedbs);
20975 INTRINSIC_WITH_CC(s390_vfchsbs);
20976 INTRINSIC_WITH_CC(s390_vfchdbs);
20977 INTRINSIC_WITH_CC(s390_vfchesbs);
20978 INTRINSIC_WITH_CC(s390_vfchedbs);
20979
20980 INTRINSIC_WITH_CC(s390_vftcisb);
20981 INTRINSIC_WITH_CC(s390_vftcidb);
20982
20983 INTRINSIC_WITH_CC(s390_vstrsb);
20984 INTRINSIC_WITH_CC(s390_vstrsh);
20985 INTRINSIC_WITH_CC(s390_vstrsf);
20986
20987 INTRINSIC_WITH_CC(s390_vstrszb);
20988 INTRINSIC_WITH_CC(s390_vstrszh);
20989 INTRINSIC_WITH_CC(s390_vstrszf);
20990
20991#undef INTRINSIC_WITH_CC
20992
20993 default:
20994 return nullptr;
20995 }
20996}
20997
20998namespace {
20999// Helper classes for mapping MMA builtins to particular LLVM intrinsic variant.
21000struct NVPTXMmaLdstInfo {
21001 unsigned NumResults; // Number of elements to load/store
21002 // Intrinsic IDs for row/col variants. 0 if particular layout is unsupported.
21003 unsigned IID_col;
21004 unsigned IID_row;
21005};
21006
21007#define MMA_INTR(geom_op_type, layout) \
21008 Intrinsic::nvvm_wmma_##geom_op_type##_##layout##_stride
21009#define MMA_LDST(n, geom_op_type) \
21010 { n, MMA_INTR(geom_op_type, col), MMA_INTR(geom_op_type, row) }
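// For example, MMA_LDST(8, m16n16k16_load_a_f16) expands to
//   { 8, Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_col_stride,
//        Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_row_stride }
// i.e. eight result elements plus the col- and row-major intrinsic IDs.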
21011
21012static NVPTXMmaLdstInfo getNVPTXMmaLdstInfo(unsigned BuiltinID) {
21013 switch (BuiltinID) {
21014 // FP MMA loads
21015 case NVPTX::BI__hmma_m16n16k16_ld_a:
21016 return MMA_LDST(8, m16n16k16_load_a_f16);
21017 case NVPTX::BI__hmma_m16n16k16_ld_b:
21018 return MMA_LDST(8, m16n16k16_load_b_f16);
21019 case NVPTX::BI__hmma_m16n16k16_ld_c_f16:
21020 return MMA_LDST(4, m16n16k16_load_c_f16);
21021 case NVPTX::BI__hmma_m16n16k16_ld_c_f32:
21022 return MMA_LDST(8, m16n16k16_load_c_f32);
21023 case NVPTX::BI__hmma_m32n8k16_ld_a:
21024 return MMA_LDST(8, m32n8k16_load_a_f16);
21025 case NVPTX::BI__hmma_m32n8k16_ld_b:
21026 return MMA_LDST(8, m32n8k16_load_b_f16);
21027 case NVPTX::BI__hmma_m32n8k16_ld_c_f16:
21028 return MMA_LDST(4, m32n8k16_load_c_f16);
21029 case NVPTX::BI__hmma_m32n8k16_ld_c_f32:
21030 return MMA_LDST(8, m32n8k16_load_c_f32);
21031 case NVPTX::BI__hmma_m8n32k16_ld_a:
21032 return MMA_LDST(8, m8n32k16_load_a_f16);
21033 case NVPTX::BI__hmma_m8n32k16_ld_b:
21034 return MMA_LDST(8, m8n32k16_load_b_f16);
21035 case NVPTX::BI__hmma_m8n32k16_ld_c_f16:
21036 return MMA_LDST(4, m8n32k16_load_c_f16);
21037 case NVPTX::BI__hmma_m8n32k16_ld_c_f32:
21038 return MMA_LDST(8, m8n32k16_load_c_f32);
21039
21040 // Integer MMA loads
21041 case NVPTX::BI__imma_m16n16k16_ld_a_s8:
21042 return MMA_LDST(2, m16n16k16_load_a_s8);
21043 case NVPTX::BI__imma_m16n16k16_ld_a_u8:
21044 return MMA_LDST(2, m16n16k16_load_a_u8);
21045 case NVPTX::BI__imma_m16n16k16_ld_b_s8:
21046 return MMA_LDST(2, m16n16k16_load_b_s8);
21047 case NVPTX::BI__imma_m16n16k16_ld_b_u8:
21048 return MMA_LDST(2, m16n16k16_load_b_u8);
21049 case NVPTX::BI__imma_m16n16k16_ld_c:
21050 return MMA_LDST(8, m16n16k16_load_c_s32);
21051 case NVPTX::BI__imma_m32n8k16_ld_a_s8:
21052 return MMA_LDST(4, m32n8k16_load_a_s8);
21053 case NVPTX::BI__imma_m32n8k16_ld_a_u8:
21054 return MMA_LDST(4, m32n8k16_load_a_u8);
21055 case NVPTX::BI__imma_m32n8k16_ld_b_s8:
21056 return MMA_LDST(1, m32n8k16_load_b_s8);
21057 case NVPTX::BI__imma_m32n8k16_ld_b_u8:
21058 return MMA_LDST(1, m32n8k16_load_b_u8);
21059 case NVPTX::BI__imma_m32n8k16_ld_c:
21060 return MMA_LDST(8, m32n8k16_load_c_s32);
21061 case NVPTX::BI__imma_m8n32k16_ld_a_s8:
21062 return MMA_LDST(1, m8n32k16_load_a_s8);
21063 case NVPTX::BI__imma_m8n32k16_ld_a_u8:
21064 return MMA_LDST(1, m8n32k16_load_a_u8);
21065 case NVPTX::BI__imma_m8n32k16_ld_b_s8:
21066 return MMA_LDST(4, m8n32k16_load_b_s8);
21067 case NVPTX::BI__imma_m8n32k16_ld_b_u8:
21068 return MMA_LDST(4, m8n32k16_load_b_u8);
21069 case NVPTX::BI__imma_m8n32k16_ld_c:
21070 return MMA_LDST(8, m8n32k16_load_c_s32);
21071
21072 // Sub-integer MMA loads.
21073 // Only row/col layout is supported by A/B fragments.
21074 case NVPTX::BI__imma_m8n8k32_ld_a_s4:
21075 return {1, 0, MMA_INTR(m8n8k32_load_a_s4, row)};
21076 case NVPTX::BI__imma_m8n8k32_ld_a_u4:
21077 return {1, 0, MMA_INTR(m8n8k32_load_a_u4, row)};
21078 case NVPTX::BI__imma_m8n8k32_ld_b_s4:
21079 return {1, MMA_INTR(m8n8k32_load_b_s4, col), 0};
21080 case NVPTX::BI__imma_m8n8k32_ld_b_u4:
21081 return {1, MMA_INTR(m8n8k32_load_b_u4, col), 0};
21082 case NVPTX::BI__imma_m8n8k32_ld_c:
21083 return MMA_LDST(2, m8n8k32_load_c_s32);
21084 case NVPTX::BI__bmma_m8n8k128_ld_a_b1:
21085 return {1, 0, MMA_INTR(m8n8k128_load_a_b1, row)};
21086 case NVPTX::BI__bmma_m8n8k128_ld_b_b1:
21087 return {1, MMA_INTR(m8n8k128_load_b_b1, col), 0};
21088 case NVPTX::BI__bmma_m8n8k128_ld_c:
21089 return MMA_LDST(2, m8n8k128_load_c_s32);
21090
21091 // Double MMA loads
21092 case NVPTX::BI__dmma_m8n8k4_ld_a:
21093 return MMA_LDST(1, m8n8k4_load_a_f64);
21094 case NVPTX::BI__dmma_m8n8k4_ld_b:
21095 return MMA_LDST(1, m8n8k4_load_b_f64);
21096 case NVPTX::BI__dmma_m8n8k4_ld_c:
21097 return MMA_LDST(2, m8n8k4_load_c_f64);
21098
21099 // Alternate float MMA loads
21100 case NVPTX::BI__mma_bf16_m16n16k16_ld_a:
21101 return MMA_LDST(4, m16n16k16_load_a_bf16);
21102 case NVPTX::BI__mma_bf16_m16n16k16_ld_b:
21103 return MMA_LDST(4, m16n16k16_load_b_bf16);
21104 case NVPTX::BI__mma_bf16_m8n32k16_ld_a:
21105 return MMA_LDST(2, m8n32k16_load_a_bf16);
21106 case NVPTX::BI__mma_bf16_m8n32k16_ld_b:
21107 return MMA_LDST(8, m8n32k16_load_b_bf16);
21108 case NVPTX::BI__mma_bf16_m32n8k16_ld_a:
21109 return MMA_LDST(8, m32n8k16_load_a_bf16);
21110 case NVPTX::BI__mma_bf16_m32n8k16_ld_b:
21111 return MMA_LDST(2, m32n8k16_load_b_bf16);
21112 case NVPTX::BI__mma_tf32_m16n16k8_ld_a:
21113 return MMA_LDST(4, m16n16k8_load_a_tf32);
21114 case NVPTX::BI__mma_tf32_m16n16k8_ld_b:
21115 return MMA_LDST(4, m16n16k8_load_b_tf32);
21116 case NVPTX::BI__mma_tf32_m16n16k8_ld_c:
21117 return MMA_LDST(8, m16n16k8_load_c_f32);
21118
21119 // NOTE: We need to follow the inconsistent naming scheme used by NVCC. Unlike
21120 // PTX and LLVM IR where stores always use fragment D, NVCC builtins always
21121 // use fragment C for both loads and stores.
21122 // FP MMA stores.
21123 case NVPTX::BI__hmma_m16n16k16_st_c_f16:
21124 return MMA_LDST(4, m16n16k16_store_d_f16);
21125 case NVPTX::BI__hmma_m16n16k16_st_c_f32:
21126 return MMA_LDST(8, m16n16k16_store_d_f32);
21127 case NVPTX::BI__hmma_m32n8k16_st_c_f16:
21128 return MMA_LDST(4, m32n8k16_store_d_f16);
21129 case NVPTX::BI__hmma_m32n8k16_st_c_f32:
21130 return MMA_LDST(8, m32n8k16_store_d_f32);
21131 case NVPTX::BI__hmma_m8n32k16_st_c_f16:
21132 return MMA_LDST(4, m8n32k16_store_d_f16);
21133 case NVPTX::BI__hmma_m8n32k16_st_c_f32:
21134 return MMA_LDST(8, m8n32k16_store_d_f32);
21135
21136 // Integer and sub-integer MMA stores.
21137 // Another naming quirk. Unlike other MMA builtins that use PTX types in the
21138 // name, integer loads/stores use LLVM's i32.
21139 case NVPTX::BI__imma_m16n16k16_st_c_i32:
21140 return MMA_LDST(8, m16n16k16_store_d_s32);
21141 case NVPTX::BI__imma_m32n8k16_st_c_i32:
21142 return MMA_LDST(8, m32n8k16_store_d_s32);
21143 case NVPTX::BI__imma_m8n32k16_st_c_i32:
21144 return MMA_LDST(8, m8n32k16_store_d_s32);
21145 case NVPTX::BI__imma_m8n8k32_st_c_i32:
21146 return MMA_LDST(2, m8n8k32_store_d_s32);
21147 case NVPTX::BI__bmma_m8n8k128_st_c_i32:
21148 return MMA_LDST(2, m8n8k128_store_d_s32);
21149
21150 // Double MMA store
21151 case NVPTX::BI__dmma_m8n8k4_st_c_f64:
21152 return MMA_LDST(2, m8n8k4_store_d_f64);
21153
21154 // Alternate float MMA store
21155 case NVPTX::BI__mma_m16n16k8_st_c_f32:
21156 return MMA_LDST(8, m16n16k8_store_d_f32);
21157
21158 default:
21159 llvm_unreachable("Unknown MMA builtin");
21160 }
21161}
21162#undef MMA_LDST
21163#undef MMA_INTR
21164
21165
21166struct NVPTXMmaInfo {
21167 unsigned NumEltsA;
21168 unsigned NumEltsB;
21169 unsigned NumEltsC;
21170 unsigned NumEltsD;
21171
21172 // Variants are ordered by layout-A/layout-B/satf, where 'row' has priority
21173 // over 'col' for layout. The index of non-satf variants is expected to match
21174 // the undocumented layout constants used by CUDA's mma.hpp.
21175 std::array<unsigned, 8> Variants;
21176
21177 unsigned getMMAIntrinsic(int Layout, bool Satf) {
21178 unsigned Index = Layout + 4 * Satf;
21179 if (Index >= Variants.size())
21180 return 0;
21181 return Variants[Index];
21182 }
21183};
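// For example, getMMAIntrinsic(/*Layout=*/1, /*Satf=*/true) computes
// Index = 1 + 4 * 1 = 5 and returns Variants[5], the row_col ..._satfinite
// intrinsic in the MMA_SATF_VARIANTS ordering below.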
21184
21185 // Returns an intrinsic that matches Layout and Satf for valid combinations of
21186 // Layout and Satf, 0 otherwise.
21187static NVPTXMmaInfo getNVPTXMmaInfo(unsigned BuiltinID) {
21188 // clang-format off
21189#define MMA_VARIANTS(geom, type) \
21190 Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type, \
21191 Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type, \
21192 Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type, \
21193 Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type
21194#define MMA_SATF_VARIANTS(geom, type) \
21195 MMA_VARIANTS(geom, type), \
21196 Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type##_satfinite, \
21197 Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type##_satfinite, \
21198 Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type##_satfinite, \
21199 Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type##_satfinite
21200// Sub-integer MMA only supports row.col layout.
21201#define MMA_VARIANTS_I4(geom, type) \
21202 0, \
21203 Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type, \
21204 0, \
21205 0, \
21206 0, \
21207 Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type##_satfinite, \
21208 0, \
21209 0
21210// b1 MMA does not support .satfinite.
21211#define MMA_VARIANTS_B1_XOR(geom, type) \
21212 0, \
21213 Intrinsic::nvvm_wmma_##geom##_mma_xor_popc_row_col_##type, \
21214 0, \
21215 0, \
21216 0, \
21217 0, \
21218 0, \
21219 0
21220#define MMA_VARIANTS_B1_AND(geom, type) \
21221 0, \
21222 Intrinsic::nvvm_wmma_##geom##_mma_and_popc_row_col_##type, \
21223 0, \
21224 0, \
21225 0, \
21226 0, \
21227 0, \
21228 0
21229 // clang-format on
21230 switch (BuiltinID) {
21231 // FP MMA
21232 // Note that the 'type' argument of MMA_SATF_VARIANTS uses D_C notation, while
21233 // the NumEltsN fields of the return value are ordered as A,B,C,D.
21234 case NVPTX::BI__hmma_m16n16k16_mma_f16f16:
21235 return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m16n16k16, f16_f16)}}};
21236 case NVPTX::BI__hmma_m16n16k16_mma_f32f16:
21237 return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m16n16k16, f32_f16)}}};
21238 case NVPTX::BI__hmma_m16n16k16_mma_f16f32:
21239 return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m16n16k16, f16_f32)}}};
21240 case NVPTX::BI__hmma_m16n16k16_mma_f32f32:
21241 return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, f32_f32)}}};
21242 case NVPTX::BI__hmma_m32n8k16_mma_f16f16:
21243 return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m32n8k16, f16_f16)}}};
21244 case NVPTX::BI__hmma_m32n8k16_mma_f32f16:
21245 return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m32n8k16, f32_f16)}}};
21246 case NVPTX::BI__hmma_m32n8k16_mma_f16f32:
21247 return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m32n8k16, f16_f32)}}};
21248 case NVPTX::BI__hmma_m32n8k16_mma_f32f32:
21249 return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, f32_f32)}}};
21250 case NVPTX::BI__hmma_m8n32k16_mma_f16f16:
21251 return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m8n32k16, f16_f16)}}};
21252 case NVPTX::BI__hmma_m8n32k16_mma_f32f16:
21253 return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m8n32k16, f32_f16)}}};
21254 case NVPTX::BI__hmma_m8n32k16_mma_f16f32:
21255 return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m8n32k16, f16_f32)}}};
21256 case NVPTX::BI__hmma_m8n32k16_mma_f32f32:
21257 return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, f32_f32)}}};
21258
21259 // Integer MMA
21260 case NVPTX::BI__imma_m16n16k16_mma_s8:
21261 return {2, 2, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, s8)}}};
21262 case NVPTX::BI__imma_m16n16k16_mma_u8:
21263 return {2, 2, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, u8)}}};
21264 case NVPTX::BI__imma_m32n8k16_mma_s8:
21265 return {4, 1, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, s8)}}};
21266 case NVPTX::BI__imma_m32n8k16_mma_u8:
21267 return {4, 1, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, u8)}}};
21268 case NVPTX::BI__imma_m8n32k16_mma_s8:
21269 return {1, 4, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, s8)}}};
21270 case NVPTX::BI__imma_m8n32k16_mma_u8:
21271 return {1, 4, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, u8)}}};
21272
21273 // Sub-integer MMA
21274 case NVPTX::BI__imma_m8n8k32_mma_s4:
21275 return {1, 1, 2, 2, {{MMA_VARIANTS_I4(m8n8k32, s4)}}};
21276 case NVPTX::BI__imma_m8n8k32_mma_u4:
21277 return {1, 1, 2, 2, {{MMA_VARIANTS_I4(m8n8k32, u4)}}};
21278 case NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1:
21279 return {1, 1, 2, 2, {{MMA_VARIANTS_B1_XOR(m8n8k128, b1)}}};
21280 case NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1:
21281 return {1, 1, 2, 2, {{MMA_VARIANTS_B1_AND(m8n8k128, b1)}}};
21282
21283 // Double MMA
21284 case NVPTX::BI__dmma_m8n8k4_mma_f64:
21285 return {1, 1, 2, 2, {{MMA_VARIANTS(m8n8k4, f64)}}};
21286
21287 // Alternate FP MMA
21288 case NVPTX::BI__mma_bf16_m16n16k16_mma_f32:
21289 return {4, 4, 8, 8, {{MMA_VARIANTS(m16n16k16, bf16)}}};
21290 case NVPTX::BI__mma_bf16_m8n32k16_mma_f32:
21291 return {2, 8, 8, 8, {{MMA_VARIANTS(m8n32k16, bf16)}}};
21292 case NVPTX::BI__mma_bf16_m32n8k16_mma_f32:
21293 return {8, 2, 8, 8, {{MMA_VARIANTS(m32n8k16, bf16)}}};
21294 case NVPTX::BI__mma_tf32_m16n16k8_mma_f32:
21295 return {4, 4, 8, 8, {{MMA_VARIANTS(m16n16k8, tf32)}}};
21296 default:
21297 llvm_unreachable("Unexpected builtin ID.");
21298 }
21299#undef MMA_VARIANTS
21300#undef MMA_SATF_VARIANTS
21301#undef MMA_VARIANTS_I4
21302#undef MMA_VARIANTS_B1_AND
21303#undef MMA_VARIANTS_B1_XOR
21304}
21305
21306static Value *MakeLdu(unsigned IntrinsicID, CodeGenFunction &CGF,
21307 const CallExpr *E) {
21308 Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
21309 QualType ArgType = E->getArg(0)->getType();
21310 clang::CharUnits Align = CGF.CGM.getNaturalPointeeTypeAlignment(ArgType);
21311 llvm::Type *ElemTy = CGF.ConvertTypeForMem(ArgType->getPointeeType());
21312 return CGF.Builder.CreateCall(
21313 CGF.CGM.getIntrinsic(IntrinsicID, {ElemTy, Ptr->getType()}),
21314 {Ptr, ConstantInt::get(CGF.Builder.getInt32Ty(), Align.getQuantity())});
21315}
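// For example, __nvvm_ldu_f(p) is emitted roughly as
//   call float @llvm.nvvm.ldu.global.f.f32.p0(ptr %p, i32 4)
// where the i32 operand carries the natural alignment of the pointee type.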
21316
21317static Value *MakeLdg(CodeGenFunction &CGF, const CallExpr *E) {
21318 Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
21319 QualType ArgType = E->getArg(0)->getType();
21320 clang::CharUnits AlignV = CGF.CGM.getNaturalPointeeTypeAlignment(ArgType);
21321 llvm::Type *ElemTy = CGF.ConvertTypeForMem(ArgType->getPointeeType());
21322
21323 // Use addrspace(1) for NVPTX ADDRESS_SPACE_GLOBAL
21324 auto *ASC = CGF.Builder.CreateAddrSpaceCast(Ptr, CGF.Builder.getPtrTy(1));
21325 auto *LD = CGF.Builder.CreateAlignedLoad(ElemTy, ASC, AlignV.getAsAlign());
21326 MDNode *MD = MDNode::get(CGF.Builder.getContext(), {});
21327 LD->setMetadata(LLVMContext::MD_invariant_load, MD);
21328
21329 return LD;
21330}
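// For example, __nvvm_ldg_f(p) becomes an ordinary load from the global
// address space, roughly
//   %g = addrspacecast ptr %p to ptr addrspace(1)
//   %v = load float, ptr addrspace(1) %g, align 4, !invariant.load !0
// which lets later passes treat the loaded value as read-only data.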
21331
21332static Value *MakeScopedAtomic(unsigned IntrinsicID, CodeGenFunction &CGF,
21333 const CallExpr *E) {
21334 Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
21335 llvm::Type *ElemTy =
21336 CGF.ConvertTypeForMem(E->getArg(0)->getType()->getPointeeType());
21337 return CGF.Builder.CreateCall(
21338 CGF.CGM.getIntrinsic(IntrinsicID, {ElemTy, Ptr->getType()}),
21339 {Ptr, CGF.EmitScalarExpr(E->getArg(1))});
21340}
21341
21342static Value *MakeCpAsync(unsigned IntrinsicID, unsigned IntrinsicIDS,
21343 CodeGenFunction &CGF, const CallExpr *E,
21344 int SrcSize) {
21345 return E->getNumArgs() == 3
21346 ? CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IntrinsicIDS),
21347 {CGF.EmitScalarExpr(E->getArg(0)),
21348 CGF.EmitScalarExpr(E->getArg(1)),
21349 CGF.EmitScalarExpr(E->getArg(2))})
21350 : CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IntrinsicID),
21351 {CGF.EmitScalarExpr(E->getArg(0)),
21352 CGF.EmitScalarExpr(E->getArg(1))});
21353}
21354
21355static Value *MakeHalfType(unsigned IntrinsicID, unsigned BuiltinID,
21356 const CallExpr *E, CodeGenFunction &CGF) {
21357 auto &C = CGF.CGM.getContext();
21358 if (!(C.getLangOpts().NativeHalfType ||
21359 !C.getTargetInfo().useFP16ConversionIntrinsics())) {
21360 CGF.CGM.Error(E->getExprLoc(), C.BuiltinInfo.getName(BuiltinID).str() +
21361 " requires native half type support.");
21362 return nullptr;
21363 }
21364
21365 if (BuiltinID == NVPTX::BI__nvvm_ldg_h || BuiltinID == NVPTX::BI__nvvm_ldg_h2)
21366 return MakeLdg(CGF, E);
21367
21368 if (IntrinsicID == Intrinsic::nvvm_ldu_global_f)
21369 return MakeLdu(IntrinsicID, CGF, E);
21370
21371 SmallVector<Value *, 16> Args;
21372 auto *F = CGF.CGM.getIntrinsic(IntrinsicID);
21373 auto *FTy = F->getFunctionType();
21374 unsigned ICEArguments = 0;
21375 ASTContext::GetBuiltinTypeError Error;
21376 C.GetBuiltinType(BuiltinID, Error, &ICEArguments);
21377 assert(Error == ASTContext::GE_None && "Should not codegen an error");
21378 for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
21379 assert((ICEArguments & (1 << i)) == 0);
21380 auto *ArgValue = CGF.EmitScalarExpr(E->getArg(i));
21381 auto *PTy = FTy->getParamType(i);
21382 if (PTy != ArgValue->getType())
21383 ArgValue = CGF.Builder.CreateBitCast(ArgValue, PTy);
21384 Args.push_back(ArgValue);
21385 }
21386
21387 return CGF.Builder.CreateCall(F, Args);
21388}
21389} // namespace
21390
21391 Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
21392 const CallExpr *E) {
21393 switch (BuiltinID) {
21394 case NVPTX::BI__nvvm_atom_add_gen_i:
21395 case NVPTX::BI__nvvm_atom_add_gen_l:
21396 case NVPTX::BI__nvvm_atom_add_gen_ll:
21397 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Add, E);
21398
21399 case NVPTX::BI__nvvm_atom_sub_gen_i:
21400 case NVPTX::BI__nvvm_atom_sub_gen_l:
21401 case NVPTX::BI__nvvm_atom_sub_gen_ll:
21402 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Sub, E);
21403
21404 case NVPTX::BI__nvvm_atom_and_gen_i:
21405 case NVPTX::BI__nvvm_atom_and_gen_l:
21406 case NVPTX::BI__nvvm_atom_and_gen_ll:
21407 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::And, E);
21408
21409 case NVPTX::BI__nvvm_atom_or_gen_i:
21410 case NVPTX::BI__nvvm_atom_or_gen_l:
21411 case NVPTX::BI__nvvm_atom_or_gen_ll:
21412 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Or, E);
21413
21414 case NVPTX::BI__nvvm_atom_xor_gen_i:
21415 case NVPTX::BI__nvvm_atom_xor_gen_l:
21416 case NVPTX::BI__nvvm_atom_xor_gen_ll:
21417 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xor, E);
21418
21419 case NVPTX::BI__nvvm_atom_xchg_gen_i:
21420 case NVPTX::BI__nvvm_atom_xchg_gen_l:
21421 case NVPTX::BI__nvvm_atom_xchg_gen_ll:
21422 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xchg, E);
21423
21424 case NVPTX::BI__nvvm_atom_max_gen_i:
21425 case NVPTX::BI__nvvm_atom_max_gen_l:
21426 case NVPTX::BI__nvvm_atom_max_gen_ll:
21427 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Max, E);
21428
21429 case NVPTX::BI__nvvm_atom_max_gen_ui:
21430 case NVPTX::BI__nvvm_atom_max_gen_ul:
21431 case NVPTX::BI__nvvm_atom_max_gen_ull:
21432 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMax, E);
21433
21434 case NVPTX::BI__nvvm_atom_min_gen_i:
21435 case NVPTX::BI__nvvm_atom_min_gen_l:
21436 case NVPTX::BI__nvvm_atom_min_gen_ll:
21437 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Min, E);
21438
21439 case NVPTX::BI__nvvm_atom_min_gen_ui:
21440 case NVPTX::BI__nvvm_atom_min_gen_ul:
21441 case NVPTX::BI__nvvm_atom_min_gen_ull:
21442 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMin, E);
21443
21444 case NVPTX::BI__nvvm_atom_cas_gen_us:
21445 case NVPTX::BI__nvvm_atom_cas_gen_i:
21446 case NVPTX::BI__nvvm_atom_cas_gen_l:
21447 case NVPTX::BI__nvvm_atom_cas_gen_ll:
21448 // __nvvm_atom_cas_gen_* should return the old value rather than the
21449 // success flag.
21450 return MakeAtomicCmpXchgValue(*this, E, /*ReturnBool=*/false);
21451
21452 case NVPTX::BI__nvvm_atom_add_gen_f:
21453 case NVPTX::BI__nvvm_atom_add_gen_d: {
21454 Address DestAddr = EmitPointerWithAlignment(E->getArg(0));
21455 Value *Val = EmitScalarExpr(E->getArg(1));
21456
21457 return Builder.CreateAtomicRMW(llvm::AtomicRMWInst::FAdd, DestAddr, Val,
21458 AtomicOrdering::SequentiallyConsistent);
21459 }
21460
21461 case NVPTX::BI__nvvm_atom_inc_gen_ui: {
21462 Value *Ptr = EmitScalarExpr(E->getArg(0));
21463 Value *Val = EmitScalarExpr(E->getArg(1));
21464 Function *FnALI32 =
21465 CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_inc_32, Ptr->getType());
21466 return Builder.CreateCall(FnALI32, {Ptr, Val});
21467 }
21468
21469 case NVPTX::BI__nvvm_atom_dec_gen_ui: {
21470 Value *Ptr = EmitScalarExpr(E->getArg(0));
21471 Value *Val = EmitScalarExpr(E->getArg(1));
21472 Function *FnALD32 =
21473 CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_dec_32, Ptr->getType());
21474 return Builder.CreateCall(FnALD32, {Ptr, Val});
21475 }
21476
21477 case NVPTX::BI__nvvm_ldg_c:
21478 case NVPTX::BI__nvvm_ldg_sc:
21479 case NVPTX::BI__nvvm_ldg_c2:
21480 case NVPTX::BI__nvvm_ldg_sc2:
21481 case NVPTX::BI__nvvm_ldg_c4:
21482 case NVPTX::BI__nvvm_ldg_sc4:
21483 case NVPTX::BI__nvvm_ldg_s:
21484 case NVPTX::BI__nvvm_ldg_s2:
21485 case NVPTX::BI__nvvm_ldg_s4:
21486 case NVPTX::BI__nvvm_ldg_i:
21487 case NVPTX::BI__nvvm_ldg_i2:
21488 case NVPTX::BI__nvvm_ldg_i4:
21489 case NVPTX::BI__nvvm_ldg_l:
21490 case NVPTX::BI__nvvm_ldg_l2:
21491 case NVPTX::BI__nvvm_ldg_ll:
21492 case NVPTX::BI__nvvm_ldg_ll2:
21493 case NVPTX::BI__nvvm_ldg_uc:
21494 case NVPTX::BI__nvvm_ldg_uc2:
21495 case NVPTX::BI__nvvm_ldg_uc4:
21496 case NVPTX::BI__nvvm_ldg_us:
21497 case NVPTX::BI__nvvm_ldg_us2:
21498 case NVPTX::BI__nvvm_ldg_us4:
21499 case NVPTX::BI__nvvm_ldg_ui:
21500 case NVPTX::BI__nvvm_ldg_ui2:
21501 case NVPTX::BI__nvvm_ldg_ui4:
21502 case NVPTX::BI__nvvm_ldg_ul:
21503 case NVPTX::BI__nvvm_ldg_ul2:
21504 case NVPTX::BI__nvvm_ldg_ull:
21505 case NVPTX::BI__nvvm_ldg_ull2:
21506 case NVPTX::BI__nvvm_ldg_f:
21507 case NVPTX::BI__nvvm_ldg_f2:
21508 case NVPTX::BI__nvvm_ldg_f4:
21509 case NVPTX::BI__nvvm_ldg_d:
21510 case NVPTX::BI__nvvm_ldg_d2:
21511 // PTX Interoperability section 2.2: "For a vector with an even number of
21512 // elements, its alignment is set to number of elements times the alignment
21513 // of its member: n*alignof(t)."
21514 return MakeLdg(*this, E);
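  // For example, an __nvvm_ldg_f4 load of a 4-element float vector is assumed
  // to be aligned to 4 * alignof(float) = 16 bytes, which is the alignment
  // MakeLdg derives from the pointee type.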
21515
21516 case NVPTX::BI__nvvm_ldu_c:
21517 case NVPTX::BI__nvvm_ldu_sc:
21518 case NVPTX::BI__nvvm_ldu_c2:
21519 case NVPTX::BI__nvvm_ldu_sc2:
21520 case NVPTX::BI__nvvm_ldu_c4:
21521 case NVPTX::BI__nvvm_ldu_sc4:
21522 case NVPTX::BI__nvvm_ldu_s:
21523 case NVPTX::BI__nvvm_ldu_s2:
21524 case NVPTX::BI__nvvm_ldu_s4:
21525 case NVPTX::BI__nvvm_ldu_i:
21526 case NVPTX::BI__nvvm_ldu_i2:
21527 case NVPTX::BI__nvvm_ldu_i4:
21528 case NVPTX::BI__nvvm_ldu_l:
21529 case NVPTX::BI__nvvm_ldu_l2:
21530 case NVPTX::BI__nvvm_ldu_ll:
21531 case NVPTX::BI__nvvm_ldu_ll2:
21532 case NVPTX::BI__nvvm_ldu_uc:
21533 case NVPTX::BI__nvvm_ldu_uc2:
21534 case NVPTX::BI__nvvm_ldu_uc4:
21535 case NVPTX::BI__nvvm_ldu_us:
21536 case NVPTX::BI__nvvm_ldu_us2:
21537 case NVPTX::BI__nvvm_ldu_us4:
21538 case NVPTX::BI__nvvm_ldu_ui:
21539 case NVPTX::BI__nvvm_ldu_ui2:
21540 case NVPTX::BI__nvvm_ldu_ui4:
21541 case NVPTX::BI__nvvm_ldu_ul:
21542 case NVPTX::BI__nvvm_ldu_ul2:
21543 case NVPTX::BI__nvvm_ldu_ull:
21544 case NVPTX::BI__nvvm_ldu_ull2:
21545 return MakeLdu(Intrinsic::nvvm_ldu_global_i, *this, E);
21546 case NVPTX::BI__nvvm_ldu_f:
21547 case NVPTX::BI__nvvm_ldu_f2:
21548 case NVPTX::BI__nvvm_ldu_f4:
21549 case NVPTX::BI__nvvm_ldu_d:
21550 case NVPTX::BI__nvvm_ldu_d2:
21551 return MakeLdu(Intrinsic::nvvm_ldu_global_f, *this, E);
21552
21553 case NVPTX::BI__nvvm_atom_cta_add_gen_i:
21554 case NVPTX::BI__nvvm_atom_cta_add_gen_l:
21555 case NVPTX::BI__nvvm_atom_cta_add_gen_ll:
21556 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_cta, *this, E);
21557 case NVPTX::BI__nvvm_atom_sys_add_gen_i:
21558 case NVPTX::BI__nvvm_atom_sys_add_gen_l:
21559 case NVPTX::BI__nvvm_atom_sys_add_gen_ll:
21560 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_sys, *this, E);
21561 case NVPTX::BI__nvvm_atom_cta_add_gen_f:
21562 case NVPTX::BI__nvvm_atom_cta_add_gen_d:
21563 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_cta, *this, E);
21564 case NVPTX::BI__nvvm_atom_sys_add_gen_f:
21565 case NVPTX::BI__nvvm_atom_sys_add_gen_d:
21566 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_sys, *this, E);
21567 case NVPTX::BI__nvvm_atom_cta_xchg_gen_i:
21568 case NVPTX::BI__nvvm_atom_cta_xchg_gen_l:
21569 case NVPTX::BI__nvvm_atom_cta_xchg_gen_ll:
21570 return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_cta, *this, E);
21571 case NVPTX::BI__nvvm_atom_sys_xchg_gen_i:
21572 case NVPTX::BI__nvvm_atom_sys_xchg_gen_l:
21573 case NVPTX::BI__nvvm_atom_sys_xchg_gen_ll:
21574 return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_sys, *this, E);
21575 case NVPTX::BI__nvvm_atom_cta_max_gen_i:
21576 case NVPTX::BI__nvvm_atom_cta_max_gen_ui:
21577 case NVPTX::BI__nvvm_atom_cta_max_gen_l:
21578 case NVPTX::BI__nvvm_atom_cta_max_gen_ul:
21579 case NVPTX::BI__nvvm_atom_cta_max_gen_ll:
21580 case NVPTX::BI__nvvm_atom_cta_max_gen_ull:
21581 return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_cta, *this, E);
21582 case NVPTX::BI__nvvm_atom_sys_max_gen_i:
21583 case NVPTX::BI__nvvm_atom_sys_max_gen_ui:
21584 case NVPTX::BI__nvvm_atom_sys_max_gen_l:
21585 case NVPTX::BI__nvvm_atom_sys_max_gen_ul:
21586 case NVPTX::BI__nvvm_atom_sys_max_gen_ll:
21587 case NVPTX::BI__nvvm_atom_sys_max_gen_ull:
21588 return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_sys, *this, E);
21589 case NVPTX::BI__nvvm_atom_cta_min_gen_i:
21590 case NVPTX::BI__nvvm_atom_cta_min_gen_ui:
21591 case NVPTX::BI__nvvm_atom_cta_min_gen_l:
21592 case NVPTX::BI__nvvm_atom_cta_min_gen_ul:
21593 case NVPTX::BI__nvvm_atom_cta_min_gen_ll:
21594 case NVPTX::BI__nvvm_atom_cta_min_gen_ull:
21595 return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_cta, *this, E);
21596 case NVPTX::BI__nvvm_atom_sys_min_gen_i:
21597 case NVPTX::BI__nvvm_atom_sys_min_gen_ui:
21598 case NVPTX::BI__nvvm_atom_sys_min_gen_l:
21599 case NVPTX::BI__nvvm_atom_sys_min_gen_ul:
21600 case NVPTX::BI__nvvm_atom_sys_min_gen_ll:
21601 case NVPTX::BI__nvvm_atom_sys_min_gen_ull:
21602 return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_sys, *this, E);
21603 case NVPTX::BI__nvvm_atom_cta_inc_gen_ui:
21604 return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_cta, *this, E);
21605 case NVPTX::BI__nvvm_atom_cta_dec_gen_ui:
21606 return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_cta, *this, E);
21607 case NVPTX::BI__nvvm_atom_sys_inc_gen_ui:
21608 return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_sys, *this, E);
21609 case NVPTX::BI__nvvm_atom_sys_dec_gen_ui:
21610 return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_sys, *this, E);
21611 case NVPTX::BI__nvvm_atom_cta_and_gen_i:
21612 case NVPTX::BI__nvvm_atom_cta_and_gen_l:
21613 case NVPTX::BI__nvvm_atom_cta_and_gen_ll:
21614 return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_cta, *this, E);
21615 case NVPTX::BI__nvvm_atom_sys_and_gen_i:
21616 case NVPTX::BI__nvvm_atom_sys_and_gen_l:
21617 case NVPTX::BI__nvvm_atom_sys_and_gen_ll:
21618 return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_sys, *this, E);
21619 case NVPTX::BI__nvvm_atom_cta_or_gen_i:
21620 case NVPTX::BI__nvvm_atom_cta_or_gen_l:
21621 case NVPTX::BI__nvvm_atom_cta_or_gen_ll:
21622 return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_cta, *this, E);
21623 case NVPTX::BI__nvvm_atom_sys_or_gen_i:
21624 case NVPTX::BI__nvvm_atom_sys_or_gen_l:
21625 case NVPTX::BI__nvvm_atom_sys_or_gen_ll:
21626 return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_sys, *this, E);
21627 case NVPTX::BI__nvvm_atom_cta_xor_gen_i:
21628 case NVPTX::BI__nvvm_atom_cta_xor_gen_l:
21629 case NVPTX::BI__nvvm_atom_cta_xor_gen_ll:
21630 return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_cta, *this, E);
21631 case NVPTX::BI__nvvm_atom_sys_xor_gen_i:
21632 case NVPTX::BI__nvvm_atom_sys_xor_gen_l:
21633 case NVPTX::BI__nvvm_atom_sys_xor_gen_ll:
21634 return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_sys, *this, E);
21635 case NVPTX::BI__nvvm_atom_cta_cas_gen_us:
21636 case NVPTX::BI__nvvm_atom_cta_cas_gen_i:
21637 case NVPTX::BI__nvvm_atom_cta_cas_gen_l:
21638 case NVPTX::BI__nvvm_atom_cta_cas_gen_ll: {
21639 Value *Ptr = EmitScalarExpr(E->getArg(0));
21640 llvm::Type *ElemTy =
21641 ConvertTypeForMem(E->getArg(0)->getType()->getPointeeType());
21642 return Builder.CreateCall(
21643 CGM.getIntrinsic(
21644 Intrinsic::nvvm_atomic_cas_gen_i_cta, {ElemTy, Ptr->getType()}),
21645 {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
21646 }
21647 case NVPTX::BI__nvvm_atom_sys_cas_gen_us:
21648 case NVPTX::BI__nvvm_atom_sys_cas_gen_i:
21649 case NVPTX::BI__nvvm_atom_sys_cas_gen_l:
21650 case NVPTX::BI__nvvm_atom_sys_cas_gen_ll: {
21651 Value *Ptr = EmitScalarExpr(E->getArg(0));
21652 llvm::Type *ElemTy =
21653 ConvertTypeForMem(E->getArg(0)->getType()->getPointeeType());
21654 return Builder.CreateCall(
21655 CGM.getIntrinsic(
21656 Intrinsic::nvvm_atomic_cas_gen_i_sys, {ElemTy, Ptr->getType()}),
21657 {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
21658 }
21659 case NVPTX::BI__nvvm_match_all_sync_i32p:
21660 case NVPTX::BI__nvvm_match_all_sync_i64p: {
21661 Value *Mask = EmitScalarExpr(E->getArg(0));
21662 Value *Val = EmitScalarExpr(E->getArg(1));
21663 Address PredOutPtr = EmitPointerWithAlignment(E->getArg(2));
21664 Value *ResultPair = Builder.CreateCall(
21665 CGM.getIntrinsic(BuiltinID == NVPTX::BI__nvvm_match_all_sync_i32p
21666 ? Intrinsic::nvvm_match_all_sync_i32p
21667 : Intrinsic::nvvm_match_all_sync_i64p),
21668 {Mask, Val});
21669 Value *Pred = Builder.CreateZExt(Builder.CreateExtractValue(ResultPair, 1),
21670 PredOutPtr.getElementType());
21671 Builder.CreateStore(Pred, PredOutPtr);
21672 return Builder.CreateExtractValue(ResultPair, 0);
21673 }
21674
21675 // FP MMA loads
21676 case NVPTX::BI__hmma_m16n16k16_ld_a:
21677 case NVPTX::BI__hmma_m16n16k16_ld_b:
21678 case NVPTX::BI__hmma_m16n16k16_ld_c_f16:
21679 case NVPTX::BI__hmma_m16n16k16_ld_c_f32:
21680 case NVPTX::BI__hmma_m32n8k16_ld_a:
21681 case NVPTX::BI__hmma_m32n8k16_ld_b:
21682 case NVPTX::BI__hmma_m32n8k16_ld_c_f16:
21683 case NVPTX::BI__hmma_m32n8k16_ld_c_f32:
21684 case NVPTX::BI__hmma_m8n32k16_ld_a:
21685 case NVPTX::BI__hmma_m8n32k16_ld_b:
21686 case NVPTX::BI__hmma_m8n32k16_ld_c_f16:
21687 case NVPTX::BI__hmma_m8n32k16_ld_c_f32:
21688 // Integer MMA loads.
21689 case NVPTX::BI__imma_m16n16k16_ld_a_s8:
21690 case NVPTX::BI__imma_m16n16k16_ld_a_u8:
21691 case NVPTX::BI__imma_m16n16k16_ld_b_s8:
21692 case NVPTX::BI__imma_m16n16k16_ld_b_u8:
21693 case NVPTX::BI__imma_m16n16k16_ld_c:
21694 case NVPTX::BI__imma_m32n8k16_ld_a_s8:
21695 case NVPTX::BI__imma_m32n8k16_ld_a_u8:
21696 case NVPTX::BI__imma_m32n8k16_ld_b_s8:
21697 case NVPTX::BI__imma_m32n8k16_ld_b_u8:
21698 case NVPTX::BI__imma_m32n8k16_ld_c:
21699 case NVPTX::BI__imma_m8n32k16_ld_a_s8:
21700 case NVPTX::BI__imma_m8n32k16_ld_a_u8:
21701 case NVPTX::BI__imma_m8n32k16_ld_b_s8:
21702 case NVPTX::BI__imma_m8n32k16_ld_b_u8:
21703 case NVPTX::BI__imma_m8n32k16_ld_c:
21704 // Sub-integer MMA loads.
21705 case NVPTX::BI__imma_m8n8k32_ld_a_s4:
21706 case NVPTX::BI__imma_m8n8k32_ld_a_u4:
21707 case NVPTX::BI__imma_m8n8k32_ld_b_s4:
21708 case NVPTX::BI__imma_m8n8k32_ld_b_u4:
21709 case NVPTX::BI__imma_m8n8k32_ld_c:
21710 case NVPTX::BI__bmma_m8n8k128_ld_a_b1:
21711 case NVPTX::BI__bmma_m8n8k128_ld_b_b1:
21712 case NVPTX::BI__bmma_m8n8k128_ld_c:
21713 // Double MMA loads.
21714 case NVPTX::BI__dmma_m8n8k4_ld_a:
21715 case NVPTX::BI__dmma_m8n8k4_ld_b:
21716 case NVPTX::BI__dmma_m8n8k4_ld_c:
21717 // Alternate float MMA loads.
21718 case NVPTX::BI__mma_bf16_m16n16k16_ld_a:
21719 case NVPTX::BI__mma_bf16_m16n16k16_ld_b:
21720 case NVPTX::BI__mma_bf16_m8n32k16_ld_a:
21721 case NVPTX::BI__mma_bf16_m8n32k16_ld_b:
21722 case NVPTX::BI__mma_bf16_m32n8k16_ld_a:
21723 case NVPTX::BI__mma_bf16_m32n8k16_ld_b:
21724 case NVPTX::BI__mma_tf32_m16n16k8_ld_a:
21725 case NVPTX::BI__mma_tf32_m16n16k8_ld_b:
21726 case NVPTX::BI__mma_tf32_m16n16k8_ld_c: {
21727 Address Dst = EmitPointerWithAlignment(E->getArg(0));
21728 Value *Src = EmitScalarExpr(E->getArg(1));
21729 Value *Ldm = EmitScalarExpr(E->getArg(2));
21730 std::optional<llvm::APSInt> isColMajorArg =
21731 E->getArg(3)->getIntegerConstantExpr(getContext());
21732 if (!isColMajorArg)
21733 return nullptr;
21734 bool isColMajor = isColMajorArg->getSExtValue();
21735 NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID);
21736 unsigned IID = isColMajor ? II.IID_col : II.IID_row;
21737 if (IID == 0)
21738 return nullptr;
21739
21740 Value *Result =
21741 Builder.CreateCall(CGM.getIntrinsic(IID, Src->getType()), {Src, Ldm});
21742
21743 // Save returned values.
21744 assert(II.NumResults);
21745 if (II.NumResults == 1) {
21746 Builder.CreateAlignedStore(Result, Dst.emitRawPointer(*this),
21747 CharUnits::fromQuantity(4));
21748 } else {
21749 for (unsigned i = 0; i < II.NumResults; ++i) {
21750 Builder.CreateAlignedStore(
21751 Builder.CreateBitCast(Builder.CreateExtractValue(Result, i),
21752 Dst.getElementType()),
21753 Builder.CreateGEP(Dst.getElementType(), Dst.emitRawPointer(*this),
21754 llvm::ConstantInt::get(IntTy, i)),
21755 CharUnits::fromQuantity(4));
21756 }
21757 }
21758 return Result;
21759 }
21760
21761 case NVPTX::BI__hmma_m16n16k16_st_c_f16:
21762 case NVPTX::BI__hmma_m16n16k16_st_c_f32:
21763 case NVPTX::BI__hmma_m32n8k16_st_c_f16:
21764 case NVPTX::BI__hmma_m32n8k16_st_c_f32:
21765 case NVPTX::BI__hmma_m8n32k16_st_c_f16:
21766 case NVPTX::BI__hmma_m8n32k16_st_c_f32:
21767 case NVPTX::BI__imma_m16n16k16_st_c_i32:
21768 case NVPTX::BI__imma_m32n8k16_st_c_i32:
21769 case NVPTX::BI__imma_m8n32k16_st_c_i32:
21770 case NVPTX::BI__imma_m8n8k32_st_c_i32:
21771 case NVPTX::BI__bmma_m8n8k128_st_c_i32:
21772 case NVPTX::BI__dmma_m8n8k4_st_c_f64:
21773 case NVPTX::BI__mma_m16n16k8_st_c_f32: {
21774 Value *Dst = EmitScalarExpr(E->getArg(0));
21775 Address Src = EmitPointerWithAlignment(E->getArg(1));
21776 Value *Ldm = EmitScalarExpr(E->getArg(2));
21777 std::optional<llvm::APSInt> isColMajorArg =
21778 E->getArg(3)->getIntegerConstantExpr(getContext());
21779 if (!isColMajorArg)
21780 return nullptr;
21781 bool isColMajor = isColMajorArg->getSExtValue();
21782 NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID);
21783 unsigned IID = isColMajor ? II.IID_col : II.IID_row;
21784 if (IID == 0)
21785 return nullptr;
21786 Function *Intrinsic =
21787 CGM.getIntrinsic(IID, Dst->getType());
21788 llvm::Type *ParamType = Intrinsic->getFunctionType()->getParamType(1);
21789 SmallVector<Value *, 10> Values = {Dst};
21790 for (unsigned i = 0; i < II.NumResults; ++i) {
21791 Value *V = Builder.CreateAlignedLoad(
21792 Src.getElementType(),
21793 Builder.CreateGEP(Src.getElementType(), Src.emitRawPointer(*this),
21794 llvm::ConstantInt::get(IntTy, i)),
21795 CharUnits::fromQuantity(4));
21796 Values.push_back(Builder.CreateBitCast(V, ParamType));
21797 }
21798 Values.push_back(Ldm);
21799 Value *Result = Builder.CreateCall(Intrinsic, Values);
21800 return Result;
21801 }
21802
21803 // BI__hmma_m16n16k16_mma_<Dtype><CType>(d, a, b, c, layout, satf) -->
21804 // Intrinsic::nvvm_wmma_m16n16k16_mma_sync<layout A,B><DType><CType><Satf>
21805 case NVPTX::BI__hmma_m16n16k16_mma_f16f16:
21806 case NVPTX::BI__hmma_m16n16k16_mma_f32f16:
21807 case NVPTX::BI__hmma_m16n16k16_mma_f32f32:
21808 case NVPTX::BI__hmma_m16n16k16_mma_f16f32:
21809 case NVPTX::BI__hmma_m32n8k16_mma_f16f16:
21810 case NVPTX::BI__hmma_m32n8k16_mma_f32f16:
21811 case NVPTX::BI__hmma_m32n8k16_mma_f32f32:
21812 case NVPTX::BI__hmma_m32n8k16_mma_f16f32:
21813 case NVPTX::BI__hmma_m8n32k16_mma_f16f16:
21814 case NVPTX::BI__hmma_m8n32k16_mma_f32f16:
21815 case NVPTX::BI__hmma_m8n32k16_mma_f32f32:
21816 case NVPTX::BI__hmma_m8n32k16_mma_f16f32:
21817 case NVPTX::BI__imma_m16n16k16_mma_s8:
21818 case NVPTX::BI__imma_m16n16k16_mma_u8:
21819 case NVPTX::BI__imma_m32n8k16_mma_s8:
21820 case NVPTX::BI__imma_m32n8k16_mma_u8:
21821 case NVPTX::BI__imma_m8n32k16_mma_s8:
21822 case NVPTX::BI__imma_m8n32k16_mma_u8:
21823 case NVPTX::BI__imma_m8n8k32_mma_s4:
21824 case NVPTX::BI__imma_m8n8k32_mma_u4:
21825 case NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1:
21826 case NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1:
21827 case NVPTX::BI__dmma_m8n8k4_mma_f64:
21828 case NVPTX::BI__mma_bf16_m16n16k16_mma_f32:
21829 case NVPTX::BI__mma_bf16_m8n32k16_mma_f32:
21830 case NVPTX::BI__mma_bf16_m32n8k16_mma_f32:
21831 case NVPTX::BI__mma_tf32_m16n16k8_mma_f32: {
21832 Address Dst = EmitPointerWithAlignment(E->getArg(0));
21833 Address SrcA = EmitPointerWithAlignment(E->getArg(1));
21834 Address SrcB = EmitPointerWithAlignment(E->getArg(2));
21835 Address SrcC = EmitPointerWithAlignment(E->getArg(3));
21836 std::optional<llvm::APSInt> LayoutArg =
21837 E->getArg(4)->getIntegerConstantExpr(getContext());
21838 if (!LayoutArg)
21839 return nullptr;
21840 int Layout = LayoutArg->getSExtValue();
21841 if (Layout < 0 || Layout > 3)
21842 return nullptr;
21843 llvm::APSInt SatfArg;
21844 if (BuiltinID == NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1 ||
21845 BuiltinID == NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1)
21846 SatfArg = 0; // .b1 does not have satf argument.
21847 else if (std::optional<llvm::APSInt> OptSatfArg =
21848 E->getArg(5)->getIntegerConstantExpr(getContext()))
21849 SatfArg = *OptSatfArg;
21850 else
21851 return nullptr;
21852 bool Satf = SatfArg.getSExtValue();
21853 NVPTXMmaInfo MI = getNVPTXMmaInfo(BuiltinID);
21854 unsigned IID = MI.getMMAIntrinsic(Layout, Satf);
21855 if (IID == 0) // Unsupported combination of Layout/Satf.
21856 return nullptr;
21857
21858 SmallVector<Value *, 24> Values;
21859 Function *Intrinsic = CGM.getIntrinsic(IID);
21860 llvm::Type *AType = Intrinsic->getFunctionType()->getParamType(0);
21861 // Load A
21862 for (unsigned i = 0; i < MI.NumEltsA; ++i) {
21863 Value *V = Builder.CreateAlignedLoad(
21864 SrcA.getElementType(),
21865 Builder.CreateGEP(SrcA.getElementType(), SrcA.emitRawPointer(*this),
21866 llvm::ConstantInt::get(IntTy, i)),
21867 CharUnits::fromQuantity(4));
21868 Values.push_back(Builder.CreateBitCast(V, AType));
21869 }
21870 // Load B
21871 llvm::Type *BType = Intrinsic->getFunctionType()->getParamType(MI.NumEltsA);
21872 for (unsigned i = 0; i < MI.NumEltsB; ++i) {
21873 Value *V = Builder.CreateAlignedLoad(
21874 SrcB.getElementType(),
21875 Builder.CreateGEP(SrcB.getElementType(), SrcB.emitRawPointer(*this),
21876 llvm::ConstantInt::get(IntTy, i)),
21877 CharUnits::fromQuantity(4));
21878 Values.push_back(Builder.CreateBitCast(V, BType));
21879 }
21880 // Load C
21881 llvm::Type *CType =
21882 Intrinsic->getFunctionType()->getParamType(MI.NumEltsA + MI.NumEltsB);
21883 for (unsigned i = 0; i < MI.NumEltsC; ++i) {
21884 Value *V = Builder.CreateAlignedLoad(
21885 SrcC.getElementType(),
21886 Builder.CreateGEP(SrcC.getElementType(), SrcC.emitRawPointer(*this),
21887 llvm::ConstantInt::get(IntTy, i)),
21888 CharUnits::fromQuantity(4));
21889 Values.push_back(Builder.CreateBitCast(V, CType));
21890 }
21891 Value *Result = Builder.CreateCall(Intrinsic, Values);
21892 llvm::Type *DType = Dst.getElementType();
21893 for (unsigned i = 0; i < MI.NumEltsD; ++i)
21894 Builder.CreateAlignedStore(
21895 Builder.CreateBitCast(Builder.CreateExtractValue(Result, i), DType),
21896 Builder.CreateGEP(Dst.getElementType(), Dst.emitRawPointer(*this),
21897 llvm::ConstantInt::get(IntTy, i)),
21898 CharUnits::fromQuantity(4));
21899 return Result;
21900 }
21901 // The following builtins require half type support
21902 case NVPTX::BI__nvvm_ex2_approx_f16:
21903 return MakeHalfType(Intrinsic::nvvm_ex2_approx_f16, BuiltinID, E, *this);
21904 case NVPTX::BI__nvvm_ex2_approx_f16x2:
21905 return MakeHalfType(Intrinsic::nvvm_ex2_approx_f16x2, BuiltinID, E, *this);
21906 case NVPTX::BI__nvvm_ff2f16x2_rn:
21907 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rn, BuiltinID, E, *this);
21908 case NVPTX::BI__nvvm_ff2f16x2_rn_relu:
21909 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rn_relu, BuiltinID, E, *this);
21910 case NVPTX::BI__nvvm_ff2f16x2_rz:
21911 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rz, BuiltinID, E, *this);
21912 case NVPTX::BI__nvvm_ff2f16x2_rz_relu:
21913 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rz_relu, BuiltinID, E, *this);
21914 case NVPTX::BI__nvvm_fma_rn_f16:
21915 return MakeHalfType(Intrinsic::nvvm_fma_rn_f16, BuiltinID, E, *this);
21916 case NVPTX::BI__nvvm_fma_rn_f16x2:
21917 return MakeHalfType(Intrinsic::nvvm_fma_rn_f16x2, BuiltinID, E, *this);
21918 case NVPTX::BI__nvvm_fma_rn_ftz_f16:
21919 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_f16, BuiltinID, E, *this);
21920 case NVPTX::BI__nvvm_fma_rn_ftz_f16x2:
21921 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_f16x2, BuiltinID, E, *this);
21922 case NVPTX::BI__nvvm_fma_rn_ftz_relu_f16:
21923 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_relu_f16, BuiltinID, E,
21924 *this);
21925 case NVPTX::BI__nvvm_fma_rn_ftz_relu_f16x2:
21926 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_relu_f16x2, BuiltinID, E,
21927 *this);
21928 case NVPTX::BI__nvvm_fma_rn_ftz_sat_f16:
21929 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_sat_f16, BuiltinID, E,
21930 *this);
21931 case NVPTX::BI__nvvm_fma_rn_ftz_sat_f16x2:
21932 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_sat_f16x2, BuiltinID, E,
21933 *this);
21934 case NVPTX::BI__nvvm_fma_rn_relu_f16:
21935 return MakeHalfType(Intrinsic::nvvm_fma_rn_relu_f16, BuiltinID, E, *this);
21936 case NVPTX::BI__nvvm_fma_rn_relu_f16x2:
21937 return MakeHalfType(Intrinsic::nvvm_fma_rn_relu_f16x2, BuiltinID, E, *this);
21938 case NVPTX::BI__nvvm_fma_rn_sat_f16:
21939 return MakeHalfType(Intrinsic::nvvm_fma_rn_sat_f16, BuiltinID, E, *this);
21940 case NVPTX::BI__nvvm_fma_rn_sat_f16x2:
21941 return MakeHalfType(Intrinsic::nvvm_fma_rn_sat_f16x2, BuiltinID, E, *this);
21942 case NVPTX::BI__nvvm_fmax_f16:
21943 return MakeHalfType(Intrinsic::nvvm_fmax_f16, BuiltinID, E, *this);
21944 case NVPTX::BI__nvvm_fmax_f16x2:
21945 return MakeHalfType(Intrinsic::nvvm_fmax_f16x2, BuiltinID, E, *this);
21946 case NVPTX::BI__nvvm_fmax_ftz_f16:
21947 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_f16, BuiltinID, E, *this);
21948 case NVPTX::BI__nvvm_fmax_ftz_f16x2:
21949 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_f16x2, BuiltinID, E, *this);
21950 case NVPTX::BI__nvvm_fmax_ftz_nan_f16:
21951 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_f16, BuiltinID, E, *this);
21952 case NVPTX::BI__nvvm_fmax_ftz_nan_f16x2:
21953 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_f16x2, BuiltinID, E,
21954 *this);
21955 case NVPTX::BI__nvvm_fmax_ftz_nan_xorsign_abs_f16:
21956 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f16, BuiltinID,
21957 E, *this);
21958 case NVPTX::BI__nvvm_fmax_ftz_nan_xorsign_abs_f16x2:
21959 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f16x2,
21960 BuiltinID, E, *this);
21961 case NVPTX::BI__nvvm_fmax_ftz_xorsign_abs_f16:
21962 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_xorsign_abs_f16, BuiltinID, E,
21963 *this);
21964 case NVPTX::BI__nvvm_fmax_ftz_xorsign_abs_f16x2:
21965 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_xorsign_abs_f16x2, BuiltinID,
21966 E, *this);
21967 case NVPTX::BI__nvvm_fmax_nan_f16:
21968 return MakeHalfType(Intrinsic::nvvm_fmax_nan_f16, BuiltinID, E, *this);
21969 case NVPTX::BI__nvvm_fmax_nan_f16x2:
21970 return MakeHalfType(Intrinsic::nvvm_fmax_nan_f16x2, BuiltinID, E, *this);
21971 case NVPTX::BI__nvvm_fmax_nan_xorsign_abs_f16:
21972 return MakeHalfType(Intrinsic::nvvm_fmax_nan_xorsign_abs_f16, BuiltinID, E,
21973 *this);
21974 case NVPTX::BI__nvvm_fmax_nan_xorsign_abs_f16x2:
21975 return MakeHalfType(Intrinsic::nvvm_fmax_nan_xorsign_abs_f16x2, BuiltinID,
21976 E, *this);
21977 case NVPTX::BI__nvvm_fmax_xorsign_abs_f16:
21978 return MakeHalfType(Intrinsic::nvvm_fmax_xorsign_abs_f16, BuiltinID, E,
21979 *this);
21980 case NVPTX::BI__nvvm_fmax_xorsign_abs_f16x2:
21981 return MakeHalfType(Intrinsic::nvvm_fmax_xorsign_abs_f16x2, BuiltinID, E,
21982 *this);
21983 case NVPTX::BI__nvvm_fmin_f16:
21984 return MakeHalfType(Intrinsic::nvvm_fmin_f16, BuiltinID, E, *this);
21985 case NVPTX::BI__nvvm_fmin_f16x2:
21986 return MakeHalfType(Intrinsic::nvvm_fmin_f16x2, BuiltinID, E, *this);
21987 case NVPTX::BI__nvvm_fmin_ftz_f16:
21988 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_f16, BuiltinID, E, *this);
21989 case NVPTX::BI__nvvm_fmin_ftz_f16x2:
21990 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_f16x2, BuiltinID, E, *this);
21991 case NVPTX::BI__nvvm_fmin_ftz_nan_f16:
21992 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_f16, BuiltinID, E, *this);
21993 case NVPTX::BI__nvvm_fmin_ftz_nan_f16x2:
21994 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_f16x2, BuiltinID, E,
21995 *this);
21996 case NVPTX::BI__nvvm_fmin_ftz_nan_xorsign_abs_f16:
21997 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f16, BuiltinID,
21998 E, *this);
21999 case NVPTX::BI__nvvm_fmin_ftz_nan_xorsign_abs_f16x2:
22000 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f16x2,
22001 BuiltinID, E, *this);
22002 case NVPTX::BI__nvvm_fmin_ftz_xorsign_abs_f16:
22003 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_xorsign_abs_f16, BuiltinID, E,
22004 *this);
22005 case NVPTX::BI__nvvm_fmin_ftz_xorsign_abs_f16x2:
22006 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_xorsign_abs_f16x2, BuiltinID,
22007 E, *this);
22008 case NVPTX::BI__nvvm_fmin_nan_f16:
22009 return MakeHalfType(Intrinsic::nvvm_fmin_nan_f16, BuiltinID, E, *this);
22010 case NVPTX::BI__nvvm_fmin_nan_f16x2:
22011 return MakeHalfType(Intrinsic::nvvm_fmin_nan_f16x2, BuiltinID, E, *this);
22012 case NVPTX::BI__nvvm_fmin_nan_xorsign_abs_f16:
22013 return MakeHalfType(Intrinsic::nvvm_fmin_nan_xorsign_abs_f16, BuiltinID, E,
22014 *this);
22015 case NVPTX::BI__nvvm_fmin_nan_xorsign_abs_f16x2:
22016 return MakeHalfType(Intrinsic::nvvm_fmin_nan_xorsign_abs_f16x2, BuiltinID,
22017 E, *this);
22018 case NVPTX::BI__nvvm_fmin_xorsign_abs_f16:
22019 return MakeHalfType(Intrinsic::nvvm_fmin_xorsign_abs_f16, BuiltinID, E,
22020 *this);
22021 case NVPTX::BI__nvvm_fmin_xorsign_abs_f16x2:
22022 return MakeHalfType(Intrinsic::nvvm_fmin_xorsign_abs_f16x2, BuiltinID, E,
22023 *this);
22024 case NVPTX::BI__nvvm_ldg_h:
22025 case NVPTX::BI__nvvm_ldg_h2:
22026 return MakeHalfType(Intrinsic::not_intrinsic, BuiltinID, E, *this);
22027 case NVPTX::BI__nvvm_ldu_h:
22028 case NVPTX::BI__nvvm_ldu_h2:
22029 return MakeHalfType(Intrinsic::nvvm_ldu_global_f, BuiltinID, E, *this);
22030 case NVPTX::BI__nvvm_cp_async_ca_shared_global_4:
22031 return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_4,
22032 Intrinsic::nvvm_cp_async_ca_shared_global_4_s, *this, E,
22033 4);
22034 case NVPTX::BI__nvvm_cp_async_ca_shared_global_8:
22035 return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_8,
22036 Intrinsic::nvvm_cp_async_ca_shared_global_8_s, *this, E,
22037 8);
22038 case NVPTX::BI__nvvm_cp_async_ca_shared_global_16:
22039 return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_16,
22040 Intrinsic::nvvm_cp_async_ca_shared_global_16_s, *this, E,
22041 16);
22042 case NVPTX::BI__nvvm_cp_async_cg_shared_global_16:
22043 return MakeCpAsync(Intrinsic::nvvm_cp_async_cg_shared_global_16,
22044 Intrinsic::nvvm_cp_async_cg_shared_global_16_s, *this, E,
22045 16);
22046 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_x:
22047 return Builder.CreateCall(
22048 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_x));
22049 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_y:
22050 return Builder.CreateCall(
22051 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_y));
22052 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_z:
22053 return Builder.CreateCall(
22054 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_z));
22055 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_w:
22056 return Builder.CreateCall(
22057 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_w));
22058 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_x:
22059 return Builder.CreateCall(
22060 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_x));
22061 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_y:
22062 return Builder.CreateCall(
22063 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_y));
22064 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_z:
22065 return Builder.CreateCall(
22066 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_z));
22067 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_w:
22068 return Builder.CreateCall(
22069 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_w));
22070 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_x:
22071 return Builder.CreateCall(
22072 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_x));
22073 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_y:
22074 return Builder.CreateCall(
22075 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_y));
22076 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_z:
22077 return Builder.CreateCall(
22078 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_z));
22079 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_w:
22080 return Builder.CreateCall(
22081 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_w));
22082 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_x:
22083 return Builder.CreateCall(
22084 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_x));
22085 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_y:
22086 return Builder.CreateCall(
22087 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_y));
22088 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_z:
22089 return Builder.CreateCall(
22090 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_z));
22091 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_w:
22092 return Builder.CreateCall(
22093 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_w));
22094 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctarank:
22095 return Builder.CreateCall(
22096 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctarank));
22097 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctarank:
22098 return Builder.CreateCall(
22099 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctarank));
22100 case NVPTX::BI__nvvm_is_explicit_cluster:
22101 return Builder.CreateCall(
22102 CGM.getIntrinsic(Intrinsic::nvvm_is_explicit_cluster));
22103 case NVPTX::BI__nvvm_isspacep_shared_cluster:
22104 return Builder.CreateCall(
22105 CGM.getIntrinsic(Intrinsic::nvvm_isspacep_shared_cluster),
22106 EmitScalarExpr(E->getArg(0)));
22107 case NVPTX::BI__nvvm_mapa:
22108 return Builder.CreateCall(
22109 CGM.getIntrinsic(Intrinsic::nvvm_mapa),
22110 {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});
22111 case NVPTX::BI__nvvm_mapa_shared_cluster:
22112 return Builder.CreateCall(
22113 CGM.getIntrinsic(Intrinsic::nvvm_mapa_shared_cluster),
22114 {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});
22115 case NVPTX::BI__nvvm_getctarank:
22116 return Builder.CreateCall(
22117 CGM.getIntrinsic(Intrinsic::nvvm_getctarank),
22118 EmitScalarExpr(E->getArg(0)));
22119 case NVPTX::BI__nvvm_getctarank_shared_cluster:
22120 return Builder.CreateCall(
22121 CGM.getIntrinsic(Intrinsic::nvvm_getctarank_shared_cluster),
22122 EmitScalarExpr(E->getArg(0)));
22123 case NVPTX::BI__nvvm_barrier_cluster_arrive:
22124 return Builder.CreateCall(
22125 CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_arrive));
22126 case NVPTX::BI__nvvm_barrier_cluster_arrive_relaxed:
22127 return Builder.CreateCall(
22128 CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_arrive_relaxed));
22129 case NVPTX::BI__nvvm_barrier_cluster_wait:
22130 return Builder.CreateCall(
22131 CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_wait));
22132 case NVPTX::BI__nvvm_fence_sc_cluster:
22133 return Builder.CreateCall(
22134 CGM.getIntrinsic(Intrinsic::nvvm_fence_sc_cluster));
22135 default:
22136 return nullptr;
22137 }
22138}
22139
22140namespace {
22141struct BuiltinAlignArgs {
22142 llvm::Value *Src = nullptr;
22143 llvm::Type *SrcType = nullptr;
22144 llvm::Value *Alignment = nullptr;
22145 llvm::Value *Mask = nullptr;
22146 llvm::IntegerType *IntType = nullptr;
22147
22148 BuiltinAlignArgs(const CallExpr *E, CodeGenFunction &CGF) {
22149 QualType AstType = E->getArg(0)->getType();
22150 if (AstType->isArrayType())
22151 Src = CGF.EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(CGF);
22152 else
22153 Src = CGF.EmitScalarExpr(E->getArg(0));
22154 SrcType = Src->getType();
22155 if (SrcType->isPointerTy()) {
22156 IntType = IntegerType::get(
22157 CGF.getLLVMContext(),
22158 CGF.CGM.getDataLayout().getIndexTypeSizeInBits(SrcType));
22159 } else {
22160 assert(SrcType->isIntegerTy());
22161 IntType = cast<llvm::IntegerType>(SrcType);
22162 }
22163 Alignment = CGF.EmitScalarExpr(E->getArg(1));
22164 Alignment = CGF.Builder.CreateZExtOrTrunc(Alignment, IntType, "alignment");
22165 auto *One = llvm::ConstantInt::get(IntType, 1);
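// Alignment - 1 yields a mask of the low bits that must be clear in an
// aligned value; the is_aligned and align_up/align_down lowerings below
// both work in terms of this mask.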
22166 Mask = CGF.Builder.CreateSub(Alignment, One, "mask");
22167 }
22168};
22169} // namespace
22170
22171/// Generate (x & (y-1)) == 0.
22172RValue CodeGenFunction::EmitBuiltinIsAligned(const CallExpr *E) {
22173 BuiltinAlignArgs Args(E, *this);
22174 llvm::Value *SrcAddress = Args.Src;
22175 if (Args.SrcType->isPointerTy())
22176 SrcAddress =
22177 Builder.CreateBitOrPointerCast(Args.Src, Args.IntType, "src_addr");
22178 return RValue::get(Builder.CreateICmpEQ(
22179 Builder.CreateAnd(SrcAddress, Args.Mask, "set_bits"),
22180 llvm::Constant::getNullValue(Args.IntType), "is_aligned"));
22181}
22182
22183/// Generate (x & ~(y-1)) to align down or ((x+(y-1)) & ~(y-1)) to align up.
22184/// Note: For pointer types we can avoid ptrtoint/inttoptr pairs by using the
22185/// llvm.ptrmask intrinsic (with a GEP before in the align_up case).
22186RValue CodeGenFunction::EmitBuiltinAlignTo(const CallExpr *E, bool AlignUp) {
22187 BuiltinAlignArgs Args(E, *this);
22188 llvm::Value *SrcForMask = Args.Src;
22189 if (AlignUp) {
22190 // When aligning up we have to first add the mask to ensure we go over the
22191 // next alignment value and then align down to the next valid multiple.
22192 // By adding the mask, we ensure that align_up on an already aligned
22193 // value will not change the value.
22194 if (Args.Src->getType()->isPointerTy()) {
22195 if (getLangOpts().isSignedOverflowDefined())
22196 SrcForMask =
22197 Builder.CreateGEP(Int8Ty, SrcForMask, Args.Mask, "over_boundary");
22198 else
22199 SrcForMask = EmitCheckedInBoundsGEP(Int8Ty, SrcForMask, Args.Mask,
22200 /*SignedIndices=*/true,
22201 /*isSubtraction=*/false,
22202 E->getExprLoc(), "over_boundary");
22203 } else {
22204 SrcForMask = Builder.CreateAdd(SrcForMask, Args.Mask, "over_boundary");
22205 }
22206 }
22207 // Invert the mask to only clear the lower bits.
22208 llvm::Value *InvertedMask = Builder.CreateNot(Args.Mask, "inverted_mask");
22209 llvm::Value *Result = nullptr;
22210 if (Args.Src->getType()->isPointerTy()) {
22211 Result = Builder.CreateIntrinsic(
22212 Intrinsic::ptrmask, {Args.SrcType, Args.IntType},
22213 {SrcForMask, InvertedMask}, nullptr, "aligned_result");
22214 } else {
22215 Result = Builder.CreateAnd(SrcForMask, InvertedMask, "aligned_result");
22216 }
22217 assert(Result->getType() == Args.SrcType);
22218 return RValue::get(Result);
22219}
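// In effect, for a pointer p and a constant alignment A the functions above
// lower to the usual bit tricks:
//   __builtin_is_aligned(p, A)  ->  ((uintptr_t)p & (A - 1)) == 0
//   __builtin_align_down(p, A)  ->  llvm.ptrmask(p, ~(A - 1))
//   __builtin_align_up(p, A)    ->  llvm.ptrmask(p + (A - 1), ~(A - 1))
// with plain integer add/and emitted instead when the argument is an integer
// rather than a pointer.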
22220
22221Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
22222 const CallExpr *E) {
22223 switch (BuiltinID) {
22224 case WebAssembly::BI__builtin_wasm_memory_size: {
22225 llvm::Type *ResultType = ConvertType(E->getType());
22226 Value *I = EmitScalarExpr(E->getArg(0));
22227 Function *Callee =
22228 CGM.getIntrinsic(Intrinsic::wasm_memory_size, ResultType);
22229 return Builder.CreateCall(Callee, I);
22230 }
22231 case WebAssembly::BI__builtin_wasm_memory_grow: {
22232 llvm::Type *ResultType = ConvertType(E->getType());
22233 Value *Args[] = {EmitScalarExpr(E->getArg(0)),
22234 EmitScalarExpr(E->getArg(1))};
22235 Function *Callee =
22236 CGM.getIntrinsic(Intrinsic::wasm_memory_grow, ResultType);
22237 return Builder.CreateCall(Callee, Args);
22238 }
22239 case WebAssembly::BI__builtin_wasm_tls_size: {
22240 llvm::Type *ResultType = ConvertType(E->getType());
22241 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_size, ResultType);
22242 return Builder.CreateCall(Callee);
22243 }
22244 case WebAssembly::BI__builtin_wasm_tls_align: {
22245 llvm::Type *ResultType = ConvertType(E->getType());
22246 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_align, ResultType);
22247 return Builder.CreateCall(Callee);
22248 }
22249 case WebAssembly::BI__builtin_wasm_tls_base: {
22250 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_base);
22251 return Builder.CreateCall(Callee);
22252 }
22253 case WebAssembly::BI__builtin_wasm_throw: {
22254 Value *Tag = EmitScalarExpr(E->getArg(0));
22255 Value *Obj = EmitScalarExpr(E->getArg(1));
22256 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_throw);
22257 return Builder.CreateCall(Callee, {Tag, Obj});
22258 }
22259 case WebAssembly::BI__builtin_wasm_rethrow: {
22260 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_rethrow);
22261 return Builder.CreateCall(Callee);
22262 }
22263 case WebAssembly::BI__builtin_wasm_memory_atomic_wait32: {
22264 Value *Addr = EmitScalarExpr(E->getArg(0));
22265 Value *Expected = EmitScalarExpr(E->getArg(1));
22266 Value *Timeout = EmitScalarExpr(E->getArg(2));
22267 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_wait32);
22268 return Builder.CreateCall(Callee, {Addr, Expected, Timeout});
22269 }
22270 case WebAssembly::BI__builtin_wasm_memory_atomic_wait64: {
22271 Value *Addr = EmitScalarExpr(E->getArg(0));
22272 Value *Expected = EmitScalarExpr(E->getArg(1));
22273 Value *Timeout = EmitScalarExpr(E->getArg(2));
22274 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_wait64);
22275 return Builder.CreateCall(Callee, {Addr, Expected, Timeout});
22276 }
22277 case WebAssembly::BI__builtin_wasm_memory_atomic_notify: {
22278 Value *Addr = EmitScalarExpr(E->getArg(0));
22279 Value *Count = EmitScalarExpr(E->getArg(1));
22280 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_notify);
22281 return Builder.CreateCall(Callee, {Addr, Count});
22282 }
22283 case WebAssembly::BI__builtin_wasm_trunc_s_i32_f32:
22284 case WebAssembly::BI__builtin_wasm_trunc_s_i32_f64:
22285 case WebAssembly::BI__builtin_wasm_trunc_s_i64_f32:
22286 case WebAssembly::BI__builtin_wasm_trunc_s_i64_f64: {
22287 Value *Src = EmitScalarExpr(E->getArg(0));
22288 llvm::Type *ResT = ConvertType(E->getType());
22289 Function *Callee =
22290 CGM.getIntrinsic(Intrinsic::wasm_trunc_signed, {ResT, Src->getType()});
22291 return Builder.CreateCall(Callee, {Src});
22292 }
22293 case WebAssembly::BI__builtin_wasm_trunc_u_i32_f32:
22294 case WebAssembly::BI__builtin_wasm_trunc_u_i32_f64:
22295 case WebAssembly::BI__builtin_wasm_trunc_u_i64_f32:
22296 case WebAssembly::BI__builtin_wasm_trunc_u_i64_f64: {
22297 Value *Src = EmitScalarExpr(E->getArg(0));
22298 llvm::Type *ResT = ConvertType(E->getType());
22299 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_trunc_unsigned,
22300 {ResT, Src->getType()});
22301 return Builder.CreateCall(Callee, {Src});
22302 }
22303 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f32:
22304 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f64:
22305 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f32:
22306 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f64:
22307 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i16x8_f16x8:
22308 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32x4_f32x4: {
22309 Value *Src = EmitScalarExpr(E->getArg(0));
22310 llvm::Type *ResT = ConvertType(E->getType());
22311 Function *Callee =
22312 CGM.getIntrinsic(Intrinsic::fptosi_sat, {ResT, Src->getType()});
22313 return Builder.CreateCall(Callee, {Src});
22314 }
22315 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f32:
22316 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f64:
22317 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f32:
22318 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f64:
22319 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i16x8_f16x8:
22320 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32x4_f32x4: {
22321 Value *Src = EmitScalarExpr(E->getArg(0));
22322 llvm::Type *ResT = ConvertType(E->getType());
22323 Function *Callee =
22324 CGM.getIntrinsic(Intrinsic::fptoui_sat, {ResT, Src->getType()});
22325 return Builder.CreateCall(Callee, {Src});
22326 }
22327 case WebAssembly::BI__builtin_wasm_min_f32:
22328 case WebAssembly::BI__builtin_wasm_min_f64:
22329 case WebAssembly::BI__builtin_wasm_min_f16x8:
22330 case WebAssembly::BI__builtin_wasm_min_f32x4:
22331 case WebAssembly::BI__builtin_wasm_min_f64x2: {
22332 Value *LHS = EmitScalarExpr(E->getArg(0));
22333 Value *RHS = EmitScalarExpr(E->getArg(1));
22334 Function *Callee =
22335 CGM.getIntrinsic(Intrinsic::minimum, ConvertType(E->getType()));
22336 return Builder.CreateCall(Callee, {LHS, RHS});
22337 }
22338 case WebAssembly::BI__builtin_wasm_max_f32:
22339 case WebAssembly::BI__builtin_wasm_max_f64:
22340 case WebAssembly::BI__builtin_wasm_max_f16x8:
22341 case WebAssembly::BI__builtin_wasm_max_f32x4:
22342 case WebAssembly::BI__builtin_wasm_max_f64x2: {
22343 Value *LHS = EmitScalarExpr(E->getArg(0));
22344 Value *RHS = EmitScalarExpr(E->getArg(1));
22345 Function *Callee =
22346 CGM.getIntrinsic(Intrinsic::maximum, ConvertType(E->getType()));
22347 return Builder.CreateCall(Callee, {LHS, RHS});
22348 }
22349 case WebAssembly::BI__builtin_wasm_pmin_f16x8:
22350 case WebAssembly::BI__builtin_wasm_pmin_f32x4:
22351 case WebAssembly::BI__builtin_wasm_pmin_f64x2: {
22352 Value *LHS = EmitScalarExpr(E->getArg(0));
22353 Value *RHS = EmitScalarExpr(E->getArg(1));
22354 Function *Callee =
22355 CGM.getIntrinsic(Intrinsic::wasm_pmin, ConvertType(E->getType()));
22356 return Builder.CreateCall(Callee, {LHS, RHS});
22357 }
22358 case WebAssembly::BI__builtin_wasm_pmax_f16x8:
22359 case WebAssembly::BI__builtin_wasm_pmax_f32x4:
22360 case WebAssembly::BI__builtin_wasm_pmax_f64x2: {
22361 Value *LHS = EmitScalarExpr(E->getArg(0));
22362 Value *RHS = EmitScalarExpr(E->getArg(1));
22363 Function *Callee =
22364 CGM.getIntrinsic(Intrinsic::wasm_pmax, ConvertType(E->getType()));
22365 return Builder.CreateCall(Callee, {LHS, RHS});
22366 }
22367 case WebAssembly::BI__builtin_wasm_ceil_f16x8:
22368 case WebAssembly::BI__builtin_wasm_floor_f16x8:
22369 case WebAssembly::BI__builtin_wasm_trunc_f16x8:
22370 case WebAssembly::BI__builtin_wasm_nearest_f16x8:
22371 case WebAssembly::BI__builtin_wasm_ceil_f32x4:
22372 case WebAssembly::BI__builtin_wasm_floor_f32x4:
22373 case WebAssembly::BI__builtin_wasm_trunc_f32x4:
22374 case WebAssembly::BI__builtin_wasm_nearest_f32x4:
22375 case WebAssembly::BI__builtin_wasm_ceil_f64x2:
22376 case WebAssembly::BI__builtin_wasm_floor_f64x2:
22377 case WebAssembly::BI__builtin_wasm_trunc_f64x2:
22378 case WebAssembly::BI__builtin_wasm_nearest_f64x2: {
22379 unsigned IntNo;
22380 switch (BuiltinID) {
22381 case WebAssembly::BI__builtin_wasm_ceil_f16x8:
22382 case WebAssembly::BI__builtin_wasm_ceil_f32x4:
22383 case WebAssembly::BI__builtin_wasm_ceil_f64x2:
22384 IntNo = Intrinsic::ceil;
22385 break;
22386 case WebAssembly::BI__builtin_wasm_floor_f16x8:
22387 case WebAssembly::BI__builtin_wasm_floor_f32x4:
22388 case WebAssembly::BI__builtin_wasm_floor_f64x2:
22389 IntNo = Intrinsic::floor;
22390 break;
22391 case WebAssembly::BI__builtin_wasm_trunc_f16x8:
22392 case WebAssembly::BI__builtin_wasm_trunc_f32x4:
22393 case WebAssembly::BI__builtin_wasm_trunc_f64x2:
22394 IntNo = Intrinsic::trunc;
22395 break;
22396 case WebAssembly::BI__builtin_wasm_nearest_f16x8:
22397 case WebAssembly::BI__builtin_wasm_nearest_f32x4:
22398 case WebAssembly::BI__builtin_wasm_nearest_f64x2:
22399 IntNo = Intrinsic::nearbyint;
22400 break;
22401 default:
22402 llvm_unreachable("unexpected builtin ID");
22403 }
22404 Value *Value = EmitScalarExpr(E->getArg(0));
22405 Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType()));
22406 return Builder.CreateCall(Callee, Value);
22407 }
22408 case WebAssembly::BI__builtin_wasm_ref_null_extern: {
22409 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_ref_null_extern);
22410 return Builder.CreateCall(Callee);
22411 }
22412 case WebAssembly::BI__builtin_wasm_ref_null_func: {
22413 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_ref_null_func);
22414 return Builder.CreateCall(Callee);
22415 }
22416 case WebAssembly::BI__builtin_wasm_swizzle_i8x16: {
22417 Value *Src = EmitScalarExpr(E->getArg(0));
22418 Value *Indices = EmitScalarExpr(E->getArg(1));
22419 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_swizzle);
22420 return Builder.CreateCall(Callee, {Src, Indices});
22421 }
22422 case WebAssembly::BI__builtin_wasm_abs_i8x16:
22423 case WebAssembly::BI__builtin_wasm_abs_i16x8:
22424 case WebAssembly::BI__builtin_wasm_abs_i32x4:
22425 case WebAssembly::BI__builtin_wasm_abs_i64x2: {
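// Integer abs is emitted as a generic select(v < 0, -v, v) sequence rather
// than a target intrinsic.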
22426 Value *Vec = EmitScalarExpr(E->getArg(0));
22427 Value *Neg = Builder.CreateNeg(Vec, "neg");
22428 Constant *Zero = llvm::Constant::getNullValue(Vec->getType());
22429 Value *ICmp = Builder.CreateICmpSLT(Vec, Zero, "abscond");
22430 return Builder.CreateSelect(ICmp, Neg, Vec, "abs");
22431 }
22432 case WebAssembly::BI__builtin_wasm_avgr_u_i8x16:
22433 case WebAssembly::BI__builtin_wasm_avgr_u_i16x8: {
22434 Value *LHS = EmitScalarExpr(E->getArg(0));
22435 Value *RHS = EmitScalarExpr(E->getArg(1));
22436 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_avgr_unsigned,
22437 ConvertType(E->getType()));
22438 return Builder.CreateCall(Callee, {LHS, RHS});
22439 }
22440 case WebAssembly::BI__builtin_wasm_q15mulr_sat_s_i16x8: {
22441 Value *LHS = EmitScalarExpr(E->getArg(0));
22442 Value *RHS = EmitScalarExpr(E->getArg(1));
22443 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_q15mulr_sat_signed);
22444 return Builder.CreateCall(Callee, {LHS, RHS});
22445 }
22446 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_s_i16x8:
22447 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_u_i16x8:
22448 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_s_i32x4:
22449 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_u_i32x4: {
22450 Value *Vec = EmitScalarExpr(E->getArg(0));
22451 unsigned IntNo;
22452 switch (BuiltinID) {
22453 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_s_i16x8:
22454 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_s_i32x4:
22455 IntNo = Intrinsic::wasm_extadd_pairwise_signed;
22456 break;
22457 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_u_i16x8:
22458 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_u_i32x4:
22459 IntNo = Intrinsic::wasm_extadd_pairwise_unsigned;
22460 break;
22461 default:
22462 llvm_unreachable("unexpected builtin ID");
22463 }
22464
22465 Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType()));
22466 return Builder.CreateCall(Callee, Vec);
22467 }
22468 case WebAssembly::BI__builtin_wasm_bitselect: {
22469 Value *V1 = EmitScalarExpr(E->getArg(0));
22470 Value *V2 = EmitScalarExpr(E->getArg(1));
22471 Value *C = EmitScalarExpr(E->getArg(2));
22472 Function *Callee =
22473 CGM.getIntrinsic(Intrinsic::wasm_bitselect, ConvertType(E->getType()));
22474 return Builder.CreateCall(Callee, {V1, V2, C});
22475 }
22476 case WebAssembly::BI__builtin_wasm_dot_s_i32x4_i16x8: {
22477 Value *LHS = EmitScalarExpr(E->getArg(0));
22478 Value *RHS = EmitScalarExpr(E->getArg(1));
22479 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_dot);
22480 return Builder.CreateCall(Callee, {LHS, RHS});
22481 }
22482 case WebAssembly::BI__builtin_wasm_any_true_v128:
22483 case WebAssembly::BI__builtin_wasm_all_true_i8x16:
22484 case WebAssembly::BI__builtin_wasm_all_true_i16x8:
22485 case WebAssembly::BI__builtin_wasm_all_true_i32x4:
22486 case WebAssembly::BI__builtin_wasm_all_true_i64x2: {
22487 unsigned IntNo;
22488 switch (BuiltinID) {
22489 case WebAssembly::BI__builtin_wasm_any_true_v128:
22490 IntNo = Intrinsic::wasm_anytrue;
22491 break;
22492 case WebAssembly::BI__builtin_wasm_all_true_i8x16:
22493 case WebAssembly::BI__builtin_wasm_all_true_i16x8:
22494 case WebAssembly::BI__builtin_wasm_all_true_i32x4:
22495 case WebAssembly::BI__builtin_wasm_all_true_i64x2:
22496 IntNo = Intrinsic::wasm_alltrue;
22497 break;
22498 default:
22499 llvm_unreachable("unexpected builtin ID");
22500 }
22501 Value *Vec = EmitScalarExpr(E->getArg(0));
22502 Function *Callee = CGM.getIntrinsic(IntNo, Vec->getType());
22503 return Builder.CreateCall(Callee, {Vec});
22504 }
22505 case WebAssembly::BI__builtin_wasm_bitmask_i8x16:
22506 case WebAssembly::BI__builtin_wasm_bitmask_i16x8:
22507 case WebAssembly::BI__builtin_wasm_bitmask_i32x4:
22508 case WebAssembly::BI__builtin_wasm_bitmask_i64x2: {
22509 Value *Vec = EmitScalarExpr(E->getArg(0));
22510 Function *Callee =
22511 CGM.getIntrinsic(Intrinsic::wasm_bitmask, Vec->getType());
22512 return Builder.CreateCall(Callee, {Vec});
22513 }
22514 case WebAssembly::BI__builtin_wasm_abs_f16x8:
22515 case WebAssembly::BI__builtin_wasm_abs_f32x4:
22516 case WebAssembly::BI__builtin_wasm_abs_f64x2: {
22517 Value *Vec = EmitScalarExpr(E->getArg(0));
22518 Function *Callee = CGM.getIntrinsic(Intrinsic::fabs, Vec->getType());
22519 return Builder.CreateCall(Callee, {Vec});
22520 }
22521 case WebAssembly::BI__builtin_wasm_sqrt_f16x8:
22522 case WebAssembly::BI__builtin_wasm_sqrt_f32x4:
22523 case WebAssembly::BI__builtin_wasm_sqrt_f64x2: {
22524 Value *Vec = EmitScalarExpr(E->getArg(0));
22525 Function *Callee = CGM.getIntrinsic(Intrinsic::sqrt, Vec->getType());
22526 return Builder.CreateCall(Callee, {Vec});
22527 }
22528 case WebAssembly::BI__builtin_wasm_narrow_s_i8x16_i16x8:
22529 case WebAssembly::BI__builtin_wasm_narrow_u_i8x16_i16x8:
22530 case WebAssembly::BI__builtin_wasm_narrow_s_i16x8_i32x4:
22531 case WebAssembly::BI__builtin_wasm_narrow_u_i16x8_i32x4: {
22532 Value *Low = EmitScalarExpr(E->getArg(0));
22533 Value *High = EmitScalarExpr(E->getArg(1));
22534 unsigned IntNo;
22535 switch (BuiltinID) {
22536 case WebAssembly::BI__builtin_wasm_narrow_s_i8x16_i16x8:
22537 case WebAssembly::BI__builtin_wasm_narrow_s_i16x8_i32x4:
22538 IntNo = Intrinsic::wasm_narrow_signed;
22539 break;
22540 case WebAssembly::BI__builtin_wasm_narrow_u_i8x16_i16x8:
22541 case WebAssembly::BI__builtin_wasm_narrow_u_i16x8_i32x4:
22542 IntNo = Intrinsic::wasm_narrow_unsigned;
22543 break;
22544 default:
22545 llvm_unreachable("unexpected builtin ID");
22546 }
22547 Function *Callee =
22548 CGM.getIntrinsic(IntNo, {ConvertType(E->getType()), Low->getType()});
22549 return Builder.CreateCall(Callee, {Low, High});
22550 }
22551 case WebAssembly::BI__builtin_wasm_trunc_sat_s_zero_f64x2_i32x4:
22552 case WebAssembly::BI__builtin_wasm_trunc_sat_u_zero_f64x2_i32x4: {
22553 Value *Vec = EmitScalarExpr(E->getArg(0));
22554 unsigned IntNo;
22555 switch (BuiltinID) {
22556 case WebAssembly::BI__builtin_wasm_trunc_sat_s_zero_f64x2_i32x4:
22557 IntNo = Intrinsic::fptosi_sat;
22558 break;
22559 case WebAssembly::BI__builtin_wasm_trunc_sat_u_zero_f64x2_i32x4:
22560 IntNo = Intrinsic::fptoui_sat;
22561 break;
22562 default:
22563 llvm_unreachable("unexpected builtin ID");
22564 }
22565 llvm::Type *SrcT = Vec->getType();
22566 llvm::Type *TruncT = SrcT->getWithNewType(Builder.getInt32Ty());
22567 Function *Callee = CGM.getIntrinsic(IntNo, {TruncT, SrcT});
22568 Value *Trunc = Builder.CreateCall(Callee, Vec);
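// The saturating conversion produces a <2 x i32>; shuffling it with a zero
// vector below widens the result to <4 x i32> with the upper two lanes
// zeroed, as required by the *_zero forms of these builtins.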
22569 Value *Splat = Constant::getNullValue(TruncT);
22570 return Builder.CreateShuffleVector(Trunc, Splat, ArrayRef<int>{0, 1, 2, 3});
22571 }
22572 case WebAssembly::BI__builtin_wasm_shuffle_i8x16: {
22573 Value *Ops[18];
22574 size_t OpIdx = 0;
22575 Ops[OpIdx++] = EmitScalarExpr(E->getArg(0));
22576 Ops[OpIdx++] = EmitScalarExpr(E->getArg(1));
22577 while (OpIdx < 18) {
22578 std::optional<llvm::APSInt> LaneConst =
22579 E->getArg(OpIdx)->getIntegerConstantExpr(getContext());
22580 assert(LaneConst && "Constant arg isn't actually constant?");
22581 Ops[OpIdx++] = llvm::ConstantInt::get(getLLVMContext(), *LaneConst);
22582 }
22583 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_shuffle);
22584 return Builder.CreateCall(Callee, Ops);
22585 }
22586 case WebAssembly::BI__builtin_wasm_relaxed_madd_f16x8:
22587 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f16x8:
22588 case WebAssembly::BI__builtin_wasm_relaxed_madd_f32x4:
22589 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f32x4:
22590 case WebAssembly::BI__builtin_wasm_relaxed_madd_f64x2:
22591 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f64x2: {
22592 Value *A = EmitScalarExpr(E->getArg(0));
22593 Value *B = EmitScalarExpr(E->getArg(1));
22594 Value *C = EmitScalarExpr(E->getArg(2));
22595 unsigned IntNo;
22596 switch (BuiltinID) {
22597 case WebAssembly::BI__builtin_wasm_relaxed_madd_f16x8:
22598 case WebAssembly::BI__builtin_wasm_relaxed_madd_f32x4:
22599 case WebAssembly::BI__builtin_wasm_relaxed_madd_f64x2:
22600 IntNo = Intrinsic::wasm_relaxed_madd;
22601 break;
22602 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f16x8:
22603 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f32x4:
22604 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f64x2:
22605 IntNo = Intrinsic::wasm_relaxed_nmadd;
22606 break;
22607 default:
22608 llvm_unreachable("unexpected builtin ID");
22609 }
22610 Function *Callee = CGM.getIntrinsic(IntNo, A->getType());
22611 return Builder.CreateCall(Callee, {A, B, C});
22612 }
22613 case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i8x16:
22614 case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i16x8:
22615 case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i32x4:
22616 case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i64x2: {
22617 Value *A = EmitScalarExpr(E->getArg(0));
22618 Value *B = EmitScalarExpr(E->getArg(1));
22619 Value *C = EmitScalarExpr(E->getArg(2));
22620 Function *Callee =
22621 CGM.getIntrinsic(Intrinsic::wasm_relaxed_laneselect, A->getType());
22622 return Builder.CreateCall(Callee, {A, B, C});
22623 }
22624 case WebAssembly::BI__builtin_wasm_relaxed_swizzle_i8x16: {
22625 Value *Src = EmitScalarExpr(E->getArg(0));
22626 Value *Indices = EmitScalarExpr(E->getArg(1));
22627 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_relaxed_swizzle);
22628 return Builder.CreateCall(Callee, {Src, Indices});
22629 }
22630 case WebAssembly::BI__builtin_wasm_relaxed_min_f32x4:
22631 case WebAssembly::BI__builtin_wasm_relaxed_max_f32x4:
22632 case WebAssembly::BI__builtin_wasm_relaxed_min_f64x2:
22633 case WebAssembly::BI__builtin_wasm_relaxed_max_f64x2: {
22634 Value *LHS = EmitScalarExpr(E->getArg(0));
22635 Value *RHS = EmitScalarExpr(E->getArg(1));
22636 unsigned IntNo;
22637 switch (BuiltinID) {
22638 case WebAssembly::BI__builtin_wasm_relaxed_min_f32x4:
22639 case WebAssembly::BI__builtin_wasm_relaxed_min_f64x2:
22640 IntNo = Intrinsic::wasm_relaxed_min;
22641 break;
22642 case WebAssembly::BI__builtin_wasm_relaxed_max_f32x4:
22643 case WebAssembly::BI__builtin_wasm_relaxed_max_f64x2:
22644 IntNo = Intrinsic::wasm_relaxed_max;
22645 break;
22646 default:
22647 llvm_unreachable("unexpected builtin ID");
22648 }
22649 Function *Callee = CGM.getIntrinsic(IntNo, LHS->getType());
22650 return Builder.CreateCall(Callee, {LHS, RHS});
22651 }
22652 case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_i32x4_f32x4:
22653 case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_i32x4_f32x4:
22654 case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_zero_i32x4_f64x2:
22655 case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_zero_i32x4_f64x2: {
22656 Value *Vec = EmitScalarExpr(E->getArg(0));
22657 unsigned IntNo;
22658 switch (BuiltinID) {
22659 case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_i32x4_f32x4:
22660 IntNo = Intrinsic::wasm_relaxed_trunc_signed;
22661 break;
22662 case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_i32x4_f32x4:
22663 IntNo = Intrinsic::wasm_relaxed_trunc_unsigned;
22664 break;
22665 case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_zero_i32x4_f64x2:
22666 IntNo = Intrinsic::wasm_relaxed_trunc_signed_zero;
22667 break;
22668 case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_zero_i32x4_f64x2:
22669 IntNo = Intrinsic::wasm_relaxed_trunc_unsigned_zero;
22670 break;
22671 default:
22672 llvm_unreachable("unexpected builtin ID");
22673 }
22674 Function *Callee = CGM.getIntrinsic(IntNo);
22675 return Builder.CreateCall(Callee, {Vec});
22676 }
22677 case WebAssembly::BI__builtin_wasm_relaxed_q15mulr_s_i16x8: {
22678 Value *LHS = EmitScalarExpr(E->getArg(0));
22679 Value *RHS = EmitScalarExpr(E->getArg(1));
22680 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_relaxed_q15mulr_signed);
22681 return Builder.CreateCall(Callee, {LHS, RHS});
22682 }
22683 case WebAssembly::BI__builtin_wasm_relaxed_dot_i8x16_i7x16_s_i16x8: {
22684 Value *LHS = EmitScalarExpr(E->getArg(0));
22685 Value *RHS = EmitScalarExpr(E->getArg(1));
22686 Function *Callee =
22687 CGM.getIntrinsic(Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed);
22688 return Builder.CreateCall(Callee, {LHS, RHS});
22689 }
22690 case WebAssembly::BI__builtin_wasm_relaxed_dot_i8x16_i7x16_add_s_i32x4: {
22691 Value *LHS = EmitScalarExpr(E->getArg(0));
22692 Value *RHS = EmitScalarExpr(E->getArg(1));
22693 Value *Acc = EmitScalarExpr(E->getArg(2));
22694 Function *Callee =
22695 CGM.getIntrinsic(Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed);
22696 return Builder.CreateCall(Callee, {LHS, RHS, Acc});
22697 }
22698 case WebAssembly::BI__builtin_wasm_relaxed_dot_bf16x8_add_f32_f32x4: {
22699 Value *LHS = EmitScalarExpr(E->getArg(0));
22700 Value *RHS = EmitScalarExpr(E->getArg(1));
22701 Value *Acc = EmitScalarExpr(E->getArg(2));
22702 Function *Callee =
22703 CGM.getIntrinsic(Intrinsic::wasm_relaxed_dot_bf16x8_add_f32);
22704 return Builder.CreateCall(Callee, {LHS, RHS, Acc});
22705 }
22706 case WebAssembly::BI__builtin_wasm_loadf16_f32: {
22707 Value *Addr = EmitScalarExpr(E->getArg(0));
22708 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_loadf16_f32);
22709 return Builder.CreateCall(Callee, {Addr});
22710 }
22711 case WebAssembly::BI__builtin_wasm_storef16_f32: {
22712 Value *Val = EmitScalarExpr(E->getArg(0));
22713 Value *Addr = EmitScalarExpr(E->getArg(1));
22714 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_storef16_f32);
22715 return Builder.CreateCall(Callee, {Val, Addr});
22716 }
22717 case WebAssembly::BI__builtin_wasm_splat_f16x8: {
22718 Value *Val = EmitScalarExpr(E->getArg(0));
22719 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_splat_f16x8);
22720 return Builder.CreateCall(Callee, {Val});
22721 }
22722 case WebAssembly::BI__builtin_wasm_extract_lane_f16x8: {
22723 Value *Vector = EmitScalarExpr(E->getArg(0));
22724 Value *Index = EmitScalarExpr(E->getArg(1));
22725 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_extract_lane_f16x8);
22726 return Builder.CreateCall(Callee, {Vector, Index});
22727 }
22728 case WebAssembly::BI__builtin_wasm_replace_lane_f16x8: {
22729 Value *Vector = EmitScalarExpr(E->getArg(0));
22730 Value *Index = EmitScalarExpr(E->getArg(1));
22731 Value *Val = EmitScalarExpr(E->getArg(2));
22732 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_replace_lane_f16x8);
22733 return Builder.CreateCall(Callee, {Vector, Index, Val});
22734 }
22735 case WebAssembly::BI__builtin_wasm_table_get: {
22736 assert(E->getArg(0)->getType()->isArrayType());
22737 Value *Table = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
22738 Value *Index = EmitScalarExpr(E->getArg(1));
22739 Function *Callee;
22740 if (E->getType().isWebAssemblyExternrefType())
22741 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_get_externref);
22742 else if (E->getType().isWebAssemblyFuncrefType())
22743 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_get_funcref);
22744 else
22745 llvm_unreachable(
22746 "Unexpected reference type for __builtin_wasm_table_get");
22747 return Builder.CreateCall(Callee, {Table, Index});
22748 }
22749 case WebAssembly::BI__builtin_wasm_table_set: {
22750 assert(E->getArg(0)->getType()->isArrayType());
22751 Value *Table = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
22752 Value *Index = EmitScalarExpr(E->getArg(1));
22753 Value *Val = EmitScalarExpr(E->getArg(2));
22754 Function *Callee;
22755 if (E->getArg(2)->getType().isWebAssemblyExternrefType())
22756 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_set_externref);
22757 else if (E->getArg(2)->getType().isWebAssemblyFuncrefType())
22758 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_set_funcref);
22759 else
22760 llvm_unreachable(
22761 "Unexpected reference type for __builtin_wasm_table_set");
22762 return Builder.CreateCall(Callee, {Table, Index, Val});
22763 }
22764 case WebAssembly::BI__builtin_wasm_table_size: {
22765 assert(E->getArg(0)->getType()->isArrayType());
22766 Value *Value = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
22767 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_table_size);
22768 return Builder.CreateCall(Callee, Value);
22769 }
22770 case WebAssembly::BI__builtin_wasm_table_grow: {
22771 assert(E->getArg(0)->getType()->isArrayType());
22772 Value *Table = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
22773 Value *Val = EmitScalarExpr(E->getArg(1));
22774 Value *NElems = EmitScalarExpr(E->getArg(2));
22775
22776 Function *Callee;
22777 if (E->getArg(1)->getType().isWebAssemblyExternrefType())
22778 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_grow_externref);
22779 else if (E->getArg(2)->getType().isWebAssemblyFuncrefType())
22780 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_fill_funcref);
22781 else
22782 llvm_unreachable(
22783 "Unexpected reference type for __builtin_wasm_table_grow");
22784
22785 return Builder.CreateCall(Callee, {Table, Val, NElems});
22786 }
22787 case WebAssembly::BI__builtin_wasm_table_fill: {
22788 assert(E->getArg(0)->getType()->isArrayType());
22789 Value *Table = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
22790 Value *Index = EmitScalarExpr(E->getArg(1));
22791 Value *Val = EmitScalarExpr(E->getArg(2));
22792 Value *NElems = EmitScalarExpr(E->getArg(3));
22793
22794 Function *Callee;
22795 if (E->getArg(2)->getType().isWebAssemblyExternrefType())
22796 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_fill_externref);
22797 else if (E->getArg(2)->getType().isWebAssemblyFuncrefType())
22798 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_fill_funcref);
22799 else
22800 llvm_unreachable(
22801 "Unexpected reference type for __builtin_wasm_table_fill");
22802
22803 return Builder.CreateCall(Callee, {Table, Index, Val, NElems});
22804 }
22805 case WebAssembly::BI__builtin_wasm_table_copy: {
22806 assert(E->getArg(0)->getType()->isArrayType());
22807 Value *TableX = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
22808 Value *TableY = EmitArrayToPointerDecay(E->getArg(1)).emitRawPointer(*this);
22809 Value *DstIdx = EmitScalarExpr(E->getArg(2));
22810 Value *SrcIdx = EmitScalarExpr(E->getArg(3));
22811 Value *NElems = EmitScalarExpr(E->getArg(4));
22812
22813 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_table_copy);
22814
22815 return Builder.CreateCall(Callee, {TableX, TableY, SrcIdx, DstIdx, NElems});
22816 }
22817 default:
22818 return nullptr;
22819 }
22820}
22821
22822static std::pair<Intrinsic::ID, unsigned>
22823getIntrinsicForHexagonNonClangBuiltin(unsigned BuiltinID) {
22824 struct Info {
22825 unsigned BuiltinID;
22826 Intrinsic::ID IntrinsicID;
22827 unsigned VecLen;
22828 };
22829 static Info Infos[] = {
22830#define CUSTOM_BUILTIN_MAPPING(x,s) \
22831 { Hexagon::BI__builtin_HEXAGON_##x, Intrinsic::hexagon_##x, s },
22832 CUSTOM_BUILTIN_MAPPING(L2_loadrub_pci, 0)
22833 CUSTOM_BUILTIN_MAPPING(L2_loadrb_pci, 0)
22834 CUSTOM_BUILTIN_MAPPING(L2_loadruh_pci, 0)
22835 CUSTOM_BUILTIN_MAPPING(L2_loadrh_pci, 0)
22836 CUSTOM_BUILTIN_MAPPING(L2_loadri_pci, 0)
22837 CUSTOM_BUILTIN_MAPPING(L2_loadrd_pci, 0)
22838 CUSTOM_BUILTIN_MAPPING(L2_loadrub_pcr, 0)
22839 CUSTOM_BUILTIN_MAPPING(L2_loadrb_pcr, 0)
22840 CUSTOM_BUILTIN_MAPPING(L2_loadruh_pcr, 0)
22841 CUSTOM_BUILTIN_MAPPING(L2_loadrh_pcr, 0)
22842 CUSTOM_BUILTIN_MAPPING(L2_loadri_pcr, 0)
22843 CUSTOM_BUILTIN_MAPPING(L2_loadrd_pcr, 0)
22844 CUSTOM_BUILTIN_MAPPING(S2_storerb_pci, 0)
22845 CUSTOM_BUILTIN_MAPPING(S2_storerh_pci, 0)
22846 CUSTOM_BUILTIN_MAPPING(S2_storerf_pci, 0)
22847 CUSTOM_BUILTIN_MAPPING(S2_storeri_pci, 0)
22848 CUSTOM_BUILTIN_MAPPING(S2_storerd_pci, 0)
22849 CUSTOM_BUILTIN_MAPPING(S2_storerb_pcr, 0)
22850 CUSTOM_BUILTIN_MAPPING(S2_storerh_pcr, 0)
22851 CUSTOM_BUILTIN_MAPPING(S2_storerf_pcr, 0)
22852 CUSTOM_BUILTIN_MAPPING(S2_storeri_pcr, 0)
22853 CUSTOM_BUILTIN_MAPPING(S2_storerd_pcr, 0)
22854 // Legacy builtins that take a vector in place of a vector predicate.
22855 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstoreq, 64)
22856 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorenq, 64)
22857 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentq, 64)
22858 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentnq, 64)
22859 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstoreq_128B, 128)
22860 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorenq_128B, 128)
22861 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentq_128B, 128)
22862 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentnq_128B, 128)
22863#include "clang/Basic/BuiltinsHexagonMapCustomDep.def"
22864#undef CUSTOM_BUILTIN_MAPPING
22865 };
22866
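// Infos is sorted by BuiltinID on first use (the SortOnce idiom below), so
// the lookup is a binary search: lower_bound finds the first candidate and
// the equality check distinguishes a real custom mapping from "not found".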
22867 auto CmpInfo = [] (Info A, Info B) { return A.BuiltinID < B.BuiltinID; };
22868 static const bool SortOnce = (llvm::sort(Infos, CmpInfo), true);
22869 (void)SortOnce;
22870
22871 const Info *F = llvm::lower_bound(Infos, Info{BuiltinID, 0, 0}, CmpInfo);
22872 if (F == std::end(Infos) || F->BuiltinID != BuiltinID)
22873 return {Intrinsic::not_intrinsic, 0};
22874
22875 return {F->IntrinsicID, F->VecLen};
22876}
22877
22878Value *CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID,
22879 const CallExpr *E) {
22880 Intrinsic::ID ID;
22881 unsigned VecLen;
22882 std::tie(ID, VecLen) = getIntrinsicForHexagonNonClangBuiltin(BuiltinID);
22883
22884 auto MakeCircOp = [this, E](unsigned IntID, bool IsLoad) {
22885 // The base pointer is passed by address, so it needs to be loaded.
22886 Address A = EmitPointerWithAlignment(E->getArg(0));
22887 Address BP = Address(A.emitRawPointer(*this), Int8PtrTy, A.getAlignment());
22888 llvm::Value *Base = Builder.CreateLoad(BP);
22889 // The treatment of both loads and stores is the same: the arguments for
22890 // the builtin are the same as the arguments for the intrinsic.
22891 // Load:
22892 // builtin(Base, Inc, Mod, Start) -> intr(Base, Inc, Mod, Start)
22893 // builtin(Base, Mod, Start) -> intr(Base, Mod, Start)
22894 // Store:
22895 // builtin(Base, Inc, Mod, Val, Start) -> intr(Base, Inc, Mod, Val, Start)
22896 // builtin(Base, Mod, Val, Start) -> intr(Base, Mod, Val, Start)
22897 SmallVector<llvm::Value*,5> Ops = { Base };
22898 for (unsigned i = 1, e = E->getNumArgs(); i != e; ++i)
22899 Ops.push_back(EmitScalarExpr(E->getArg(i)));
22900
22901 llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(IntID), Ops);
22902 // The load intrinsics generate two results (Value, NewBase), stores
22903 // generate one (NewBase). The new base address needs to be stored.
22904 llvm::Value *NewBase = IsLoad ? Builder.CreateExtractValue(Result, 1)
22905 : Result;
22906 llvm::Value *LV = EmitScalarExpr(E->getArg(0));
22907 Address Dest = EmitPointerWithAlignment(E->getArg(0));
22908 llvm::Value *RetVal =
22909 Builder.CreateAlignedStore(NewBase, LV, Dest.getAlignment());
22910 if (IsLoad)
22911 RetVal = Builder.CreateExtractValue(Result, 0);
22912 return RetVal;
22913 };
22914
22915 // Handle the conversion of bit-reverse load intrinsics to bit code.
22916 // The intrinsic call after this function only reads from memory and the
22917 // write to memory is handled by the store instruction.
22918 auto MakeBrevLd = [this, E](unsigned IntID, llvm::Type *DestTy) {
22919 // The intrinsic generates one result, which is the new value for the base
22920 // pointer. It needs to be returned. The result of the load instruction is
22921 // passed to the intrinsic by address, so the value needs to be stored.
22922 llvm::Value *BaseAddress = EmitScalarExpr(E->getArg(0));
22923
22924 // Expressions like &(*pt++) increment the pointer each time they are
22925 // evaluated; EmitPointerWithAlignment and EmitScalarExpr evaluate the
22926 // expression once per call.
22927 Address DestAddr = EmitPointerWithAlignment(E->getArg(1));
22928 DestAddr = DestAddr.withElementType(Int8Ty);
22929 llvm::Value *DestAddress = DestAddr.emitRawPointer(*this);
22930
22931 // Operands are Base, Dest, Modifier.
22932 // The intrinsic format in LLVM IR is defined as
22933 // { ValueType, i8* } (i8*, i32).
22934 llvm::Value *Result = Builder.CreateCall(
22935 CGM.getIntrinsic(IntID), {BaseAddress, EmitScalarExpr(E->getArg(2))});
22936
22937 // The value needs to be stored as the variable is passed by reference.
22938 llvm::Value *DestVal = Builder.CreateExtractValue(Result, 0);
22939
22940 // The store needs to be truncated to fit the destination type.
22941 // While i32 and i64 are natively supported on Hexagon, i8 and i16 need
22942 // to be handled with stores of the respective destination type.
22943 DestVal = Builder.CreateTrunc(DestVal, DestTy);
22944
22945 Builder.CreateAlignedStore(DestVal, DestAddress, DestAddr.getAlignment());
22946 // The updated value of the base pointer is returned.
22947 return Builder.CreateExtractValue(Result, 1);
22948 };
22949
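// V2Q converts an HVX vector to a predicate register via vandvrt with an
// all-ones scalar; Q2V performs the inverse conversion via vandqrt. The
// _128B variants are selected when operating in 128-byte HVX mode.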
22950 auto V2Q = [this, VecLen] (llvm::Value *Vec) {
22951 Intrinsic::ID ID = VecLen == 128 ? Intrinsic::hexagon_V6_vandvrt_128B
22952 : Intrinsic::hexagon_V6_vandvrt;
22953 return Builder.CreateCall(CGM.getIntrinsic(ID),
22954 {Vec, Builder.getInt32(-1)});
22955 };
22956 auto Q2V = [this, VecLen] (llvm::Value *Pred) {
22957 Intrinsic::ID ID = VecLen == 128 ? Intrinsic::hexagon_V6_vandqrt_128B
22958 : Intrinsic::hexagon_V6_vandqrt;
22959 return Builder.CreateCall(CGM.getIntrinsic(ID),
22960 {Pred, Builder.getInt32(-1)});
22961 };
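  // V2Q and Q2V convert between HVX vectors and predicate (Q) registers by
  // and-ing with an all-ones scalar: V2Q uses vandvrt to produce a predicate
  // from a vector, Q2V uses vandqrt for the reverse direction, and the _128B
  // variants are selected when the HVX vector length is 128 bytes.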
22962
22963 switch (BuiltinID) {
22964 // These intrinsics return a tuple {Vector, VectorPred} in LLVM IR,
22965 // and the corresponding C/C++ builtins use loads/stores to update
22966 // the predicate.
22967 case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry:
22968 case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry_128B:
22969 case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry:
22970 case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry_128B: {
22971 // Get the type from the 0-th argument.
22972 llvm::Type *VecType = ConvertType(E->getArg(0)->getType());
22973 Address PredAddr =
22974 EmitPointerWithAlignment(E->getArg(2)).withElementType(VecType);
22975 llvm::Value *PredIn = V2Q(Builder.CreateLoad(PredAddr));
22976 llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID),
22977 {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)), PredIn});
22978
22979 llvm::Value *PredOut = Builder.CreateExtractValue(Result, 1);
22980 Builder.CreateAlignedStore(Q2V(PredOut), PredAddr.emitRawPointer(*this),
22981 PredAddr.getAlignment());
22982 return Builder.CreateExtractValue(Result, 0);
22983 }
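  // For example, __builtin_HEXAGON_V6_vaddcarry(A, B, &Pred) loads the
  // predicate vector from &Pred, converts it with V2Q, feeds it to the
  // intrinsic as the carry-in, stores the carry-out (converted back with
  // Q2V) to &Pred, and returns the vector sum.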
22984 // These are identical to the builtins above, except they don't consume
22985 // input carry, only generate carry-out. Since they still produce two
22986 // outputs, generate the store of the predicate, but no load.
22987 case Hexagon::BI__builtin_HEXAGON_V6_vaddcarryo:
22988 case Hexagon::BI__builtin_HEXAGON_V6_vaddcarryo_128B:
22989 case Hexagon::BI__builtin_HEXAGON_V6_vsubcarryo:
22990 case Hexagon::BI__builtin_HEXAGON_V6_vsubcarryo_128B: {
22991 // Get the type from the 0-th argument.
22992 llvm::Type *VecType = ConvertType(E->getArg(0)->getType());
22993 Address PredAddr =
22994 EmitPointerWithAlignment(E->getArg(2)).withElementType(VecType);
22995 llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID),
22996 {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});
22997
22998 llvm::Value *PredOut = Builder.CreateExtractValue(Result, 1);
22999 Builder.CreateAlignedStore(Q2V(PredOut), PredAddr.emitRawPointer(*this),
23000 PredAddr.getAlignment());
23001 return Builder.CreateExtractValue(Result, 0);
23002 }
23003
23004 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstoreq:
23005 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorenq:
23006 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentq:
23007 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentnq:
23008 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstoreq_128B:
23009 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorenq_128B:
23010 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentq_128B:
23011 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentnq_128B: {
23012 SmallVector<llvm::Value*,4> Ops;
23013 const Expr *PredOp = E->getArg(0);
23014 // There will be an implicit cast to a boolean vector. Strip it.
23015 if (auto *Cast = dyn_cast<ImplicitCastExpr>(PredOp)) {
23016 if (Cast->getCastKind() == CK_BitCast)
23017 PredOp = Cast->getSubExpr();
23018 Ops.push_back(V2Q(EmitScalarExpr(PredOp)));
23019 }
23020 for (int i = 1, e = E->getNumArgs(); i != e; ++i)
23021 Ops.push_back(EmitScalarExpr(E->getArg(i)));
23022 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
23023 }
23024
23025 case Hexagon::BI__builtin_HEXAGON_L2_loadrub_pci:
23026 case Hexagon::BI__builtin_HEXAGON_L2_loadrb_pci:
23027 case Hexagon::BI__builtin_HEXAGON_L2_loadruh_pci:
23028 case Hexagon::BI__builtin_HEXAGON_L2_loadrh_pci:
23029 case Hexagon::BI__builtin_HEXAGON_L2_loadri_pci:
23030 case Hexagon::BI__builtin_HEXAGON_L2_loadrd_pci:
23031 case Hexagon::BI__builtin_HEXAGON_L2_loadrub_pcr:
23032 case Hexagon::BI__builtin_HEXAGON_L2_loadrb_pcr:
23033 case Hexagon::BI__builtin_HEXAGON_L2_loadruh_pcr:
23034 case Hexagon::BI__builtin_HEXAGON_L2_loadrh_pcr:
23035 case Hexagon::BI__builtin_HEXAGON_L2_loadri_pcr:
23036 case Hexagon::BI__builtin_HEXAGON_L2_loadrd_pcr:
23037 return MakeCircOp(ID, /*IsLoad=*/true);
23038 case Hexagon::BI__builtin_HEXAGON_S2_storerb_pci:
23039 case Hexagon::BI__builtin_HEXAGON_S2_storerh_pci:
23040 case Hexagon::BI__builtin_HEXAGON_S2_storerf_pci:
23041 case Hexagon::BI__builtin_HEXAGON_S2_storeri_pci:
23042 case Hexagon::BI__builtin_HEXAGON_S2_storerd_pci:
23043 case Hexagon::BI__builtin_HEXAGON_S2_storerb_pcr:
23044 case Hexagon::BI__builtin_HEXAGON_S2_storerh_pcr:
23045 case Hexagon::BI__builtin_HEXAGON_S2_storerf_pcr:
23046 case Hexagon::BI__builtin_HEXAGON_S2_storeri_pcr:
23047 case Hexagon::BI__builtin_HEXAGON_S2_storerd_pcr:
23048 return MakeCircOp(ID, /*IsLoad=*/false);
23049 case Hexagon::BI__builtin_brev_ldub:
23050 return MakeBrevLd(Intrinsic::hexagon_L2_loadrub_pbr, Int8Ty);
23051 case Hexagon::BI__builtin_brev_ldb:
23052 return MakeBrevLd(Intrinsic::hexagon_L2_loadrb_pbr, Int8Ty);
23053 case Hexagon::BI__builtin_brev_lduh:
23054 return MakeBrevLd(Intrinsic::hexagon_L2_loadruh_pbr, Int16Ty);
23055 case Hexagon::BI__builtin_brev_ldh:
23056 return MakeBrevLd(Intrinsic::hexagon_L2_loadrh_pbr, Int16Ty);
23057 case Hexagon::BI__builtin_brev_ldw:
23058 return MakeBrevLd(Intrinsic::hexagon_L2_loadri_pbr, Int32Ty);
23059 case Hexagon::BI__builtin_brev_ldd:
23060 return MakeBrevLd(Intrinsic::hexagon_L2_loadrd_pbr, Int64Ty);
23061 } // switch
23062
23063 return nullptr;
23064}
23065
23066Value *CodeGenFunction::EmitRISCVCpuIs(const CallExpr *E) {
23067 const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
23068 StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
23069 return EmitRISCVCpuIs(CPUStr);
23070}
23071
23072Value *CodeGenFunction::EmitRISCVCpuIs(StringRef CPUStr) {
23073 llvm::Type *Int32Ty = Builder.getInt32Ty();
23074 llvm::Type *Int64Ty = Builder.getInt64Ty();
23075 llvm::StructType *StructTy = llvm::StructType::get(Int32Ty, Int64Ty, Int64Ty);
23076 llvm::Constant *RISCVCPUModel =
23077 CGM.CreateRuntimeVariable(StructTy, "__riscv_cpu_model");
23078 cast<llvm::GlobalValue>(RISCVCPUModel)->setDSOLocal(true);
23079
23080 auto loadRISCVCPUID = [&](unsigned Index) {
23081 Value *Ptr = Builder.CreateStructGEP(StructTy, RISCVCPUModel, Index);
23082 Value *CPUID = Builder.CreateAlignedLoad(StructTy->getTypeAtIndex(Index),
23083 Ptr, llvm::MaybeAlign());
23084 return CPUID;
23085 };
23086
23087 const llvm::RISCV::CPUModel Model = llvm::RISCV::getCPUModel(CPUStr);
23088
23089 // Compare mvendorid.
23090 Value *VendorID = loadRISCVCPUID(0);
23091 Value *Result =
23092 Builder.CreateICmpEQ(VendorID, Builder.getInt32(Model.MVendorID));
23093
23094 // Compare marchid.
23095 Value *ArchID = loadRISCVCPUID(1);
23096 Result = Builder.CreateAnd(
23097 Result, Builder.CreateICmpEQ(ArchID, Builder.getInt64(Model.MArchID)));
23098
23099 // Compare mimpid.
23100 Value *ImpID = loadRISCVCPUID(2);
23101 Result = Builder.CreateAnd(
23102 Result, Builder.CreateICmpEQ(ImpID, Builder.getInt64(Model.MImpID)));
23103
23104 return Result;
23105}
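// For example, __builtin_cpu_is("<cpu>") (for a CPU name known to the LLVM
// RISC-V target parser) expands to three loads from __riscv_cpu_model and
// yields true only if mvendorid, marchid and mimpid all match the values
// recorded for that CPU.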
23106
23107Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
23108 const CallExpr *E,
23109 ReturnValueSlot ReturnValue) {
23110
23111 if (BuiltinID == Builtin::BI__builtin_cpu_supports)
23112 return EmitRISCVCpuSupports(E);
23113 if (BuiltinID == Builtin::BI__builtin_cpu_init)
23114 return EmitRISCVCpuInit();
23115 if (BuiltinID == Builtin::BI__builtin_cpu_is)
23116 return EmitRISCVCpuIs(E);
23117
23118 SmallVector<Value *, 4> Ops;
23119 llvm::Type *ResultType = ConvertType(E->getType());
23120
23121 // Find out if any arguments are required to be integer constant expressions.
23122 unsigned ICEArguments = 0;
23123 ASTContext::GetBuiltinTypeError Error;
23124 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
23125 if (Error == ASTContext::GE_Missing_type) {
23126 // Vector intrinsics don't have a type string.
23127 assert(BuiltinID >= clang::RISCV::FirstRVVBuiltin &&
23128 BuiltinID <= clang::RISCV::LastRVVBuiltin);
23129 ICEArguments = 0;
23130 if (BuiltinID == RISCVVector::BI__builtin_rvv_vget_v ||
23131 BuiltinID == RISCVVector::BI__builtin_rvv_vset_v)
23132 ICEArguments = 1 << 1;
23133 } else {
23134 assert(Error == ASTContext::GE_None && "Unexpected error");
23135 }
23136
23137 if (BuiltinID == RISCV::BI__builtin_riscv_ntl_load)
23138 ICEArguments |= (1 << 1);
23139 if (BuiltinID == RISCV::BI__builtin_riscv_ntl_store)
23140 ICEArguments |= (1 << 2);
23141
23142 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
23143 // Handle aggregate argument, namely RVV tuple types in segment load/store
23144 if (hasAggregateEvaluationKind(E->getArg(i)->getType())) {
23145 LValue L = EmitAggExprToLValue(E->getArg(i));
23146 llvm::Value *AggValue = Builder.CreateLoad(L.getAddress());
23147 Ops.push_back(AggValue);
23148 continue;
23149 }
23150 Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
23151 }
23152
23153 Intrinsic::ID ID = Intrinsic::not_intrinsic;
23154 // The 0th bit simulates the `vta` of RVV
23155 // The 1st bit simulates the `vma` of RVV
23156 constexpr unsigned RVV_VTA = 0x1;
23157 constexpr unsigned RVV_VMA = 0x2;
23158 int PolicyAttrs = 0;
23159 bool IsMasked = false;
23160 // This is used by segment load/store to determine its LLVM type.
23161 unsigned SegInstSEW = 8;
23162
23163 // Required for overloaded intrinsics.
23164 SmallVector<llvm::Type *, 2> IntrinsicTypes;
23165 switch (BuiltinID) {
23166 default: llvm_unreachable("unexpected builtin ID");
23167 case RISCV::BI__builtin_riscv_orc_b_32:
23168 case RISCV::BI__builtin_riscv_orc_b_64:
23169 case RISCV::BI__builtin_riscv_clmul_32:
23170 case RISCV::BI__builtin_riscv_clmul_64:
23171 case RISCV::BI__builtin_riscv_clmulh_32:
23172 case RISCV::BI__builtin_riscv_clmulh_64:
23173 case RISCV::BI__builtin_riscv_clmulr_32:
23174 case RISCV::BI__builtin_riscv_clmulr_64:
23175 case RISCV::BI__builtin_riscv_xperm4_32:
23176 case RISCV::BI__builtin_riscv_xperm4_64:
23177 case RISCV::BI__builtin_riscv_xperm8_32:
23178 case RISCV::BI__builtin_riscv_xperm8_64:
23179 case RISCV::BI__builtin_riscv_brev8_32:
23180 case RISCV::BI__builtin_riscv_brev8_64:
23181 case RISCV::BI__builtin_riscv_zip_32:
23182 case RISCV::BI__builtin_riscv_unzip_32: {
23183 switch (BuiltinID) {
23184 default: llvm_unreachable("unexpected builtin ID");
23185 // Zbb
23186 case RISCV::BI__builtin_riscv_orc_b_32:
23187 case RISCV::BI__builtin_riscv_orc_b_64:
23188 ID = Intrinsic::riscv_orc_b;
23189 break;
23190
23191 // Zbc
23192 case RISCV::BI__builtin_riscv_clmul_32:
23193 case RISCV::BI__builtin_riscv_clmul_64:
23194 ID = Intrinsic::riscv_clmul;
23195 break;
23196 case RISCV::BI__builtin_riscv_clmulh_32:
23197 case RISCV::BI__builtin_riscv_clmulh_64:
23198 ID = Intrinsic::riscv_clmulh;
23199 break;
23200 case RISCV::BI__builtin_riscv_clmulr_32:
23201 case RISCV::BI__builtin_riscv_clmulr_64:
23202 ID = Intrinsic::riscv_clmulr;
23203 break;
23204
23205 // Zbkx
23206 case RISCV::BI__builtin_riscv_xperm8_32:
23207 case RISCV::BI__builtin_riscv_xperm8_64:
23208 ID = Intrinsic::riscv_xperm8;
23209 break;
23210 case RISCV::BI__builtin_riscv_xperm4_32:
23211 case RISCV::BI__builtin_riscv_xperm4_64:
23212 ID = Intrinsic::riscv_xperm4;
23213 break;
23214
23215 // Zbkb
23216 case RISCV::BI__builtin_riscv_brev8_32:
23217 case RISCV::BI__builtin_riscv_brev8_64:
23218 ID = Intrinsic::riscv_brev8;
23219 break;
23220 case RISCV::BI__builtin_riscv_zip_32:
23221 ID = Intrinsic::riscv_zip;
23222 break;
23223 case RISCV::BI__builtin_riscv_unzip_32:
23224 ID = Intrinsic::riscv_unzip;
23225 break;
23226 }
23227
23228 IntrinsicTypes = {ResultType};
23229 break;
23230 }
23231
23232 // Zk builtins
23233
23234 // Zknh
23235 case RISCV::BI__builtin_riscv_sha256sig0:
23236 ID = Intrinsic::riscv_sha256sig0;
23237 break;
23238 case RISCV::BI__builtin_riscv_sha256sig1:
23239 ID = Intrinsic::riscv_sha256sig1;
23240 break;
23241 case RISCV::BI__builtin_riscv_sha256sum0:
23242 ID = Intrinsic::riscv_sha256sum0;
23243 break;
23244 case RISCV::BI__builtin_riscv_sha256sum1:
23245 ID = Intrinsic::riscv_sha256sum1;
23246 break;
23247
23248 // Zksed
23249 case RISCV::BI__builtin_riscv_sm4ks:
23250 ID = Intrinsic::riscv_sm4ks;
23251 break;
23252 case RISCV::BI__builtin_riscv_sm4ed:
23253 ID = Intrinsic::riscv_sm4ed;
23254 break;
23255
23256 // Zksh
23257 case RISCV::BI__builtin_riscv_sm3p0:
23258 ID = Intrinsic::riscv_sm3p0;
23259 break;
23260 case RISCV::BI__builtin_riscv_sm3p1:
23261 ID = Intrinsic::riscv_sm3p1;
23262 break;
23263
23264 case RISCV::BI__builtin_riscv_clz_32:
23265 case RISCV::BI__builtin_riscv_clz_64: {
23266 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
23267 Value *Result = Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
23268 if (Result->getType() != ResultType)
23269 Result =
23270 Builder.CreateIntCast(Result, ResultType, /*isSigned*/ false, "cast");
23271 return Result;
23272 }
23273 case RISCV::BI__builtin_riscv_ctz_32:
23274 case RISCV::BI__builtin_riscv_ctz_64: {
23275 Function *F = CGM.getIntrinsic(Intrinsic::cttz, Ops[0]->getType());
23276 Value *Result = Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
23277 if (Result->getType() != ResultType)
23278 Result =
23279 Builder.CreateIntCast(Result, ResultType, /*isSigned*/ false, "cast");
23280 return Result;
23281 }
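  // For example, __builtin_riscv_clz_32(X) becomes llvm.ctlz.i32(X, false)
  // and __builtin_riscv_ctz_32(X) becomes llvm.cttz.i32(X, false); passing
  // false as the second operand makes a zero input yield the bit width
  // instead of poison.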
23282
23283 // Zihintntl
23284 case RISCV::BI__builtin_riscv_ntl_load: {
23285 llvm::Type *ResTy = ConvertType(E->getType());
23286 unsigned DomainVal = 5; // Default __RISCV_NTLH_ALL
23287 if (Ops.size() == 2)
23288 DomainVal = cast<ConstantInt>(Ops[1])->getZExtValue();
23289
23290 llvm::MDNode *RISCVDomainNode = llvm::MDNode::get(
23291 getLLVMContext(),
23292 llvm::ConstantAsMetadata::get(Builder.getInt32(DomainVal)));
23293 llvm::MDNode *NontemporalNode = llvm::MDNode::get(
23294 getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
23295
23296 int Width;
23297 if (ResTy->isScalableTy()) {
23298 const ScalableVectorType *SVTy = cast<ScalableVectorType>(ResTy);
23299 llvm::Type *ScalarTy = ResTy->getScalarType();
23300 Width = ScalarTy->getPrimitiveSizeInBits() *
23301 SVTy->getElementCount().getKnownMinValue();
23302 } else
23303 Width = ResTy->getPrimitiveSizeInBits();
23304 LoadInst *Load = Builder.CreateLoad(
23305 Address(Ops[0], ResTy, CharUnits::fromQuantity(Width / 8)));
23306
23307 Load->setMetadata(llvm::LLVMContext::MD_nontemporal, NontemporalNode);
23308 Load->setMetadata(CGM.getModule().getMDKindID("riscv-nontemporal-domain"),
23309 RISCVDomainNode);
23310
23311 return Load;
23312 }
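  // For example, __builtin_riscv_ntl_load(Ptr) emits an ordinary load of the
  // pointee type annotated with !nontemporal metadata and with a
  // "riscv-nontemporal-domain" node carrying the domain value (5, i.e.
  // __RISCV_NTLH_ALL, when no domain argument is given).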
23313 case RISCV::BI__builtin_riscv_ntl_store: {
23314 unsigned DomainVal = 5; // Default __RISCV_NTLH_ALL
23315 if (Ops.size() == 3)
23316 DomainVal = cast<ConstantInt>(Ops[2])->getZExtValue();
23317
23318 llvm::MDNode *RISCVDomainNode = llvm::MDNode::get(
23319 getLLVMContext(),
23320 llvm::ConstantAsMetadata::get(Builder.getInt32(DomainVal)));
23321 llvm::MDNode *NontemporalNode = llvm::MDNode::get(
23322 getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
23323
23324 StoreInst *Store = Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
23325 Store->setMetadata(llvm::LLVMContext::MD_nontemporal, NontemporalNode);
23326 Store->setMetadata(CGM.getModule().getMDKindID("riscv-nontemporal-domain"),
23327 RISCVDomainNode);
23328
23329 return Store;
23330 }
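  // __builtin_riscv_ntl_store(Ptr, Val[, Domain]) is handled analogously: a
  // default-aligned store of Val tagged with the same two metadata nodes.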
23331 // XCValu
23332 case RISCV::BI__builtin_riscv_cv_alu_addN:
23333 ID = Intrinsic::riscv_cv_alu_addN;
23334 break;
23335 case RISCV::BI__builtin_riscv_cv_alu_addRN:
23336 ID = Intrinsic::riscv_cv_alu_addRN;
23337 break;
23338 case RISCV::BI__builtin_riscv_cv_alu_adduN:
23339 ID = Intrinsic::riscv_cv_alu_adduN;
23340 break;
23341 case RISCV::BI__builtin_riscv_cv_alu_adduRN:
23342 ID = Intrinsic::riscv_cv_alu_adduRN;
23343 break;
23344 case RISCV::BI__builtin_riscv_cv_alu_clip:
23345 ID = Intrinsic::riscv_cv_alu_clip;
23346 break;
23347 case RISCV::BI__builtin_riscv_cv_alu_clipu:
23348 ID = Intrinsic::riscv_cv_alu_clipu;
23349 break;
23350 case RISCV::BI__builtin_riscv_cv_alu_extbs:
23351 return Builder.CreateSExt(Builder.CreateTrunc(Ops[0], Int8Ty), Int32Ty,
23352 "extbs");
23353 case RISCV::BI__builtin_riscv_cv_alu_extbz:
23354 return Builder.CreateZExt(Builder.CreateTrunc(Ops[0], Int8Ty), Int32Ty,
23355 "extbz");
23356 case RISCV::BI__builtin_riscv_cv_alu_exths:
23357 return Builder.CreateSExt(Builder.CreateTrunc(Ops[0], Int16Ty), Int32Ty,
23358 "exths");
23359 case RISCV::BI__builtin_riscv_cv_alu_exthz:
23360 return Builder.CreateZExt(Builder.CreateTrunc(Ops[0], Int16Ty), Int32Ty,
23361 "exthz");
23362 case RISCV::BI__builtin_riscv_cv_alu_slet:
23363 return Builder.CreateZExt(Builder.CreateICmpSLE(Ops[0], Ops[1]), Int32Ty,
23364 "sle");
23365 case RISCV::BI__builtin_riscv_cv_alu_sletu:
23366 return Builder.CreateZExt(Builder.CreateICmpULE(Ops[0], Ops[1]), Int32Ty,
23367 "sleu");
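  // The XCValu extbs/extbz/exths/exthz builtins above are expanded inline as
  // a truncation followed by a sign- or zero-extension to i32, and slet/sletu
  // as an icmp sle/ule zero-extended to i32, without calling an intrinsic.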
23368 case RISCV::BI__builtin_riscv_cv_alu_subN:
23369 ID = Intrinsic::riscv_cv_alu_subN;
23370 break;
23371 case RISCV::BI__builtin_riscv_cv_alu_subRN:
23372 ID = Intrinsic::riscv_cv_alu_subRN;
23373 break;
23374 case RISCV::BI__builtin_riscv_cv_alu_subuN:
23375 ID = Intrinsic::riscv_cv_alu_subuN;
23376 break;
23377 case RISCV::BI__builtin_riscv_cv_alu_subuRN:
23378 ID = Intrinsic::riscv_cv_alu_subuRN;
23379 break;
23380
23381 // Vector builtins are handled from here.
23382#include "clang/Basic/riscv_vector_builtin_cg.inc"
23383
23384 // SiFive Vector builtins are handled from here.
23385#include "clang/Basic/riscv_sifive_vector_builtin_cg.inc"
23386 }
23387
23388 assert(ID != Intrinsic::not_intrinsic);
23389
23390 llvm::Function *F = CGM.getIntrinsic(ID, IntrinsicTypes);
23391 return Builder.CreateCall(F, Ops, "");
23392}
Defines the clang::ASTContext interface.
#define V(N, I)
Definition: ASTContext.h:3460
DynTypedNode Node
StringRef P
#define PPC_LNX_FEATURE(NAME, DESC, ENUMNAME, ENUMVAL, HWCAPN)
static constexpr SparcCPUInfo CPUInfo[]
Definition: Sparc.cpp:67
#define X86_CPU_SUBTYPE(ENUM, STR)
#define X86_CPU_SUBTYPE_ALIAS(ENUM, ALIAS)
#define X86_VENDOR(ENUM, STRING)
#define X86_CPU_TYPE_ALIAS(ENUM, ALIAS)
#define X86_CPU_TYPE(ENUM, STR)
static constexpr Builtin::Info BuiltinInfo[]
Definition: Builtins.cpp:32
static void Accumulate(SMap &SM, CFGBlock *B)
Definition: CFGStmtMap.cpp:49
static Value * EmitSpecialRegisterBuiltin(CodeGenFunction &CGF, const CallExpr *E, llvm::Type *RegisterType, llvm::Type *ValueType, SpecialRegisterAccessKind AccessKind, StringRef SysReg="")
Definition: CGBuiltin.cpp:8960
static llvm::Value * ARMMVEVectorReinterpret(CGBuilderTy &Builder, CodeGenFunction *CGF, llvm::Value *V, llvm::Type *DestType)
Definition: CGBuiltin.cpp:9796
static Value * MakeBinaryAtomicValue(CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E, AtomicOrdering Ordering=AtomicOrdering::SequentiallyConsistent)
Utility to insert an atomic instruction based on Intrinsic::ID and the expression node.
Definition: CGBuiltin.cpp:377
static char bitActionToX86BTCode(BitTest::ActionKind A)
Definition: CGBuiltin.cpp:1414
#define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier)
Definition: CGBuiltin.cpp:6986
static Intrinsic::ID getWaveActiveSumIntrinsic(llvm::Triple::ArchType Arch, CGHLSLRuntime &RT, QualType QT)
static Value * EmitAtomicCmpXchg128ForMSIntrin(CodeGenFunction &CGF, const CallExpr *E, AtomicOrdering SuccessOrdering)
Definition: CGBuiltin.cpp:569
#define INTRINSIC_X86_XSAVE_ID(NAME)
static CanQualType getOSLogArgType(ASTContext &C, int Size)
Get the argument type for arguments to os_log_helper.
Definition: CGBuiltin.cpp:2307
static Value * EmitOverflowCheckedAbs(CodeGenFunction &CGF, const CallExpr *E, bool SanitizeOverflow)
Definition: CGBuiltin.cpp:2273
static llvm::VectorType * GetFloatNeonType(CodeGenFunction *CGF, NeonTypeFlags IntTypeFlags)
Definition: CGBuiltin.cpp:6855
static Value * tryUseTestFPKind(CodeGenFunction &CGF, unsigned BuiltinID, Value *V)
Definition: CGBuiltin.cpp:2764
static llvm::Value * MVEImmediateShr(CGBuilderTy &Builder, llvm::Value *V, uint32_t Shift, bool Unsigned)
Definition: CGBuiltin.cpp:9766
static bool areBOSTypesCompatible(int From, int To)
Checks if using the result of __builtin_object_size(p, From) in place of __builtin_object_size(p,...
Definition: CGBuiltin.cpp:1029
static llvm::Value * SignOrZeroExtend(CGBuilderTy &Builder, llvm::Value *V, llvm::Type *T, bool Unsigned)
Definition: CGBuiltin.cpp:9759
static void InsertExplicitZeroOperand(CGBuilderTy &Builder, llvm::Type *Ty, SmallVectorImpl< Value * > &Ops)
static const ARMVectorIntrinsicInfo AArch64SMEIntrinsicMap[]
Definition: CGBuiltin.cpp:8001
static Value * EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID, const CallExpr *E, SmallVectorImpl< Value * > &Ops, llvm::Triple::ArchType Arch)
Definition: CGBuiltin.cpp:9986
#define MMA_VARIANTS_B1_AND(geom, type)
static void swapCommutativeSMEOperands(unsigned BuiltinID, SmallVectorImpl< Value * > &Ops)
static bool AArch64SISDIntrinsicsProvenSorted
Definition: CGBuiltin.cpp:8013
static Value * EmitX86CompressExpand(CodeGenFunction &CGF, ArrayRef< Value * > Ops, bool IsCompress)
static const ARMVectorIntrinsicInfo AArch64SVEIntrinsicMap[]
Definition: CGBuiltin.cpp:7983
static bool HasExtraNeonArgument(unsigned BuiltinID)
Return true if BuiltinID is an overloaded Neon intrinsic with an extra argument that specifies the ve...
Definition: CGBuiltin.cpp:9028
static bool TypeRequiresBuiltinLaunderImp(const ASTContext &Ctx, QualType Ty, llvm::SmallPtrSetImpl< const Decl * > &Seen)
Definition: CGBuiltin.cpp:2642
static Value * EmitAtomicIncrementValue(CodeGenFunction &CGF, const CallExpr *E, AtomicOrdering Ordering=AtomicOrdering::SequentiallyConsistent)
Definition: CGBuiltin.cpp:622
static std::pair< Intrinsic::ID, unsigned > getIntrinsicForHexagonNonClangBuiltin(unsigned BuiltinID)
static Value * emitRangedBuiltin(CodeGenFunction &CGF, unsigned IntrinsicID, int low, int high)
Definition: CGBuiltin.cpp:967
#define MMA_INTR(geom_op_type, layout)
static Value * EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC, bool Signed, ArrayRef< Value * > Ops)
static Value * emitUnaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID, unsigned ConstrainedIntrinsicID)
Definition: CGBuiltin.cpp:672
static Intrinsic::ID getDotProductIntrinsic(CGHLSLRuntime &RT, QualType QT)
#define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier)
Definition: CGBuiltin.cpp:6982
static bool AArch64SVEIntrinsicsProvenSorted
Definition: CGBuiltin.cpp:8014
static RValue EmitMSVCRTSetJmp(CodeGenFunction &CGF, MSVCSetJmpKind SJKind, const CallExpr *E)
MSVC handles setjmp a bit differently on different platforms.
Definition: CGBuiltin.cpp:1611
static const ARMVectorIntrinsicInfo * findARMVectorIntrinsicInMap(ArrayRef< ARMVectorIntrinsicInfo > IntrinsicMap, unsigned BuiltinID, bool &MapProvenSorted)
Definition: CGBuiltin.cpp:8018
static Value * EmitScalarFMAExpr(CodeGenFunction &CGF, const CallExpr *E, MutableArrayRef< Value * > Ops, Value *Upper, bool ZeroMask=false, unsigned PTIdx=0, bool NegAcc=false)
static Value * loadRISCVFeatureBits(unsigned Index, CGBuilderTy &Builder, CodeGenModule &CGM)
#define MUTATE_LDBL(func)
static Value * EmitX86ExpandLoad(CodeGenFunction &CGF, ArrayRef< Value * > Ops)
static unsigned CountCountedByAttrs(const RecordDecl *RD)
Definition: CGBuiltin.cpp:1089
static Value * emitMaybeConstrainedFPToIntRoundBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID, unsigned ConstrainedIntrinsicID)
Definition: CGBuiltin.cpp:791
static Value * EmitX86MaskedLoad(CodeGenFunction &CGF, ArrayRef< Value * > Ops, Align Alignment)
static bool TypeRequiresBuiltinLaunder(CodeGenModule &CGM, QualType Ty)
Determine if the specified type requires laundering by checking if it is a dynamic class type or cont...
Definition: CGBuiltin.cpp:2670
static void InsertExplicitUndefOperand(CGBuilderTy &Builder, llvm::Type *Ty, SmallVectorImpl< Value * > &Ops)
static Value * EmitISOVolatileLoad(CodeGenFunction &CGF, const CallExpr *E)
Definition: CGBuiltin.cpp:646
static struct WidthAndSignedness EncompassingIntegerType(ArrayRef< struct WidthAndSignedness > Types)
Definition: CGBuiltin.cpp:997
static Integer GetIntegerConstantValue(const Expr *E, ASTContext &Context)
Definition: CGBuiltin.cpp:9755
#define MMA_VARIANTS(geom, type)
static bool AArch64SMEIntrinsicsProvenSorted
Definition: CGBuiltin.cpp:8015
static llvm::Value * VectorZip(CGBuilderTy &Builder, llvm::Value *V0, llvm::Value *V1)
Definition: CGBuiltin.cpp:9833
static Value * EmitTargetArchBuiltinExpr(CodeGenFunction *CGF, unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue, llvm::Triple::ArchType Arch)
Definition: CGBuiltin.cpp:6738
constexpr unsigned SVEBitsPerBlock
static std::optional< CodeGenFunction::MSVCIntrin > translateX86ToMsvcIntrin(unsigned BuiltinID)
Definition: CGBuiltin.cpp:2007
static const std::pair< unsigned, unsigned > NEONEquivalentIntrinsicMap[]
Definition: CGBuiltin.cpp:7825
#define NEONMAP0(NameBase)
Definition: CGBuiltin.cpp:6979
static Value * EmitX86MaskedStore(CodeGenFunction &CGF, ArrayRef< Value * > Ops, Align Alignment)
static Value * handleHlslSplitdouble(const CallExpr *E, CodeGenFunction *CGF)
Definition: CGBuiltin.cpp:143
static Value * emitBinaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID, unsigned ConstrainedIntrinsicID)
Definition: CGBuiltin.cpp:689
static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E, Instruction::BinaryOps Op, bool Invert=false)
Utility to insert an atomic instruction based Intrinsic::ID and the expression node,...
Definition: CGBuiltin.cpp:429
static bool HasNoIndirectArgumentsOrResults(CGFunctionInfo const &FnInfo)
Checks no arguments or results are passed indirectly in the ABI (i.e.
Definition: CGBuiltin.cpp:903
static Value * EmitX86Muldq(CodeGenFunction &CGF, bool IsSigned, ArrayRef< Value * > Ops)
Value * readX18AsPtr(CodeGenFunction &CGF)
Helper for the read/write/add/inc X18 builtins: read the X18 register and return it as an i8 pointer.
Definition: CGBuiltin.cpp:250
static llvm::Value * ARMMVEVectorElementReverse(CGBuilderTy &Builder, llvm::Value *V, unsigned ReverseWidth)
Definition: CGBuiltin.cpp:9860
#define MMA_SATF_VARIANTS(geom, type)
static std::optional< CodeGenFunction::MSVCIntrin > translateAarch64ToMsvcIntrin(unsigned BuiltinID)
Definition: CGBuiltin.cpp:1848
static std::optional< CodeGenFunction::MSVCIntrin > translateArmToMsvcIntrin(unsigned BuiltinID)
Definition: CGBuiltin.cpp:1697
static llvm::Value * EmitBitTestIntrinsic(CodeGenFunction &CGF, unsigned BuiltinID, const CallExpr *E)
Emit a _bittest* intrinsic.
Definition: CGBuiltin.cpp:1475
static Value * emitBuiltinWithOneOverloadedType(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID, llvm::StringRef Name="")
Definition: CGBuiltin.cpp:766
static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap[]
Definition: CGBuiltin.cpp:6991
@ UnsignedAlts
Definition: CGBuiltin.cpp:6949
@ Vectorize1ArgType
Definition: CGBuiltin.cpp:6954
@ FpCmpzModifiers
Definition: CGBuiltin.cpp:6958
@ Use64BitVectors
Definition: CGBuiltin.cpp:6951
@ VectorizeArgTypes
Definition: CGBuiltin.cpp:6946
@ VectorRetGetArgs01
Definition: CGBuiltin.cpp:6956
@ InventFloatType
Definition: CGBuiltin.cpp:6948
@ AddRetType
Definition: CGBuiltin.cpp:6941
@ Add2ArgTypes
Definition: CGBuiltin.cpp:6943
@ VectorizeRetType
Definition: CGBuiltin.cpp:6945
@ VectorRet
Definition: CGBuiltin.cpp:6955
@ Add1ArgType
Definition: CGBuiltin.cpp:6942
@ Use128BitVectors
Definition: CGBuiltin.cpp:6952
static Value * EmitSignBit(CodeGenFunction &CGF, Value *V)
Emit the computation of the sign bit for a floating point value.
Definition: CGBuiltin.cpp:869
static Value * EmitFAbs(CodeGenFunction &CGF, Value *V)
EmitFAbs - Emit a call to @llvm.fabs().
Definition: CGBuiltin.cpp:860
#define CUSTOM_BUILTIN_MAPPING(x, s)
static Value * EmitX86CvtF16ToFloatExpr(CodeGenFunction &CGF, ArrayRef< Value * > Ops, llvm::Type *DstTy)
static bool isSpecialUnsignedMultiplySignedResult(unsigned BuiltinID, WidthAndSignedness Op1Info, WidthAndSignedness Op2Info, WidthAndSignedness ResultInfo)
Definition: CGBuiltin.cpp:2500
static llvm::Value * getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType)
Definition: CGBuiltin.cpp:1037
static llvm::Value * emitPPCLoadReserveIntrinsic(CodeGenFunction &CGF, unsigned BuiltinID, const CallExpr *E)
Definition: CGBuiltin.cpp:1550
static llvm::Value * VectorUnzip(CGBuilderTy &Builder, llvm::Value *V, bool Odd)
Definition: CGBuiltin.cpp:9822
static Value * EmitX86SExtMask(CodeGenFunction &CGF, Value *Op, llvm::Type *DstTy)
static Value * emitTernaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID, unsigned ConstrainedIntrinsicID)
Definition: CGBuiltin.cpp:726
static WidthAndSignedness getIntegerWidthAndSignedness(const clang::ASTContext &context, const clang::QualType Type)
Definition: CGBuiltin.cpp:985
static Value * EmitX86FunnelShift(CodeGenFunction &CGF, Value *Op0, Value *Op1, Value *Amt, bool IsRight)
static RValue EmitCheckedMixedSignMultiply(CodeGenFunction &CGF, const clang::Expr *Op1, WidthAndSignedness Op1Info, const clang::Expr *Op2, WidthAndSignedness Op2Info, const clang::Expr *ResultArg, QualType ResultQTy, WidthAndSignedness ResultInfo)
Emit a checked mixed-sign multiply.
Definition: CGBuiltin.cpp:2554
static llvm::ScalableVectorType * getSVEVectorForElementType(llvm::Type *EltTy)
static unsigned mutateLongDoubleBuiltin(unsigned BuiltinID)
Definition: CGBuiltin.cpp:2693
#define INTRINSIC_WITH_CC(NAME)
static llvm::FixedVectorType * GetNeonType(CodeGenFunction *CGF, NeonTypeFlags TypeFlags, bool HasLegalHalfType=true, bool V1Ty=false, bool AllowBFloatArgsAndRet=true)
Definition: CGBuiltin.cpp:6814
static RValue EmitBinaryAtomic(CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E)
Definition: CGBuiltin.cpp:420
static llvm::Value * ARMMVEConstantSplat(CGBuilderTy &Builder, llvm::Type *VT)
Definition: CGBuiltin.cpp:9848
static Value * EmitX86MaskLogic(CodeGenFunction &CGF, Instruction::BinaryOps Opc, ArrayRef< Value * > Ops, bool InvertLHS=false)
static Value * EmitFromInt(CodeGenFunction &CGF, llvm::Value *V, QualType T, llvm::Type *ResultType)
Definition: CGBuiltin.cpp:348
static Value * EmitAMDGCNBallotForExec(CodeGenFunction &CGF, const CallExpr *E, llvm::Type *RegisterType, llvm::Type *ValueType, bool isExecHi)
Definition: CGBuiltin.cpp:8939
static void emitSincosBuiltin(CodeGenFunction &CGF, const CallExpr *E, llvm::Intrinsic::ID IntrinsicID)
Definition: CGBuiltin.cpp:827
static void initializeAlloca(CodeGenFunction &CGF, AllocaInst *AI, Value *Size, Align AlignmentInBytes)
Definition: CGBuiltin.cpp:79
static Value * EmitX86Select(CodeGenFunction &CGF, Value *Mask, Value *Op0, Value *Op1)
SpecialRegisterAccessKind
Definition: CGBuiltin.cpp:8931
@ VolatileRead
Definition: CGBuiltin.cpp:8933
@ NormalRead
Definition: CGBuiltin.cpp:8932
static Value * EmitAtomicCmpXchgForMSIntrin(CodeGenFunction &CGF, const CallExpr *E, AtomicOrdering SuccessOrdering=AtomicOrdering::SequentiallyConsistent)
This function should be invoked to emit atomic cmpxchg for Microsoft's _InterlockedCompareExchange* i...
Definition: CGBuiltin.cpp:514
static Address CheckAtomicAlignment(CodeGenFunction &CGF, const CallExpr *E)
Definition: CGBuiltin.cpp:359
static Value * EmitX86FMAExpr(CodeGenFunction &CGF, const CallExpr *E, ArrayRef< Value * > Ops, unsigned BuiltinID, bool IsAddSub)
static Value * getMaskVecValue(CodeGenFunction &CGF, Value *Mask, unsigned NumElts)
static bool isSpecialMixedSignMultiply(unsigned BuiltinID, WidthAndSignedness Op1Info, WidthAndSignedness Op2Info, WidthAndSignedness ResultInfo)
Determine if a binop is a checked mixed-sign multiply we can specialize.
Definition: CGBuiltin.cpp:2542
static Value * MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E, bool ReturnBool)
Utility to insert an atomic cmpxchg instruction.
Definition: CGBuiltin.cpp:474
static Value * emitBinaryExpMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, const CallExpr *E, llvm::Intrinsic::ID IntrinsicID, llvm::Intrinsic::ID ConstrainedIntrinsicID)
Definition: CGBuiltin.cpp:706
static Value * EmitToInt(CodeGenFunction &CGF, llvm::Value *V, QualType T, llvm::IntegerType *IntType)
Emit the conversions required to turn the given value into an integer of the given size.
Definition: CGBuiltin.cpp:337
static llvm::Value * ARMMVEVectorSplat(CGBuilderTy &Builder, llvm::Value *V)
Definition: CGBuiltin.cpp:9788
static Value * EmitX86MaskedCompareResult(CodeGenFunction &CGF, Value *Cmp, unsigned NumElts, Value *MaskIn)
static Value * EmitX86CompressStore(CodeGenFunction &CGF, ArrayRef< Value * > Ops)
static bool NEONSIMDIntrinsicsProvenSorted
Definition: CGBuiltin.cpp:8010
static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[]
Definition: CGBuiltin.cpp:7310
static Value * EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E)
Definition: CGBuiltin.cpp:401
static Intrinsic::ID getFirstBitHighIntrinsic(CGHLSLRuntime &RT, QualType QT)
static llvm::Value * EmitOverflowIntrinsic(CodeGenFunction &CGF, const llvm::Intrinsic::ID IntrinsicID, llvm::Value *X, llvm::Value *Y, llvm::Value *&Carry)
Emit a call to llvm.
Definition: CGBuiltin.cpp:952
static Value * EmitCommonNeonSISDBuiltinExpr(CodeGenFunction &CGF, const ARMVectorIntrinsicInfo &SISDInfo, SmallVectorImpl< Value * > &Ops, const CallExpr *E)
Definition: CGBuiltin.cpp:8076
static Value * emitFPIntBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID)
Definition: CGBuiltin.cpp:779
#define MMA_LDST(n, geom_op_type)
static Value * EmitX86vpcom(CodeGenFunction &CGF, ArrayRef< Value * > Ops, bool IsSigned)
static Value * emitFrexpBuiltin(CodeGenFunction &CGF, const CallExpr *E, llvm::Intrinsic::ID IntrinsicID)
Definition: CGBuiltin.cpp:809
static Value * EmitX86ConvertToMask(CodeGenFunction &CGF, Value *In)
static Value * EmitISOVolatileStore(CodeGenFunction &CGF, const CallExpr *E)
Definition: CGBuiltin.cpp:658
static Value * EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF, unsigned IntrinsicID, const CallExpr *E)
Handle a SystemZ function in which the final argument is a pointer to an int that receives the post-i...
static RValue EmitHipStdParUnsupportedBuiltin(CodeGenFunction *CGF, const FunctionDecl *FD)
Definition: CGBuiltin.cpp:2775
static llvm::Value * EmitX86BitTestIntrinsic(CodeGenFunction &CGF, BitTest BT, const CallExpr *E, Value *BitBase, Value *BitPos)
Definition: CGBuiltin.cpp:1424
static RValue EmitCheckedUnsignedMultiplySignedResult(CodeGenFunction &CGF, const clang::Expr *Op1, WidthAndSignedness Op1Info, const clang::Expr *Op2, WidthAndSignedness Op2Info, const clang::Expr *ResultArg, QualType ResultQTy, WidthAndSignedness ResultInfo)
Definition: CGBuiltin.cpp:2508
static Value * emitCallMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, unsigned IntrinsicID, unsigned ConstrainedIntrinsicID, llvm::Type *Ty, ArrayRef< Value * > Args)
Definition: CGBuiltin.cpp:745
static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD, const CallExpr *E, llvm::Constant *calleeValue)
Definition: CGBuiltin.cpp:914
static Value * handleAsDoubleBuiltin(CodeGenFunction &CGF, const CallExpr *E)
Definition: CGBuiltin.cpp:213
static Value * EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E)
Definition: CGBuiltin.cpp:412
static Value * EmitX86Ternlog(CodeGenFunction &CGF, bool ZeroMask, ArrayRef< Value * > Ops)
static llvm::AtomicOrdering getBitTestAtomicOrdering(BitTest::InterlockingKind I)
Definition: CGBuiltin.cpp:1460
#define MMA_VARIANTS_B1_XOR(geom, type)
#define MMA_VARIANTS_I4(geom, type)
static Value * EmitX86ConvertIntToFp(CodeGenFunction &CGF, const CallExpr *E, ArrayRef< Value * > Ops, bool IsSigned)
static Value * packTBLDVectorList(CodeGenFunction &CGF, ArrayRef< Value * > Ops, Value *ExtOp, Value *IndexOp, llvm::Type *ResTy, unsigned IntID, const char *Name)
Definition: CGBuiltin.cpp:8858
static Value * EmitAbs(CodeGenFunction &CGF, Value *ArgValue, bool HasNSW)
Definition: CGBuiltin.cpp:2267
static Value * EmitX86ScalarSelect(CodeGenFunction &CGF, Value *Mask, Value *Op0, Value *Op1)
static Value * EmitAtomicDecrementValue(CodeGenFunction &CGF, const CallExpr *E, AtomicOrdering Ordering=AtomicOrdering::SequentiallyConsistent)
Definition: CGBuiltin.cpp:633
static Value * handleHlslClip(const CallExpr *E, CodeGenFunction *CGF)
Definition: CGBuiltin.cpp:102
static bool AArch64SIMDIntrinsicsProvenSorted
Definition: CGBuiltin.cpp:8012
static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[]
Definition: CGBuiltin.cpp:7585
CodeGenFunction::ComplexPairTy ComplexPairTy
const Decl * D
Expr * E
const Environment & Env
Definition: HTMLLogger.cpp:147
unsigned Iter
Definition: HTMLLogger.cpp:153
#define ALIAS(NAME, TOK, FLAGS)
#define X(type, name)
Definition: Value.h:144
llvm::MachO::Record Record
Definition: MachO.h:31
static std::string getName(const CallEvent &Call)
static std::string toString(const clang::SanitizerSet &Sanitizers)
Produce a string containing comma-separated names of sanitizers in Sanitizers set.
HLSLResourceBindingAttr::RegisterType RegisterType
Definition: SemaHLSL.cpp:43
SourceRange Range
Definition: SemaObjC.cpp:758
SourceLocation Loc
Definition: SemaObjC.cpp:759
static QualType getPointeeType(const MemRegion *R)
Enumerates target-specific builtins in their own namespaces within namespace clang.
Defines the clang::TargetOptions class.
C Language Family Type Representation.
SourceLocation Begin
__DEVICE__ float modf(float __x, float *__iptr)
__DEVICE__ double nan(const char *)
__device__ int
__device__ __2f16 float __ockl_bool s
APSInt & getInt()
Definition: APValue.h:465
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition: ASTContext.h:188
CharUnits getTypeAlignInChars(QualType T) const
Return the ABI-specified alignment of a (complete) type T, in characters.
unsigned getIntWidth(QualType T) const
const ASTRecordLayout & getASTRecordLayout(const RecordDecl *D) const
Get or compute information about the layout of the specified record (struct/union/class) D,...
CanQualType VoidPtrTy
Definition: ASTContext.h:1187
IdentifierTable & Idents
Definition: ASTContext.h:680
Builtin::Context & BuiltinInfo
Definition: ASTContext.h:682
QualType getConstantArrayType(QualType EltTy, const llvm::APInt &ArySize, const Expr *SizeExpr, ArraySizeModifier ASM, unsigned IndexTypeQuals) const
Return the unique reference to the type for a constant array of the specified element type.
QualType getBaseElementType(const ArrayType *VAT) const
Return the innermost element type of an array type.
CanQualType IntTy
Definition: ASTContext.h:1169
QualType getObjCIdType() const
Represents the Objective-CC id type.
Definition: ASTContext.h:2203
bool hasSameUnqualifiedType(QualType T1, QualType T2) const
Determine whether the given types are equivalent after cvr-qualifiers have been removed.
Definition: ASTContext.h:2770
const ArrayType * getAsArrayType(QualType T) const
Type Query functions.
uint64_t getTypeSize(QualType T) const
Return the size of the specified (complete) type T, in bits.
Definition: ASTContext.h:2489
CharUnits getTypeSizeInChars(QualType T) const
Return the size of the specified (complete) type T, in characters.
CanQualType VoidTy
Definition: ASTContext.h:1160
QualType GetBuiltinType(unsigned ID, GetBuiltinTypeError &Error, unsigned *IntegerConstantArgs=nullptr) const
Return the type for the specified builtin.
const TargetInfo & getTargetInfo() const
Definition: ASTContext.h:799
CharUnits toCharUnitsFromBits(int64_t BitSize) const
Convert a size in bits to a size in characters.
unsigned getTargetAddressSpace(LangAS AS) const
@ GE_None
No error.
Definition: ASTContext.h:2391
@ GE_Missing_type
Missing a type.
Definition: ASTContext.h:2394
ASTRecordLayout - This class contains layout information for one RecordDecl, which is a struct/union/...
Definition: RecordLayout.h:38
uint64_t getFieldOffset(unsigned FieldNo) const
getFieldOffset - Get the offset of the given field index, in bits.
Definition: RecordLayout.h:200
Represents an array type, per C99 6.7.5.2 - Array Declarators.
Definition: Type.h:3577
QualType getElementType() const
Definition: Type.h:3589
static std::unique_ptr< AtomicScopeModel > create(AtomicScopeModelKind K)
Create an atomic scope model by AtomicScopeModelKind.
Definition: SyncScope.h:273
bool isLibFunction(unsigned ID) const
Return true if this is a builtin for a libc/libm function, with a "__builtin_" prefix (e....
Definition: Builtins.h:150
llvm::StringRef getName(unsigned ID) const
Return the identifier name for the specified builtin, e.g.
Definition: Builtins.h:103
bool isConstWithoutErrnoAndExceptions(unsigned ID) const
Return true if this function has no side effects and doesn't read memory, except for possibly errno o...
Definition: Builtins.h:248
bool isConstWithoutExceptions(unsigned ID) const
Definition: Builtins.h:252
bool isConst(unsigned ID) const
Return true if this function has no side effects and doesn't read memory.
Definition: Builtins.h:123
CallExpr - Represents a function call (C99 6.5.2.2, C++ [expr.call]).
Definition: Expr.h:2874
CharUnits - This is an opaque type for sizes expressed in character units.
Definition: CharUnits.h:38
llvm::Align getAsAlign() const
getAsAlign - Returns Quantity as a valid llvm::Align, Beware llvm::Align assumes power of two 8-bit b...
Definition: CharUnits.h:189
QuantityType getQuantity() const
getQuantity - Get the raw integer representation of this quantity.
Definition: CharUnits.h:185
static CharUnits One()
One - Construct a CharUnits quantity of one.
Definition: CharUnits.h:58
static CharUnits fromQuantity(QuantityType Quantity)
fromQuantity - Construct a CharUnits quantity from a raw integer type.
Definition: CharUnits.h:63
XRayInstrSet XRayInstrumentationBundle
Set of XRay instrumentation kinds to emit.
ABIArgInfo - Helper class to encapsulate information about how a specific C type should be passed to ...
Like RawAddress, an abstract representation of an aligned address, but the pointer contained in this ...
Definition: Address.h:128
static Address invalid()
Definition: Address.h:176
llvm::Value * emitRawPointer(CodeGenFunction &CGF) const
Return the pointer contained in this class after authenticating it and adding offset to it if necessa...
Definition: Address.h:251
CharUnits getAlignment() const
Definition: Address.h:189
llvm::Type * getElementType() const
Return the type of the values stored in this address.
Definition: Address.h:207
Address withElementType(llvm::Type *ElemTy) const
Return address with different element type, but same pointer and alignment.
Definition: Address.h:274
Address withAlignment(CharUnits NewAlignment) const
Return address with different alignment, but same pointer and element type.
Definition: Address.h:267
llvm::PointerType * getType() const
Return the type of the pointer value.
Definition: Address.h:199
An aggregate value slot.
Definition: CGValue.h:504
Address getAddress() const
Definition: CGValue.h:644
A scoped helper to set the current debug location to the specified location or preferred location of ...
Definition: CGDebugInfo.h:858
static ApplyDebugLocation CreateArtificial(CodeGenFunction &CGF)
Apply TemporaryLocation if it is valid.
Definition: CGDebugInfo.h:898
static ApplyDebugLocation CreateEmpty(CodeGenFunction &CGF)
Set the IRBuilder to not attach debug locations.
Definition: CGDebugInfo.h:915
llvm::StoreInst * CreateStore(llvm::Value *Val, Address Addr, bool IsVolatile=false)
Definition: CGBuilder.h:136
llvm::StoreInst * CreateAlignedStore(llvm::Value *Val, llvm::Value *Addr, CharUnits Align, bool IsVolatile=false)
Definition: CGBuilder.h:143
Address CreateGEP(CodeGenFunction &CGF, Address Addr, llvm::Value *Index, const llvm::Twine &Name="")
Definition: CGBuilder.h:292
llvm::CallInst * CreateMemMove(Address Dest, Address Src, llvm::Value *Size, bool IsVolatile=false)
Definition: CGBuilder.h:389
llvm::CallInst * CreateMemCpyInline(Address Dest, Address Src, uint64_t Size)
Definition: CGBuilder.h:381
llvm::AtomicRMWInst * CreateAtomicRMW(llvm::AtomicRMWInst::BinOp Op, Address Addr, llvm::Value *Val, llvm::AtomicOrdering Ordering, llvm::SyncScope::ID SSID=llvm::SyncScope::System)
Definition: CGBuilder.h:180
llvm::CallInst * CreateMemSetInline(Address Dest, llvm::Value *Value, uint64_t Size)
Definition: CGBuilder.h:405
llvm::StoreInst * CreateDefaultAlignedStore(llvm::Value *Val, llvm::Value *Addr, bool IsVolatile=false)
Definition: CGBuilder.h:151
llvm::CallInst * CreateMemSet(Address Dest, llvm::Value *Value, llvm::Value *Size, bool IsVolatile=false)
Definition: CGBuilder.h:398
Address CreateStructGEP(Address Addr, unsigned Index, const llvm::Twine &Name="")
Definition: CGBuilder.h:219
llvm::AtomicCmpXchgInst * CreateAtomicCmpXchg(Address Addr, llvm::Value *Cmp, llvm::Value *New, llvm::AtomicOrdering SuccessOrdering, llvm::AtomicOrdering FailureOrdering, llvm::SyncScope::ID SSID=llvm::SyncScope::System)
Definition: CGBuilder.h:169
llvm::LoadInst * CreateLoad(Address Addr, const llvm::Twine &Name="")
Definition: CGBuilder.h:108
Address CreateConstByteGEP(Address Addr, CharUnits Offset, const llvm::Twine &Name="")
Definition: CGBuilder.h:315
Address CreateLaunderInvariantGroup(Address Addr)
Definition: CGBuilder.h:437
llvm::CallInst * CreateMemCpy(Address Dest, Address Src, llvm::Value *Size, bool IsVolatile=false)
Definition: CGBuilder.h:365
llvm::LoadInst * CreateAlignedLoad(llvm::Type *Ty, llvm::Value *Addr, CharUnits Align, const llvm::Twine &Name="")
Definition: CGBuilder.h:128
Address CreateAddrSpaceCast(Address Addr, llvm::Type *Ty, llvm::Type *ElementTy, const llvm::Twine &Name="")
Definition: CGBuilder.h:189
Address CreateConstInBoundsGEP(Address Addr, uint64_t Index, const llvm::Twine &Name="")
Given addr = T* ... produce name = getelementptr inbounds addr, i64 index where i64 is actually the t...
Definition: CGBuilder.h:261
Address CreateInBoundsGEP(Address Addr, ArrayRef< llvm::Value * > IdxList, llvm::Type *ElementType, CharUnits Align, const Twine &Name="")
Definition: CGBuilder.h:346
virtual std::string getDeviceSideName(const NamedDecl *ND)=0
Returns function or variable name on device side even if the current compilation is for host.
virtual llvm::GlobalVariable * getThrowInfo(QualType T)
Definition: CGCXXABI.h:259
All available information about a concrete callee.
Definition: CGCall.h:63
static CGCallee forDirect(llvm::Constant *functionPtr, const CGCalleeInfo &abstractInfo=CGCalleeInfo())
Definition: CGCall.h:137
llvm::DIType * getOrCreateStandaloneType(QualType Ty, SourceLocation Loc)
Emit standalone debug info for a type.
llvm::DILocation * CreateTrapFailureMessageFor(llvm::DebugLoc TrapLocation, StringRef Category, StringRef FailureMsg)
Create a debug location from TrapLocation that adds an artificial inline frame where the frame name i...
CGFunctionInfo - Class to encapsulate the information about a function definition.
MutableArrayRef< ArgInfo > arguments()
virtual void EmitGCMemmoveCollectable(CodeGen::CodeGenFunction &CGF, Address DestPtr, Address SrcPtr, llvm::Value *Size)=0
EnqueuedBlockInfo emitOpenCLEnqueuedBlock(CodeGenFunction &CGF, const Expr *E)
CallArgList - Type for representing both the value and type of arguments in a call.
Definition: CGCall.h:274
void add(RValue rvalue, QualType type)
Definition: CGCall.h:305
CodeGenFunction - This class organizes the per-function state that is used while generating LLVM code...
llvm::Value * EmitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, llvm::Triple::ArchType Arch)
llvm::Value * EmitFromMemory(llvm::Value *Value, QualType Ty)
EmitFromMemory - Change a scalar value from its memory representation to its value representation.
llvm::Value * EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID, const CallExpr *E)
void FinishFunction(SourceLocation EndLoc=SourceLocation())
FinishFunction - Complete IR generation of the current function.
llvm::Value * EmitLifetimeStart(llvm::TypeSize Size, llvm::Value *Addr)
std::pair< RValue, llvm::Value * > EmitAtomicCompareExchange(LValue Obj, RValue Expected, RValue Desired, SourceLocation Loc, llvm::AtomicOrdering Success=llvm::AtomicOrdering::SequentiallyConsistent, llvm::AtomicOrdering Failure=llvm::AtomicOrdering::SequentiallyConsistent, bool IsWeak=false, AggValueSlot Slot=AggValueSlot::ignored())
static TypeEvaluationKind getEvaluationKind(QualType T)
getEvaluationKind - Return the TypeEvaluationKind of QualType T.
llvm::Value * EmitSVEPredicateCast(llvm::Value *Pred, llvm::ScalableVectorType *VTy)
llvm::CallInst * EmitTrapCall(llvm::Intrinsic::ID IntrID)
Emit a call to trap or debugtrap and attach function attribute "trap-func-name" if specified.
SanitizerSet SanOpts
Sanitizers enabled for this function.
RValue EmitBuiltinIsAligned(const CallExpr *E)
Emit IR for __builtin_is_aligned.
LValue EmitAggExprToLValue(const Expr *E)
EmitAggExprToLValue - Emit the computation of the specified expression of aggregate type into a tempo...
void EmitNonNullArgCheck(RValue RV, QualType ArgType, SourceLocation ArgLoc, AbstractCallee AC, unsigned ParmNum)
Create a check for a function parameter that may potentially be declared as non-null.
llvm::Value * EmitHLSLBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue)
llvm::Value * EmitHexagonBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
void GetAArch64SVEProcessedOperands(unsigned BuiltinID, const CallExpr *E, SmallVectorImpl< llvm::Value * > &Ops, SVETypeFlags TypeFlags)
llvm::Value * EmitAMDGPUBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
void EmitLifetimeEnd(llvm::Value *Size, llvm::Value *Addr)
void pushLifetimeExtendedDestroy(CleanupKind kind, Address addr, QualType type, Destroyer *destroyer, bool useEHCleanupForArray)
void EmitTrapCheck(llvm::Value *Checked, SanitizerHandler CheckHandlerID, bool NoMerge=false)
Create a basic block that will call the trap intrinsic, and emit a conditional branch to it,...
llvm::Value * EmitCheckedArgForBuiltin(const Expr *E, BuiltinCheckKind Kind)
Emits an argument for a call to a builtin.
llvm::Value * EmitSVEGatherLoad(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
LValue EmitHLSLOutArgExpr(const HLSLOutArgExpr *E, CallArgList &Args, QualType Ty)
CleanupKind getARCCleanupKind()
Retrieves the default cleanup kind for an ARC cleanup.
llvm::Value * EmitRISCVCpuSupports(const CallExpr *E)
llvm::Value * EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
llvm::Value * EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
RValue emitBuiltinOSLogFormat(const CallExpr &E)
Emit IR for __builtin_os_log_format.
LValue EmitLValue(const Expr *E, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
EmitLValue - Emit code to compute a designator that specifies the location of the expression.
llvm::Value * EmitVAStartEnd(llvm::Value *ArgValue, bool IsStart)
Emits a call to an LLVM variable-argument intrinsic, either llvm.va_start or llvm....
llvm::Value * EmitSVEMaskedStore(const CallExpr *, SmallVectorImpl< llvm::Value * > &Ops, unsigned BuiltinID)
llvm::Value * EmitSVEReinterpret(llvm::Value *Val, llvm::Type *Ty)
llvm::BasicBlock * createBasicBlock(const Twine &name="", llvm::Function *parent=nullptr, llvm::BasicBlock *before=nullptr)
createBasicBlock - Create an LLVM basic block.
llvm::Value * EmitSEHExceptionInfo()
RValue EmitBuiltinAlignTo(const CallExpr *E, bool AlignUp)
Emit IR for __builtin_align_up/__builtin_align_down.
const LangOptions & getLangOpts() const
llvm::Value * EmitLoadOfCountedByField(const Expr *Base, const FieldDecl *FAMDecl, const FieldDecl *CountDecl)
Build an expression accessing the "counted_by" field.
void ProcessOrderScopeAMDGCN(llvm::Value *Order, llvm::Value *Scope, llvm::AtomicOrdering &AO, llvm::SyncScope::ID &SSID)
llvm::Constant * EmitCheckTypeDescriptor(QualType T)
Emit a description of a type in a format suitable for passing to a runtime sanitizer handler.
void EmitBlock(llvm::BasicBlock *BB, bool IsFinished=false)
EmitBlock - Emit the given block.
void EmitUnreachable(SourceLocation Loc)
Emit a reached-unreachable diagnostic if Loc is valid and runtime checking is enabled.
llvm::Value * EmitSVETupleCreate(const SVETypeFlags &TypeFlags, llvm::Type *ReturnType, ArrayRef< llvm::Value * > Ops)
Address makeNaturalAddressForPointer(llvm::Value *Ptr, QualType T, CharUnits Alignment=CharUnits::Zero(), bool ForPointeeType=false, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
Construct an address with the natural alignment of T.
llvm::Value * EmitCheckedArgForAssume(const Expr *E)
Emits an argument for a call to a __builtin_assume.
ComplexPairTy EmitComplexExpr(const Expr *E, bool IgnoreReal=false, bool IgnoreImag=false)
EmitComplexExpr - Emit the computation of the specified expression of complex type,...
void EmitAnyExprToMem(const Expr *E, Address Location, Qualifiers Quals, bool IsInitializer)
EmitAnyExprToMem - Emits the code necessary to evaluate an arbitrary expression into the given memory...
TypeCheckKind
Situations in which we might emit a check for the suitability of a pointer or glvalue.
@ TCK_Store
Checking the destination of a store. Must be suitably sized and aligned.
@ TCK_Load
Checking the operand of a load. Must be suitably sized and aligned.
llvm::Value * EmitSMELdrStr(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
llvm::Value * EmitNeonSplat(llvm::Value *V, llvm::Constant *Idx, const llvm::ElementCount &Count)
llvm::Type * ConvertTypeForMem(QualType T)
llvm::Value * EmitSVEMaskedLoad(const CallExpr *, llvm::Type *ReturnTy, SmallVectorImpl< llvm::Value * > &Ops, unsigned BuiltinID, bool IsZExtReturn)
bool AlwaysEmitXRayCustomEvents() const
AlwaysEmitXRayCustomEvents - Return true if we must unconditionally emit XRay custom event handling c...
llvm::Value * EmitSVEDupX(llvm::Value *Scalar)
RawAddress CreateMemTemp(QualType T, const Twine &Name="tmp", RawAddress *Alloca=nullptr)
CreateMemTemp - Create a temporary memory object of the given type, with appropriate alignmen and cas...
@ Default
! No language constraints on evaluation order.
const TargetInfo & getTarget() const
llvm::Value * vectorWrapScalar16(llvm::Value *Op)
llvm::Function * LookupNeonLLVMIntrinsic(unsigned IntrinsicID, unsigned Modifier, llvm::Type *ArgTy, const CallExpr *E)
llvm::Value * getTypeSize(QualType Ty)
Returns calculated size of the specified type.
llvm::Value * EmitSEHExceptionCode()
Address EmitPointerWithAlignment(const Expr *Addr, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
EmitPointerWithAlignment - Given an expression with a pointer type, emit the value and compute our be...
llvm::Value * EmitTargetBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue)
EmitTargetBuiltinExpr - Emit the given builtin call.
RValue EmitAnyExprToTemp(const Expr *E)
EmitAnyExprToTemp - Similarly to EmitAnyExpr(), however, the result will always be accessible even if...
RValue EmitCoroutineIntrinsic(const CallExpr *E, unsigned int IID)
llvm::Value * EmitAArch64SMEBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
RValue EmitAMDGPUDevicePrintfCallExpr(const CallExpr *E)
Address EmitArrayToPointerDecay(const Expr *Array, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr)
RValue EmitBuiltinNewDeleteCall(const FunctionProtoType *Type, const CallExpr *TheCallExpr, bool IsDelete)
void EmitCheck(ArrayRef< std::pair< llvm::Value *, SanitizerKind::SanitizerOrdinal > > Checked, SanitizerHandler Check, ArrayRef< llvm::Constant * > StaticArgs, ArrayRef< llvm::Value * > DynamicArgs)
Create a basic block that will either trap or call a handler function in the UBSan runtime with the p...
RValue EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue)
RValue EmitAnyExpr(const Expr *E, AggValueSlot aggSlot=AggValueSlot::ignored(), bool ignoreResult=false)
EmitAnyExpr - Emit code to compute the specified expression which can have any type.
llvm::Value * EmitSVEPrefetchLoad(const SVETypeFlags &TypeFlags, SmallVectorImpl< llvm::Value * > &Ops, unsigned BuiltinID)
llvm::Value * EmitAArch64CompareBuiltinExpr(llvm::Value *Op, llvm::Type *Ty, const llvm::CmpInst::Predicate Fp, const llvm::CmpInst::Predicate Ip, const llvm::Twine &Name="")
void StartFunction(GlobalDecl GD, QualType RetTy, llvm::Function *Fn, const CGFunctionInfo &FnInfo, const FunctionArgList &Args, SourceLocation Loc=SourceLocation(), SourceLocation StartLoc=SourceLocation())
Emit code for the start of a function.
AggValueSlot CreateAggTemp(QualType T, const Twine &Name="tmp", RawAddress *Alloca=nullptr)
CreateAggTemp - Create a temporary memory object for the given aggregate type.
llvm::ScalableVectorType * getSVEType(const SVETypeFlags &TypeFlags)
RValue emitRotate(const CallExpr *E, bool IsRotateRight)
llvm::Constant * EmitCheckSourceLocation(SourceLocation Loc)
Emit a description of a source location in a format suitable for passing to a runtime sanitizer handl...
void ErrorUnsupported(const Stmt *S, const char *Type)
ErrorUnsupported - Print out an error that codegen doesn't support the specified stmt yet.
const FieldDecl * FindFlexibleArrayMemberFieldAndOffset(ASTContext &Ctx, const RecordDecl *RD, const FieldDecl *FAMDecl, uint64_t &Offset)
llvm::Value * EmitRISCVCpuIs(const CallExpr *E)
Address EmitVAListRef(const Expr *E)
llvm::Value * EmitNeonShiftVector(llvm::Value *V, llvm::Type *Ty, bool negateForRightShift)
llvm::Value * EmitSVEMovl(const SVETypeFlags &TypeFlags, llvm::ArrayRef< llvm::Value * > Ops, unsigned BuiltinID)
void emitAlignmentAssumption(llvm::Value *PtrValue, QualType Ty, SourceLocation Loc, SourceLocation AssumptionLoc, llvm::Value *Alignment, llvm::Value *OffsetValue=nullptr)
const TargetCodeGenInfo & getTargetHooks() const
llvm::Value * EmitARMBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue, llvm::Triple::ArchType Arch)
llvm::Type * getEltType(const SVETypeFlags &TypeFlags)
void EmitAggExpr(const Expr *E, AggValueSlot AS)
EmitAggExpr - Emit the computation of the specified expression of aggregate type.
bool ShouldXRayInstrumentFunction() const
ShouldXRayInstrumentFunction - Return true if the current function should be instrumented with XRay nop sleds...
llvm::Value * EmitSVEPMull(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned BuiltinID)
llvm::Value * EmitToMemory(llvm::Value *Value, QualType Ty)
EmitToMemory - Change a scalar value from its value representation to its in-memory representation.
bool IsInPreservedAIRegion
True if CodeGen currently emits code inside a preserved access index region.
llvm::Value * EmitARCRetain(QualType type, llvm::Value *value)
bool AlwaysEmitXRayTypedEvents() const
AlwaysEmitXRayTypedEvents - Return true if clang must unconditionally emit XRay typed event handling ...
void SetSqrtFPAccuracy(llvm::Value *Val)
Set the minimum required accuracy of the given sqrt operation based on CodeGenOpts.
RValue EmitCall(const CGFunctionInfo &CallInfo, const CGCallee &Callee, ReturnValueSlot ReturnValue, const CallArgList &Args, llvm::CallBase **CallOrInvoke, bool IsMustTail, SourceLocation Loc, bool IsVirtualFunctionPointerThunk=false)
EmitCall - Generate a call of the given function, expecting the given result type,...
llvm::Value * EmitSVEScatterStore(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
llvm::CallInst * EmitNounwindRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
llvm::Function * generateBuiltinOSLogHelperFunction(const analyze_os_log::OSLogBufferLayout &Layout, CharUnits BufferAlignment)
llvm::Value * EmitLoadOfScalar(Address Addr, bool Volatile, QualType Ty, SourceLocation Loc, AlignmentSource Source=AlignmentSource::Type, bool isNontemporal=false)
EmitLoadOfScalar - Load a scalar value from an address, taking care to appropriately convert from the...
CGCallee EmitCallee(const Expr *E)
const Decl * CurFuncDecl
CurFuncDecl - Holds the Decl for the current outermost non-closure context.
llvm::Value * EmitScalarOrConstFoldImmArg(unsigned ICEArguments, unsigned Idx, const CallExpr *E)
void checkTargetFeatures(const CallExpr *E, const FunctionDecl *TargetDecl)
llvm::Value * BuildVector(ArrayRef< llvm::Value * > Ops)
llvm::Value * EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
llvm::Value * EmitARMCDEBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue, llvm::Triple::ArchType Arch)
llvm::Value * GetCountedByFieldExprGEP(const Expr *Base, const FieldDecl *FAMDecl, const FieldDecl *CountDecl)
llvm::CallInst * EmitRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
llvm::Value * EmitSVEPredicateTupleCast(llvm::Value *PredTuple, llvm::StructType *Ty)
llvm::Type * ConvertType(QualType T)
void EmitWritebacks(const CallArgList &Args)
EmitWritebacks - Emit the writebacks for the given call arguments.
llvm::CallBase * EmitRuntimeCallOrInvoke(llvm::FunctionCallee callee, ArrayRef< llvm::Value * > args, const Twine &name="")
llvm::Value * EmitSystemZBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
llvm::Value * EmitSMEReadWrite(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
void EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc, LValue LV, QualType Type, SanitizerSet SkippedChecks=SanitizerSet(), llvm::Value *ArraySize=nullptr)
llvm::Value * EmitSMELd1St1(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
llvm::Value * EmitPPCBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
static Destroyer destroyARCStrongPrecise
void EmitARCIntrinsicUse(ArrayRef< llvm::Value * > values)
RValue EmitNVPTXDevicePrintfCallExpr(const CallExpr *E)
llvm::Value * EvaluateExprAsBool(const Expr *E)
EvaluateExprAsBool - Perform the usual unary conversions on the specified expression and compare the ...
llvm::Value * EmitSVEStructLoad(const SVETypeFlags &TypeFlags, SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
Address EmitMSVAListRef(const Expr *E)
Emit a "reference" to a __builtin_ms_va_list; this is always the value of the expression,...
llvm::Value * EmitCheckedInBoundsGEP(llvm::Type *ElemTy, llvm::Value *Ptr, ArrayRef< llvm::Value * > IdxList, bool SignedIndices, bool IsSubtraction, SourceLocation Loc, const Twine &Name="")
Same as IRBuilder::CreateInBoundsGEP, but additionally emits a check to detect undefined behavior whe...
llvm::Value * EmitNeonRShiftImm(llvm::Value *Vec, llvm::Value *Amt, llvm::Type *Ty, bool usgn, const char *name)
SmallVector< llvm::Type *, 2 > getSVEOverloadTypes(const SVETypeFlags &TypeFlags, llvm::Type *ReturnType, ArrayRef< llvm::Value * > Ops)
static bool hasAggregateEvaluationKind(QualType T)
LValue MakeAddrLValue(Address Addr, QualType T, AlignmentSource Source=AlignmentSource::Type)
llvm::Value * EmitARMMVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue, llvm::Triple::ArchType Arch)
LValue MakeNaturalAlignAddrLValue(llvm::Value *V, QualType T, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
llvm::Value * EmitSVEStructStore(const SVETypeFlags &TypeFlags, SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
Address GetAddrOfLocalVar(const VarDecl *VD)
GetAddrOfLocalVar - Return the address of a local variable.
llvm::Value * EmitSEHAbnormalTermination()
llvm::Value * EmitX86BuiltinExpr(unsigned BuiltinID, const CallExpr *E)
Address ReturnValue
ReturnValue - The temporary alloca to hold the return value.
llvm::Value * EmitSVEAllTruePred(const SVETypeFlags &TypeFlags)
RValue GetUndefRValue(QualType Ty)
GetUndefRValue - Get an appropriate 'undef' rvalue for the given type.
llvm::Type * SVEBuiltinMemEltTy(const SVETypeFlags &TypeFlags)
SVEBuiltinMemEltTy - Returns the memory element type for this memory access builtin.
llvm::LLVMContext & getLLVMContext()
llvm::Value * EmitScalarExpr(const Expr *E, bool IgnoreResultAssign=false)
EmitScalarExpr - Emit the computation of the specified expression of LLVM scalar type,...
void AddAMDGPUFenceAddressSpaceMMRA(llvm::Instruction *Inst, const CallExpr *E)
llvm::Value * EmitSMEZero(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
llvm::Value * EmitRISCVBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue)
llvm::Value * EmitCommonNeonBuiltinExpr(unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic, const char *NameHint, unsigned Modifier, const CallExpr *E, SmallVectorImpl< llvm::Value * > &Ops, Address PtrOp0, Address PtrOp1, llvm::Triple::ArchType Arch)
llvm::Value * EmitNeonCall(llvm::Function *F, SmallVectorImpl< llvm::Value * > &O, const char *name, unsigned shift=0, bool rightshift=false)
llvm::Value * EmitAnnotationCall(llvm::Function *AnnotationFn, llvm::Value *AnnotatedVal, StringRef AnnotationStr, SourceLocation Location, const AnnotateAttr *Attr)
Emit an annotation call (intrinsic).
llvm::ScalableVectorType * getSVEPredType(const SVETypeFlags &TypeFlags)
llvm::Value * EmitSVEGatherPrefetch(const SVETypeFlags &TypeFlags, SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
void EmitStoreOfScalar(llvm::Value *Value, Address Addr, bool Volatile, QualType Ty, AlignmentSource Source=AlignmentSource::Type, bool isInit=false, bool isNontemporal=false)
EmitStoreOfScalar - Store a scalar value to an address, taking care to appropriately convert from the...
llvm::Value * EmitSPIRVBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
llvm::Value * EmitBPFBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
llvm::Value * EmitSVETupleSetOrGet(const SVETypeFlags &TypeFlags, ArrayRef< llvm::Value * > Ops)
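The CodeGenFunction entry points listed above are typically combined in small emitter helpers. As a minimal illustrative sketch only (not code from CGBuiltin.cpp; the helper name is hypothetical, and the clang and clang::CodeGen namespaces are assumed to be in scope), lowering a one-argument builtin to an overloaded LLVM intrinsic might look like:
  // Hypothetical sketch: evaluate the argument, pick the intrinsic overload
  // for its LLVM type, and emit the call through the function's IR builder.
  static llvm::Value *emitUnaryIntrinsicSketch(CodeGenFunction &CGF,
                                               const CallExpr *E,
                                               unsigned IntrinsicID) {
    llvm::Value *Arg = CGF.EmitScalarExpr(E->getArg(0));
    llvm::Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Arg->getType());
    return CGF.Builder.CreateCall(F, Arg);
  }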
This class organizes the cross-function state that is used while generating LLVM code.
CGHLSLRuntime & getHLSLRuntime()
Return a reference to the configured HLSL runtime.
llvm::Module & getModule() const
llvm::FunctionCallee CreateRuntimeFunction(llvm::FunctionType *Ty, StringRef Name, llvm::AttributeList ExtraAttrs=llvm::AttributeList(), bool Local=false, bool AssumeConvergent=false)
Create or return a runtime function declaration with the specified type and name.
llvm::Constant * getRawFunctionPointer(GlobalDecl GD, llvm::Type *Ty=nullptr)
Return a function pointer for a reference to the given function.
Definition: CGExpr.cpp:2913
llvm::Constant * getBuiltinLibFunction(const FunctionDecl *FD, unsigned BuiltinID)
Given a builtin id for a function like "__builtin_fabsf", return a Function* for "fabsf".
Definition: CGBuiltin.cpp:263
DiagnosticsEngine & getDiags() const
void ErrorUnsupported(const Stmt *S, const char *Type)
Print out an error that codegen doesn't support the specified stmt yet.
const LangOptions & getLangOpts() const
CGCUDARuntime & getCUDARuntime()
Return a reference to the configured CUDA runtime.
CGOpenCLRuntime & getOpenCLRuntime()
Return a reference to the configured OpenCL runtime.
const TargetInfo & getTarget() const
const llvm::DataLayout & getDataLayout() const
void Error(SourceLocation loc, StringRef error)
Emit a general error that something can't be done.
CGCXXABI & getCXXABI() const
llvm::Constant * GetFunctionStart(const ValueDecl *Decl)
const llvm::Triple & getTriple() const
void DecorateInstructionWithTBAA(llvm::Instruction *Inst, TBAAAccessInfo TBAAInfo)
DecorateInstructionWithTBAA - Decorate the instruction with a TBAA tag.
llvm::Constant * CreateRuntimeVariable(llvm::Type *Ty, StringRef Name)
Create a new runtime global variable with the specified type and name.
TBAAAccessInfo getTBAAAccessInfo(QualType AccessType)
getTBAAAccessInfo - Get TBAA information that describes an access to an object of the given type.
ASTContext & getContext() const
const CodeGenOptions & getCodeGenOpts() const
StringRef getMangledName(GlobalDecl GD)
CharUnits getNaturalPointeeTypeAlignment(QualType T, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr)
llvm::LLVMContext & getLLVMContext()
llvm::Function * getIntrinsic(unsigned IID, ArrayRef< llvm::Type * > Tys={})
CGObjCRuntime & getObjCRuntime()
Return a reference to the configured Objective-C runtime.
void SetLLVMFunctionAttributes(GlobalDecl GD, const CGFunctionInfo &Info, llvm::Function *F, bool IsThunk)
Set the LLVM function attributes (sext, zext, etc).
void SetLLVMFunctionAttributesForDefinition(const Decl *D, llvm::Function *F)
Set the LLVM function attributes which only apply to a function definition.
ConstantAddress GetAddrOfConstantCString(const std::string &Str, const char *GlobalName=nullptr)
Returns a pointer to a character array containing the literal and a terminating '\0' character.
llvm::Type * ConvertType(QualType T)
ConvertType - Convert type T into a llvm::Type.
llvm::FunctionType * GetFunctionType(const CGFunctionInfo &Info)
GetFunctionType - Get the LLVM function type for the given function info.
Definition: CGCall.cpp:1630
const CGFunctionInfo & arrangeBuiltinFunctionDeclaration(QualType resultType, const FunctionArgList &args)
A builtin function is a freestanding function using the default C conventions.
Definition: CGCall.cpp:679
const CGFunctionInfo & arrangeBuiltinFunctionCall(QualType resultType, const CallArgList &args)
Definition: CGCall.cpp:667
llvm::Constant * emitAbstract(const Expr *E, QualType T)
Emit the result of the given expression as an abstract constant, asserting that it succeeded.
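Assuming the same namespaces, a hedged sketch of how CreateRuntimeFunction and EmitRuntimeCall fit together (the helper and the runtime function name "__example_runtime_hook" are made up for illustration):
  // Hypothetical sketch: declare a runtime helper taking one pointer and call it.
  static llvm::CallInst *emitRuntimeHookSketch(CodeGenFunction &CGF,
                                               llvm::Value *Ptr) {
    llvm::FunctionType *FTy =
        llvm::FunctionType::get(CGF.VoidTy, {CGF.Int8PtrTy},
                                /*isVarArg=*/false);
    llvm::FunctionCallee Fn =
        CGF.CGM.CreateRuntimeFunction(FTy, "__example_runtime_hook");
    return CGF.EmitRuntimeCall(Fn, {Ptr});
  }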
Information for lazily generating a cleanup.
Definition: EHScopeStack.h:141
FunctionArgList - Type for representing both the decl and type of parameters to a function.
Definition: CGCall.h:382
LValue - This represents an lvalue reference.
Definition: CGValue.h:182
llvm::Value * getRawBitFieldPointer(CodeGenFunction &CGF) const
Definition: CGValue.h:419
llvm::Value * getPointer(CodeGenFunction &CGF) const
Address getAddress() const
Definition: CGValue.h:361
void setNontemporal(bool Value)
Definition: CGValue.h:319
llvm::Value * emitRawPointer(CodeGenFunction &CGF) const
RValue - This trivial value class is used to represent the result of an expression that is evaluated.
Definition: CGValue.h:42
llvm::Value * getAggregatePointer(QualType PointeeType, CodeGenFunction &CGF) const
Definition: CGValue.h:88
bool isScalar() const
Definition: CGValue.h:64
static RValue getIgnored()
Definition: CGValue.h:93
static RValue get(llvm::Value *V)
Definition: CGValue.h:98
static RValue getAggregate(Address addr, bool isVolatile=false)
Convert an Address to an RValue.
Definition: CGValue.h:125
static RValue getComplex(llvm::Value *V1, llvm::Value *V2)
Definition: CGValue.h:108
llvm::Value * getScalarVal() const
getScalarVal() - Return the Value* of this scalar value.
Definition: CGValue.h:71
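As a small sketch of the RValue accessors above (the helper is hypothetical), a scalar result is usually wrapped with RValue::get and later recovered with getScalarVal:
  // Hypothetical sketch: round-trip a scalar value through RValue.
  static llvm::Value *roundTripScalarSketch(llvm::Value *V) {
    RValue RV = RValue::get(V);                       // scalar RValue
    return RV.isScalar() ? RV.getScalarVal() : nullptr;
  }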
An abstract representation of an aligned address.
Definition: Address.h:42
llvm::Value * getPointer() const
Definition: Address.h:66
static RawAddress invalid()
Definition: Address.h:61
ReturnValueSlot - Contains the address where the return value of a function can be stored,...
Definition: CGCall.h:386
virtual llvm::Value * encodeReturnAddress(CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const
Performs the code-generation required to convert the address of an instruction into a return address ...
Definition: TargetInfo.h:170
virtual llvm::Value * decodeReturnAddress(CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const
Performs the code-generation required to convert a return address as stored by the system into the ac...
Definition: TargetInfo.h:160
const T & getABIInfo() const
Definition: TargetInfo.h:57
virtual int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const
Determines the DWARF register number for the stack pointer, for exception-handling purposes.
Definition: TargetInfo.h:142
virtual llvm::Value * testFPKind(llvm::Value *V, unsigned BuiltinID, CGBuilderTy &Builder, CodeGenModule &CGM) const
Performs a target specific test of a floating point value for things like IsNaN, Infinity,...
Definition: TargetInfo.h:179
Complex values, per C99 6.2.5p11.
Definition: Type.h:3145
Represents a concrete matrix type with constant number of rows and columns.
Definition: Type.h:4232
Represents a sugar type with __counted_by or __sized_by annotations, including their _or_null variant...
Definition: Type.h:3306
DynamicCountPointerKind getKind() const
Definition: Type.h:3336
RecordDecl * getOuterLexicalRecordContext()
Retrieve the outermost lexically enclosing record context.
Definition: DeclBase.cpp:2032
T * getAttr() const
Definition: DeclBase.h:576
bool isImplicit() const
isImplicit - Indicates whether the declaration was implicitly generated by the implementation.
Definition: DeclBase.h:596
FunctionDecl * getAsFunction() LLVM_READONLY
Returns the function itself, or the templated function if this is a function template.
Definition: DeclBase.cpp:254
DeclContext * getDeclContext()
Definition: DeclBase.h:451
static bool isFlexibleArrayMemberLike(ASTContext &Context, const Decl *D, QualType Ty, LangOptions::StrictFlexArraysLevelKind StrictFlexArraysLevel, bool IgnoreTemplateOrMacroSubstitution)
Whether it resembles a flexible array member.
Definition: DeclBase.cpp:440
bool hasAttr() const
Definition: DeclBase.h:580
Concrete class used by the front-end to report problems and issues.
Definition: Diagnostic.h:231
DiagnosticBuilder Report(SourceLocation Loc, unsigned DiagID)
Issue the message to the client.
Definition: Diagnostic.h:1493
This represents one expression.
Definition: Expr.h:110
bool EvaluateAsInt(EvalResult &Result, const ASTContext &Ctx, SideEffectsKind AllowSideEffects=SE_NoSideEffects, bool InConstantContext=false) const
EvaluateAsInt - Return true if this is a constant which we can fold and convert to an integer,...
Expr * IgnoreParenCasts() LLVM_READONLY
Skip past any parentheses and casts which might surround this expression until reaching a fixed point...
Definition: Expr.cpp:3102
llvm::APSInt EvaluateKnownConstInt(const ASTContext &Ctx, SmallVectorImpl< PartialDiagnosticAt > *Diag=nullptr) const
EvaluateKnownConstInt - Call EvaluateAsRValue and return the folded integer.
Expr * IgnoreParenImpCasts() LLVM_READONLY
Skip past any parentheses and implicit casts which might surround this expression until reaching a fi...
Definition: Expr.cpp:3097
bool EvaluateAsFloat(llvm::APFloat &Result, const ASTContext &Ctx, SideEffectsKind AllowSideEffects=SE_NoSideEffects, bool InConstantContext=false) const
EvaluateAsFloat - Return true if this is a constant which we can fold and convert to a floating point...
Expr * IgnoreParens() LLVM_READONLY
Skip past any parentheses which might surround this expression until reaching a fixed point.
Definition: Expr.cpp:3093
bool isPRValue() const
Definition: Expr.h:278
@ NPC_ValueDependentIsNotNull
Specifies that a value-dependent expression should be considered to never be a null pointer constant.
Definition: Expr.h:830
ExprObjectKind getObjectKind() const
getObjectKind - The object kind that this expression produces.
Definition: Expr.h:444
bool EvaluateAsRValue(EvalResult &Result, const ASTContext &Ctx, bool InConstantContext=false) const
EvaluateAsRValue - Return true if this is a constant which we can fold to an rvalue using any crazy t...
bool HasSideEffects(const ASTContext &Ctx, bool IncludePossibleEffects=true) const
HasSideEffects - This routine returns true for all those expressions which have any effect other than...
Definition: Expr.cpp:3594
std::optional< std::string > tryEvaluateString(ASTContext &Ctx) const
If the current Expr can be evaluated to a pointer to a null-terminated constant string,...
Expr * IgnoreImpCasts() LLVM_READONLY
Skip past any implicit casts which might surround this expression until reaching a fixed point.
Definition: Expr.cpp:3077
NullPointerConstantKind isNullPointerConstant(ASTContext &Ctx, NullPointerConstantValueDependence NPC) const
isNullPointerConstant - C99 6.3.2.3p3 - Test if this reduces down to a Null pointer constant.
Definition: Expr.cpp:3970
SourceLocation getExprLoc() const LLVM_READONLY
getExprLoc - Return the preferred location for the arrow when diagnosing a problem with a generic exp...
Definition: Expr.cpp:276
std::optional< llvm::APSInt > getIntegerConstantExpr(const ASTContext &Ctx, SourceLocation *Loc=nullptr) const
getIntegerConstantExpr - Return the value if this expression is a valid integer constant expression.
QualType getType() const
Definition: Expr.h:142
bool tryEvaluateObjectSize(uint64_t &Result, ASTContext &Ctx, unsigned Type) const
If the current Expr is a pointer, this will try to statically determine the number of bytes available...
const ValueDecl * getAsBuiltinConstantDeclRef(const ASTContext &Context) const
If this expression is an unambiguous reference to a single declaration, in the style of __builtin_fun...
Definition: Expr.cpp:225
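The Expr evaluation helpers above are often used to fold constant builtin arguments before falling back to normal emission. A hedged sketch for an integer argument (the helper name is hypothetical; the clang and clang::CodeGen namespaces are assumed):
  // Hypothetical sketch: fold an integer argument to a constant if possible,
  // otherwise emit it as an ordinary scalar expression.
  static llvm::Value *emitIntArgSketch(CodeGenFunction &CGF, const Expr *Arg) {
    if (std::optional<llvm::APSInt> C =
            Arg->getIntegerConstantExpr(CGF.getContext()))
      return llvm::ConstantInt::get(CGF.getLLVMContext(), *C);
    return CGF.EmitScalarExpr(Arg);
  }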
Represents difference between two FPOptions values.
Definition: LangOptions.h:978
Represents a member of a struct/union/class.
Definition: Decl.h:3033
const FieldDecl * findCountedByField() const
Find the FieldDecl specified in a FAM's "counted_by" attribute.
Definition: Decl.cpp:4707
Represents a function declaration or definition.
Definition: Decl.h:1935
const ParmVarDecl * getParamDecl(unsigned i) const
Definition: Decl.h:2672
unsigned getBuiltinID(bool ConsiderWrapperFunctions=false) const
Returns a value indicating whether this function corresponds to a builtin function.
Definition: Decl.cpp:3638
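FunctionDecl::getBuiltinID returns 0 for functions that are not builtins, so a recognised-builtin check reduces to a non-zero test; a trivial sketch (hypothetical helper):
  // Hypothetical sketch: true if FD corresponds to a recognised builtin.
  static bool isRecognisedBuiltinSketch(const clang::FunctionDecl *FD) {
    return FD && FD->getBuiltinID() != 0;
  }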
Represents a prototype with parameter type info, e.g.
Definition: Type.h:5107
@ SME_PStateSMEnabledMask
Definition: Type.h:4587
@ SME_PStateSMCompatibleMask
Definition: Type.h:4588
GlobalDecl - represents a global declaration.
Definition: GlobalDecl.h:56
const Decl * getDecl() const
Definition: GlobalDecl.h:103
IdentifierInfo & get(StringRef Name)
Return the identifier token info for the specified named identifier.
static ImplicitParamDecl * Create(ASTContext &C, DeclContext *DC, SourceLocation IdLoc, IdentifierInfo *Id, QualType T, ImplicitParamKind ParamKind)
Create implicit parameter.
Definition: Decl.cpp:5406
MemberExpr - [C99 6.5.2.3] Structure and Union Members.
Definition: Expr.h:3236
StringRef getName() const
Get the name of identifier for this declaration as a StringRef.
Definition: Decl.h:280
std::string getNameAsString() const
Get a human-readable name for the declaration, even if it is one of the special kinds of names (C++ c...
Definition: Decl.h:296
Flags to identify the types for overloaded Neon builtins.
EltType getEltType() const
PipeType - OpenCL20.
Definition: Type.h:7785
PointerType - C99 6.7.5.1 - Pointer Declarators.
Definition: Type.h:3198
QualType getPointeeType() const
Definition: Type.h:3208
A (possibly-)qualified type.
Definition: Type.h:929
bool isVolatileQualified() const
Determine whether this type is volatile-qualified.
Definition: Type.h:8020
bool isWebAssemblyFuncrefType() const
Returns true if it is a WebAssembly Funcref Type.
Definition: Type.cpp:2893
LangAS getAddressSpace() const
Return the address space of this type.
Definition: Type.h:8062
bool isWebAssemblyExternrefType() const
Returns true if it is a WebAssembly Externref Type.
Definition: Type.cpp:2889
The collection of all type qualifiers we support.
Definition: Type.h:324
Represents a struct/union/class.
Definition: Decl.h:4162
field_range fields() const
Definition: Decl.h:4376
Flags to identify the types for overloaded SVE builtins.
bool isZExtReturn() const
bool isReverseUSDOT() const
bool isOverloadNone() const
bool isUndef() const
MemEltType getMemEltType() const
bool isWriteZA() const
bool isGatherLoad() const
bool isOverloadCvt() const
EltType getEltType() const
bool isOverloadDefault() const
bool isPrefetch() const
bool isOverloadWhileRW() const
bool isReadZA() const
bool isTupleSet() const
bool isReverseMergeAnyAccOp() const
bool isReductionQV() const
bool isTupleGet() const
bool isInsertOp1SVALL() const
bool isAppendSVALL() const
bool isReverseMergeAnyBinOp() const
bool isStructStore() const
bool isTupleCreate() const
bool isGatherPrefetch() const
bool hasSplatOperand() const
MergeType getMergeType() const
bool isByteIndexed() const
bool isStructLoad() const
bool setsFPMR() const
bool isOverloadWhileOrMultiVecCvt() const
unsigned getSplatOperand() const
bool isStore() const
bool isScatterStore() const
bool isReverseCompare() const
Scope - A scope is a transient data structure that is used while parsing the program.
Definition: Scope.h:41
Encodes a location in the source.
SourceLocation getBeginLoc() const LLVM_READONLY
Definition: Stmt.cpp:345
bool isUnion() const
Definition: Decl.h:3784
bool areArgsDestroyedLeftToRightInCallee() const
Are arguments to a call destroyed left to right in the callee? This is a fundamental language change,...
Definition: TargetCXXABI.h:188
Exposes information about the current target.
Definition: TargetInfo.h:220
TargetOptions & getTargetOpts() const
Retrieve the target options.
Definition: TargetInfo.h:311
virtual bool hasLegalHalfType() const
Determine whether _Float16 is supported on this target.
Definition: TargetInfo.h:697
const llvm::Triple & getTriple() const
Returns the target triple of the primary target.
Definition: TargetInfo.h:1262
bool isLittleEndian() const
Definition: TargetInfo.h:1673
unsigned getMaxOpenCLWorkGroupSize() const
Definition: TargetInfo.h:861
bool isBigEndian() const
Definition: TargetInfo.h:1672
TargetCXXABI getCXXABI() const
Get the C++ ABI currently in use.
Definition: TargetInfo.h:1333
virtual bool checkArithmeticFenceSupported() const
Controls if __arithmetic_fence is supported in the targeted backend.
Definition: TargetInfo.h:1679
unsigned getSuitableAlign() const
Return the alignment that is the largest alignment ever used for any scalar/SIMD data type on the tar...
Definition: TargetInfo.h:733
virtual std::string_view getClobbers() const =0
Returns a string of target-specific clobbers, in LLVM format.
llvm::CodeObjectVersionKind CodeObjectVersion
Code object version for AMDGPU.
Definition: TargetOptions.h:82
The base class of the type hierarchy.
Definition: Type.h:1828
CXXRecordDecl * getAsCXXRecordDecl() const
Retrieves the CXXRecordDecl that this type refers to, either because the type is a RecordType or beca...
Definition: Type.cpp:1916
bool isBlockPointerType() const
Definition: Type.h:8205
bool isVoidType() const
Definition: Type.h:8515
bool isSignedIntegerType() const
Return true if this is an integer type that is signed, according to C99 6.2.5p4 [char,...
Definition: Type.cpp:2180
bool isComplexType() const
isComplexType() does not include complex integers (a GCC extension).
Definition: Type.cpp:710
bool hasIntegerRepresentation() const
Determine whether this type has an integer representation of some sort, e.g., it is an integer type o...
Definition: Type.cpp:2055
bool isArrayType() const
Definition: Type.h:8263
bool isCountAttributedType() const
Definition: Type.cpp:727
bool isPointerType() const
Definition: Type.h:8191
bool isIntegerType() const
isIntegerType() does not include complex integers (a GCC extension).
Definition: Type.h:8555
const T * castAs() const
Member-template castAs<specific type>.
Definition: Type.h:8805
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee.
Definition: Type.cpp:738
bool isIntegralOrEnumerationType() const
Determine whether this type is an integral or enumeration type.
Definition: Type.h:8630
bool hasUnsignedIntegerRepresentation() const
Determine whether this type has an unsigned integer representation of some sort, e....
Definition: Type.cpp:2270
bool hasSignedIntegerRepresentation() const
Determine whether this type has a signed integer representation of some sort, e.g....
Definition: Type.cpp:2220
bool isObjCObjectPointerType() const
Definition: Type.h:8333
bool hasFloatingRepresentation() const
Determine whether this type has a floating-point representation of some sort, e.g....
Definition: Type.cpp:2292
bool isVectorType() const
Definition: Type.h:8303
bool isFloatingType() const
Definition: Type.cpp:2283
bool isUnsignedIntegerType() const
Return true if this is an integer type that is unsigned, according to C99 6.2.5p6 [which returns true...
Definition: Type.cpp:2230
const T * getAs() const
Member-template getAs<specific type>.
Definition: Type.h:8736
bool isRecordType() const
Definition: Type.h:8291
bool isSizelessVectorType() const
Returns true for all scalable vector types.
Definition: Type.cpp:2513
QualType getSizelessVectorEltType(const ASTContext &Ctx) const
Returns the representative type for the element of a sizeless vector builtin type.
Definition: Type.cpp:2581
RecordDecl * getAsRecordDecl() const
Retrieves the RecordDecl this type refers to.
Definition: Type.cpp:1920
Represent the declaration of a variable (in which case it is an lvalue), a function (in which case it ...
Definition: Decl.h:671
QualType getType() const
Definition: Decl.h:682
QualType getType() const
Definition: Value.cpp:234
Represents a GCC generic vector type.
Definition: Type.h:4034
unsigned getNumElements() const
Definition: Type.h:4049
QualType getElementType() const
Definition: Type.h:4048
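A short sketch using the Type and VectorType accessors above (hypothetical helper): querying the element count of a GCC generic vector type, treating scalars as width one:
  // Hypothetical sketch: number of vector elements, or 1 for a scalar type.
  static unsigned numElementsOrOneSketch(clang::QualType T) {
    if (const auto *VT = T->getAs<clang::VectorType>())
      return VT->getNumElements();
    return 1;
  }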
SmallVector< OSLogBufferItem, 4 > Items
Definition: OSLog.h:113
unsigned char getNumArgsByte() const
Definition: OSLog.h:148
unsigned char getSummaryByte() const
Definition: OSLog.h:139
Defines the clang::TargetInfo interface.
@ Type
The l-value was considered opaque, so the alignment was determined from a type.
llvm::Constant * initializationPatternFor(CodeGenModule &, llvm::Type *)
Definition: PatternInit.cpp:15
TypeEvaluationKind
The kind of evaluation to perform on values of a particular type.
@ EHCleanup
Denotes a cleanup that should run when a scope is exited using exceptional control flow (a throw stat...
Definition: EHScopeStack.h:80
constexpr XRayInstrMask Typed
Definition: XRayInstr.h:42
constexpr XRayInstrMask Custom
Definition: XRayInstr.h:41
bool computeOSLogBufferLayout(clang::ASTContext &Ctx, const clang::CallExpr *E, OSLogBufferLayout &layout)
Definition: OSLog.cpp:180
const void * Store
Store - This opaque type encapsulates an immutable mapping from locations to values.
Definition: StoreRef.h:27
llvm::APFloat APFloat
Definition: Floating.h:23
llvm::APInt APInt
Definition: FixedPoint.h:19
bool Dup(InterpState &S, CodePtr OpPC)
Definition: Interp.h:1212
bool Zero(InterpState &S, CodePtr OpPC)
Definition: Interp.h:2350
bool Mul(InterpState &S, CodePtr OpPC)
Definition: Interp.h:447
bool Neg(InterpState &S, CodePtr OpPC)
Definition: Interp.h:726
bool Load(InterpState &S, CodePtr OpPC)
Definition: Interp.h:1693
bool Cast(InterpState &S, CodePtr OpPC)
Definition: Interp.h:2126
bool Ret(InterpState &S, CodePtr &PC)
Definition: Interp.h:318
The JSON file list parser is used to communicate input to InstallAPI.
@ OK_BitField
A bitfield object is a bitfield on a C or C++ record.
Definition: Specifiers.h:154
@ Vector
'vector' clause, allowed on 'loop', Combined, and 'routine' directives.
@ DType
'dtype' clause, an alias for 'device_type', stored separately for diagnostic purposes.
bool operator<(DeclarationName LHS, DeclarationName RHS)
Ordering on two declaration names.
@ Asm
Assembly: we accept this only so that we can preprocess it.
@ Result
The result type of a method or function.
LangAS
Defines the address space values used by the address space qualifier of QualType.
Definition: AddressSpaces.h:25
const FunctionProtoType * T
SyncScope
Defines synch scope values used internally by clang.
Definition: SyncScope.h:42
llvm::StringRef getAsString(SyncScope S)
Definition: SyncScope.h:60
@ Success
Template argument deduction was successful.
@ Other
Other implicit parameter.
int int32_t
unsigned long uint64_t
long int64_t
unsigned int uint32_t
Diagnostic wrappers for TextAPI types for error reporting.
Definition: Dominators.h:30
#define true
Definition: stdbool.h:25
llvm::PointerType * ConstGlobalsPtrTy
void* in the address space for constant globals
llvm::IntegerType * Int8Ty
i8, i16, i32, and i64
llvm::Type * HalfTy
half, bfloat, float, double
llvm::IntegerType * IntTy
int
llvm::PointerType * AllocaInt8PtrTy
EvalResult is a struct with detailed info about an evaluated expression.
Definition: Expr.h:642
APValue Val
Val - This is the value the expression can be folded to.
Definition: Expr.h:644
void clear(SanitizerMask K=SanitizerKind::All)
Disable the sanitizers specified in K.
Definition: Sanitizers.h:190
void set(SanitizerMask K, bool Value)
Enable or disable a certain (single) sanitizer.
Definition: Sanitizers.h:182
bool has(SanitizerMask K) const
Check if a certain (single) sanitizer is enabled.
Definition: Sanitizers.h:169
bool has(XRayInstrMask K) const
Definition: XRayInstr.h:48
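As a hedged sketch of the SanitizerSet operations above (the choice of SanitizerKind::Null is illustrative only), a skipped-checks set such as the one passed to EmitTypeCheck can be built like this:
  // Hypothetical sketch: build a SanitizerSet that suppresses the null check.
  static clang::SanitizerSet makeSkippedChecksSketch() {
    clang::SanitizerSet Skipped;
    Skipped.set(clang::SanitizerKind::Null, true);
    return Skipped;
  }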
#define sinh(__x)
Definition: tgmath.h:373
#define asin(__x)
Definition: tgmath.h:112
#define scalbln(__x, __y)
Definition: tgmath.h:1182
#define sqrt(__x)
Definition: tgmath.h:520
#define acos(__x)
Definition: tgmath.h:83
#define fmin(__x, __y)
Definition: tgmath.h:780
#define exp(__x)
Definition: tgmath.h:431
#define ilogb(__x)
Definition: tgmath.h:851
#define copysign(__x, __y)
Definition: tgmath.h:618
#define erf(__x)
Definition: tgmath.h:636
#define atanh(__x)
Definition: tgmath.h:228
#define remquo(__x, __y, __z)
Definition: tgmath.h:1111
#define nextafter(__x, __y)
Definition: tgmath.h:1055
#define frexp(__x, __y)
Definition: tgmath.h:816
#define asinh(__x)
Definition: tgmath.h:199
#define erfc(__x)
Definition: tgmath.h:653
#define atan2(__x, __y)
Definition: tgmath.h:566
#define nexttoward(__x, __y)
Definition: tgmath.h:1073
#define hypot(__x, __y)
Definition: tgmath.h:833
#define exp2(__x)
Definition: tgmath.h:670
#define sin(__x)
Definition: tgmath.h:286
#define cbrt(__x)
Definition: tgmath.h:584
#define log2(__x)
Definition: tgmath.h:970
#define llround(__x)
Definition: tgmath.h:919
#define cosh(__x)
Definition: tgmath.h:344
#define trunc(__x)
Definition: tgmath.h:1216
#define fmax(__x, __y)
Definition: tgmath.h:762
#define ldexp(__x, __y)
Definition: tgmath.h:868
#define acosh(__x)
Definition: tgmath.h:170
#define tgamma(__x)
Definition: tgmath.h:1199
#define scalbn(__x, __y)
Definition: tgmath.h:1165
#define round(__x)
Definition: tgmath.h:1148
#define fmod(__x, __y)
Definition: tgmath.h:798
#define llrint(__x)
Definition: tgmath.h:902
#define tan(__x)
Definition: tgmath.h:315
#define cos(__x)
Definition: tgmath.h:257
#define log10(__x)
Definition: tgmath.h:936
#define fabs(__x)
Definition: tgmath.h:549
#define pow(__x, __y)
Definition: tgmath.h:490
#define log1p(__x)
Definition: tgmath.h:953
#define rint(__x)
Definition: tgmath.h:1131
#define expm1(__x)
Definition: tgmath.h:687
#define remainder(__x, __y)
Definition: tgmath.h:1090
#define fdim(__x, __y)
Definition: tgmath.h:704
#define lgamma(__x)
Definition: tgmath.h:885
#define tanh(__x)
Definition: tgmath.h:402
#define lrint(__x)
Definition: tgmath.h:1004
#define atan(__x)
Definition: tgmath.h:141
#define floor(__x)
Definition: tgmath.h:722
#define ceil(__x)
Definition: tgmath.h:601
#define log(__x)
Definition: tgmath.h:460
#define logb(__x)
Definition: tgmath.h:987
#define nearbyint(__x)
Definition: tgmath.h:1038
#define lround(__x)
Definition: tgmath.h:1021
#define fma(__x, __y, __z)
Definition: tgmath.h:742