clang 20.0.0git
CGBuiltin.cpp
1//===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This contains code to emit Builtin calls as LLVM code.
10//
11//===----------------------------------------------------------------------===//
12
13#include "ABIInfo.h"
14#include "CGCUDARuntime.h"
15#include "CGCXXABI.h"
16#include "CGHLSLRuntime.h"
17#include "CGObjCRuntime.h"
18#include "CGOpenCLRuntime.h"
19#include "CGRecordLayout.h"
20#include "CGValue.h"
21#include "CodeGenFunction.h"
22#include "CodeGenModule.h"
23#include "ConstantEmitter.h"
24#include "PatternInit.h"
25#include "TargetInfo.h"
27#include "clang/AST/Attr.h"
28#include "clang/AST/Decl.h"
29#include "clang/AST/Expr.h"
30#include "clang/AST/OSLog.h"
32#include "clang/AST/Type.h"
38#include "llvm/ADT/APFloat.h"
39#include "llvm/ADT/APInt.h"
40#include "llvm/ADT/FloatingPointMode.h"
41#include "llvm/ADT/SmallPtrSet.h"
42#include "llvm/ADT/StringExtras.h"
43#include "llvm/Analysis/ValueTracking.h"
44#include "llvm/IR/DataLayout.h"
45#include "llvm/IR/InlineAsm.h"
46#include "llvm/IR/Intrinsics.h"
47#include "llvm/IR/IntrinsicsAArch64.h"
48#include "llvm/IR/IntrinsicsAMDGPU.h"
49#include "llvm/IR/IntrinsicsARM.h"
50#include "llvm/IR/IntrinsicsBPF.h"
51#include "llvm/IR/IntrinsicsDirectX.h"
52#include "llvm/IR/IntrinsicsHexagon.h"
53#include "llvm/IR/IntrinsicsNVPTX.h"
54#include "llvm/IR/IntrinsicsPowerPC.h"
55#include "llvm/IR/IntrinsicsR600.h"
56#include "llvm/IR/IntrinsicsRISCV.h"
57#include "llvm/IR/IntrinsicsS390.h"
58#include "llvm/IR/IntrinsicsWebAssembly.h"
59#include "llvm/IR/IntrinsicsX86.h"
60#include "llvm/IR/MDBuilder.h"
61#include "llvm/IR/MatrixBuilder.h"
62#include "llvm/IR/MemoryModelRelaxationAnnotations.h"
63#include "llvm/Support/AMDGPUAddrSpace.h"
64#include "llvm/Support/ConvertUTF.h"
65#include "llvm/Support/MathExtras.h"
66#include "llvm/Support/ScopedPrinter.h"
67#include "llvm/TargetParser/AArch64TargetParser.h"
68#include "llvm/TargetParser/RISCVISAInfo.h"
69#include "llvm/TargetParser/RISCVTargetParser.h"
70#include "llvm/TargetParser/X86TargetParser.h"
71#include <numeric>
72#include <optional>
73#include <utility>
74
75using namespace clang;
76using namespace CodeGen;
77using namespace llvm;
78
79static void initializeAlloca(CodeGenFunction &CGF, AllocaInst *AI, Value *Size,
80 Align AlignmentInBytes) {
81 ConstantInt *Byte;
82 switch (CGF.getLangOpts().getTrivialAutoVarInit()) {
83 case LangOptions::TrivialAutoVarInitKind::Uninitialized:
84 // Nothing to initialize.
85 return;
86 case LangOptions::TrivialAutoVarInitKind::Zero:
87 Byte = CGF.Builder.getInt8(0x00);
88 break;
89 case LangOptions::TrivialAutoVarInitKind::Pattern: {
90 llvm::Type *Int8 = llvm::IntegerType::getInt8Ty(CGF.CGM.getLLVMContext());
91 Byte = llvm::dyn_cast<llvm::ConstantInt>(
92 initializationPatternFor(CGF.CGM, Int8));
93 break;
94 }
95 }
96 if (CGF.CGM.stopAutoInit())
97 return;
98 auto *I = CGF.Builder.CreateMemSet(AI, Byte, Size, AlignmentInBytes);
99 I->addAnnotationMetadata("auto-init");
100}
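// Illustrative sketch (not verbatim compiler output): with
// -ftrivial-auto-var-init=zero, a call such as
//   void *p = __builtin_alloca(n);
// is lowered roughly to
//   %p = alloca i8, i64 %n
//   call void @llvm.memset.p0.i64(ptr %p, i8 0, i64 %n, i1 false)   ; annotated "auto-init"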
101
102static Value *handleHlslClip(const CallExpr *E, CodeGenFunction *CGF) {
103 Value *Op0 = CGF->EmitScalarExpr(E->getArg(0));
104
105 Constant *FZeroConst = ConstantFP::getZero(CGF->FloatTy);
106 Value *CMP;
107 Value *LastInstr;
108
109 if (const auto *VecTy = E->getArg(0)->getType()->getAs<clang::VectorType>()) {
110 FZeroConst = ConstantVector::getSplat(
111 ElementCount::getFixed(VecTy->getNumElements()), FZeroConst);
112 auto *FCompInst = CGF->Builder.CreateFCmpOLT(Op0, FZeroConst);
113 CMP = CGF->Builder.CreateIntrinsic(
114 CGF->Builder.getInt1Ty(), CGF->CGM.getHLSLRuntime().getAnyIntrinsic(),
115 {FCompInst}, nullptr);
116 } else
117 CMP = CGF->Builder.CreateFCmpOLT(Op0, FZeroConst);
118
119 if (CGF->CGM.getTarget().getTriple().isDXIL())
120 LastInstr = CGF->Builder.CreateIntrinsic(
121 CGF->VoidTy, llvm::Intrinsic::dx_discard, {CMP}, nullptr);
122 else if (CGF->CGM.getTarget().getTriple().isSPIRV()) {
123 BasicBlock *LT0 = CGF->createBasicBlock("lt0", CGF->CurFn);
124 BasicBlock *End = CGF->createBasicBlock("end", CGF->CurFn);
125
126 CGF->Builder.CreateCondBr(CMP, LT0, End);
127
128 CGF->Builder.SetInsertPoint(LT0);
129
130 CGF->Builder.CreateIntrinsic(CGF->VoidTy, llvm::Intrinsic::spv_discard, {},
131 nullptr);
132
133 LastInstr = CGF->Builder.CreateBr(End);
134
135 CGF->Builder.SetInsertPoint(End);
136 } else {
137 llvm_unreachable("Backend Codegen not supported.");
138 }
139
140 return LastInstr;
141}
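// Rough sketch of the SPIR-V path above for a scalar HLSL clip(x), which
// discards the invocation when x < 0 (values and labels are illustrative):
//   %cmp = fcmp olt float %x, 0.000000e+00
//   br i1 %cmp, label %lt0, label %end
// lt0:
//   call void @llvm.spv.discard()
//   br label %end
// end: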
142
143static Value *handleHlslSplitdouble(const CallExpr *E, CodeGenFunction *CGF) {
144 Value *Op0 = CGF->EmitScalarExpr(E->getArg(0));
145 const auto *OutArg1 = dyn_cast<HLSLOutArgExpr>(E->getArg(1));
146 const auto *OutArg2 = dyn_cast<HLSLOutArgExpr>(E->getArg(2));
147
148 CallArgList Args;
149 LValue Op1TmpLValue =
150 CGF->EmitHLSLOutArgExpr(OutArg1, Args, OutArg1->getType());
151 LValue Op2TmpLValue =
152 CGF->EmitHLSLOutArgExpr(OutArg2, Args, OutArg2->getType());
153
155 Args.reverseWritebacks();
156
157 Value *LowBits = nullptr;
158 Value *HighBits = nullptr;
159
160 if (CGF->CGM.getTarget().getTriple().isDXIL()) {
161
162 llvm::Type *RetElementTy = CGF->Int32Ty;
163 if (auto *Op0VecTy = E->getArg(0)->getType()->getAs<clang::VectorType>())
164 RetElementTy = llvm::VectorType::get(
165 CGF->Int32Ty, ElementCount::getFixed(Op0VecTy->getNumElements()));
166 auto *RetTy = llvm::StructType::get(RetElementTy, RetElementTy);
167
168 CallInst *CI = CGF->Builder.CreateIntrinsic(
169 RetTy, Intrinsic::dx_splitdouble, {Op0}, nullptr, "hlsl.splitdouble");
170
171 LowBits = CGF->Builder.CreateExtractValue(CI, 0);
172 HighBits = CGF->Builder.CreateExtractValue(CI, 1);
173
174 } else {
175 // For non-DXIL targets we generate the equivalent instructions directly.
176
177 if (!Op0->getType()->isVectorTy()) {
178 FixedVectorType *DestTy = FixedVectorType::get(CGF->Int32Ty, 2);
179 Value *Bitcast = CGF->Builder.CreateBitCast(Op0, DestTy);
180
181 LowBits = CGF->Builder.CreateExtractElement(Bitcast, (uint64_t)0);
182 HighBits = CGF->Builder.CreateExtractElement(Bitcast, 1);
183 } else {
184 int NumElements = 1;
185 if (const auto *VecTy =
186 E->getArg(0)->getType()->getAs<clang::VectorType>())
187 NumElements = VecTy->getNumElements();
188
189 FixedVectorType *Uint32VecTy =
190 FixedVectorType::get(CGF->Int32Ty, NumElements * 2);
191 Value *Uint32Vec = CGF->Builder.CreateBitCast(Op0, Uint32VecTy);
192 if (NumElements == 1) {
193 LowBits = CGF->Builder.CreateExtractElement(Uint32Vec, (uint64_t)0);
194 HighBits = CGF->Builder.CreateExtractElement(Uint32Vec, 1);
195 } else {
196 SmallVector<int> EvenMask, OddMask;
197 for (int I = 0, E = NumElements; I != E; ++I) {
198 EvenMask.push_back(I * 2);
199 OddMask.push_back(I * 2 + 1);
200 }
201 LowBits = CGF->Builder.CreateShuffleVector(Uint32Vec, EvenMask);
202 HighBits = CGF->Builder.CreateShuffleVector(Uint32Vec, OddMask);
203 }
204 }
205 }
206 CGF->Builder.CreateStore(LowBits, Op1TmpLValue.getAddress());
207 auto *LastInst =
208 CGF->Builder.CreateStore(HighBits, Op2TmpLValue.getAddress());
209 CGF->EmitWritebacks(Args);
210 return LastInst;
211}
212
213static Value *handleAsDoubleBuiltin(CodeGenFunction &CGF, const CallExpr *E) {
214 assert((E->getArg(0)->getType()->hasUnsignedIntegerRepresentation() &&
215 E->getArg(1)->getType()->hasUnsignedIntegerRepresentation()) &&
216 "asdouble operands types mismatch");
217 Value *OpLowBits = CGF.EmitScalarExpr(E->getArg(0));
218 Value *OpHighBits = CGF.EmitScalarExpr(E->getArg(1));
219
220 llvm::Type *ResultType = CGF.DoubleTy;
221 int N = 1;
222 if (auto *VTy = E->getArg(0)->getType()->getAs<clang::VectorType>()) {
223 N = VTy->getNumElements();
224 ResultType = llvm::FixedVectorType::get(CGF.DoubleTy, N);
225 }
226
227 if (CGF.CGM.getTarget().getTriple().isDXIL())
228 return CGF.Builder.CreateIntrinsic(
229 /*ReturnType=*/ResultType, Intrinsic::dx_asdouble,
230 ArrayRef<Value *>{OpLowBits, OpHighBits}, nullptr, "hlsl.asdouble");
231
232 if (!E->getArg(0)->getType()->isVectorType()) {
233 OpLowBits = CGF.Builder.CreateVectorSplat(1, OpLowBits);
234 OpHighBits = CGF.Builder.CreateVectorSplat(1, OpHighBits);
235 }
236
237 llvm::SmallVector<int> Mask;
238 for (int i = 0; i < N; i++) {
239 Mask.push_back(i);
240 Mask.push_back(i + N);
241 }
242
243 Value *BitVec = CGF.Builder.CreateShuffleVector(OpLowBits, OpHighBits, Mask);
244
245 return CGF.Builder.CreateBitCast(BitVec, ResultType);
246}
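// Sketch of the generic (non-DXIL) lowering above for a scalar
// asdouble(lowbits, highbits); on little-endian targets element 0 carries the
// low 32 bits of the resulting double:
//   %v = shufflevector <1 x i32> %lo, <1 x i32> %hi, <2 x i32> <i32 0, i32 1>
//   %d = bitcast <2 x i32> %v to double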
247
248/// Helper for the read/write/add/inc X18 builtins: read the X18 register and
249/// return it as an i8 pointer.
250static llvm::Value *readX18AsPtr(CodeGenFunction &CGF) {
251 LLVMContext &Context = CGF.CGM.getLLVMContext();
252 llvm::Metadata *Ops[] = {llvm::MDString::get(Context, "x18")};
253 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
254 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
255 llvm::Function *F =
256 CGF.CGM.getIntrinsic(llvm::Intrinsic::read_register, {CGF.Int64Ty});
257 llvm::Value *X18 = CGF.Builder.CreateCall(F, Metadata);
258 return CGF.Builder.CreateIntToPtr(X18, CGF.Int8PtrTy);
259}
260
261/// getBuiltinLibFunction - Given a builtin id for a function like
262/// "__builtin_fabsf", return a Function* for "fabsf".
263llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD,
264 unsigned BuiltinID) {
265 assert(Context.BuiltinInfo.isLibFunction(BuiltinID));
266
267 // Get the name, skip over the __builtin_ prefix (if necessary).
268 StringRef Name;
269 GlobalDecl D(FD);
270
271 // TODO: This list should be expanded or refactored after all GCC-compatible
272 // std libcall builtins are implemented.
273 static SmallDenseMap<unsigned, StringRef, 64> F128Builtins{
274 {Builtin::BI__builtin___fprintf_chk, "__fprintf_chkieee128"},
275 {Builtin::BI__builtin___printf_chk, "__printf_chkieee128"},
276 {Builtin::BI__builtin___snprintf_chk, "__snprintf_chkieee128"},
277 {Builtin::BI__builtin___sprintf_chk, "__sprintf_chkieee128"},
278 {Builtin::BI__builtin___vfprintf_chk, "__vfprintf_chkieee128"},
279 {Builtin::BI__builtin___vprintf_chk, "__vprintf_chkieee128"},
280 {Builtin::BI__builtin___vsnprintf_chk, "__vsnprintf_chkieee128"},
281 {Builtin::BI__builtin___vsprintf_chk, "__vsprintf_chkieee128"},
282 {Builtin::BI__builtin_fprintf, "__fprintfieee128"},
283 {Builtin::BI__builtin_printf, "__printfieee128"},
284 {Builtin::BI__builtin_snprintf, "__snprintfieee128"},
285 {Builtin::BI__builtin_sprintf, "__sprintfieee128"},
286 {Builtin::BI__builtin_vfprintf, "__vfprintfieee128"},
287 {Builtin::BI__builtin_vprintf, "__vprintfieee128"},
288 {Builtin::BI__builtin_vsnprintf, "__vsnprintfieee128"},
289 {Builtin::BI__builtin_vsprintf, "__vsprintfieee128"},
290 {Builtin::BI__builtin_fscanf, "__fscanfieee128"},
291 {Builtin::BI__builtin_scanf, "__scanfieee128"},
292 {Builtin::BI__builtin_sscanf, "__sscanfieee128"},
293 {Builtin::BI__builtin_vfscanf, "__vfscanfieee128"},
294 {Builtin::BI__builtin_vscanf, "__vscanfieee128"},
295 {Builtin::BI__builtin_vsscanf, "__vsscanfieee128"},
296 {Builtin::BI__builtin_nexttowardf128, "__nexttowardieee128"},
297 };
298
299 // The AIX library functions frexpl, ldexpl, and modfl are for 128-bit
300 // IBM 'long double' (i.e. __ibm128). Map them to the 'double' versions
301 // when 'long double' is in 64-bit (IEEE double) mode.
302 static SmallDenseMap<unsigned, StringRef, 4> AIXLongDouble64Builtins{
303 {Builtin::BI__builtin_frexpl, "frexp"},
304 {Builtin::BI__builtin_ldexpl, "ldexp"},
305 {Builtin::BI__builtin_modfl, "modf"},
306 };
307
308 // If the builtin has been declared explicitly with an assembler label,
309 // use the mangled name. This differs from the plain label on platforms
310 // that prefix labels.
311 if (FD->hasAttr<AsmLabelAttr>())
312 Name = getMangledName(D);
313 else {
314 // TODO: This mutation should also be applied to targets other than PPC,
315 // once the backend supports IEEE 128-bit style libcalls.
316 if (getTriple().isPPC64() &&
317 &getTarget().getLongDoubleFormat() == &llvm::APFloat::IEEEquad() &&
318 F128Builtins.contains(BuiltinID))
319 Name = F128Builtins[BuiltinID];
320 else if (getTriple().isOSAIX() &&
321 &getTarget().getLongDoubleFormat() ==
322 &llvm::APFloat::IEEEdouble() &&
323 AIXLongDouble64Builtins.contains(BuiltinID))
324 Name = AIXLongDouble64Builtins[BuiltinID];
325 else
326 Name = Context.BuiltinInfo.getName(BuiltinID).substr(10);
327 }
328
329 llvm::FunctionType *Ty =
330 cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType()));
331
332 return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false);
333}
334
335/// Emit the conversions required to turn the given value into an
336/// integer of the given size.
337static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V,
338 QualType T, llvm::IntegerType *IntType) {
339 V = CGF.EmitToMemory(V, T);
340
341 if (V->getType()->isPointerTy())
342 return CGF.Builder.CreatePtrToInt(V, IntType);
343
344 assert(V->getType() == IntType);
345 return V;
346}
347
348static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V,
349 QualType T, llvm::Type *ResultType) {
350 V = CGF.EmitFromMemory(V, T);
351
352 if (ResultType->isPointerTy())
353 return CGF.Builder.CreateIntToPtr(V, ResultType);
354
355 assert(V->getType() == ResultType);
356 return V;
357}
358
359static Address CheckAtomicAlignment(CodeGenFunction &CGF, const CallExpr *E) {
360 ASTContext &Ctx = CGF.getContext();
361 Address Ptr = CGF.EmitPointerWithAlignment(E->getArg(0));
362 unsigned Bytes = Ptr.getElementType()->isPointerTy()
363 ? Ctx.getTypeSizeInChars(Ctx.VoidPtrTy).getQuantity()
364 : Ptr.getElementType()->getScalarSizeInBits() / 8;
365 unsigned Align = Ptr.getAlignment().getQuantity();
366 if (Align % Bytes != 0) {
367 DiagnosticsEngine &Diags = CGF.CGM.getDiags();
368 Diags.Report(E->getBeginLoc(), diag::warn_sync_op_misaligned);
369 // Force address to be at least naturally-aligned.
370 return Ptr.withAlignment(CharUnits::fromQuantity(Bytes));
371 }
372 return Ptr;
373}
374
375/// Utility to insert an atomic instruction based on Intrinsic::ID
376/// and the expression node.
377static Value *MakeBinaryAtomicValue(
378 CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E,
379 AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
380
381 QualType T = E->getType();
382 assert(E->getArg(0)->getType()->isPointerType());
383 assert(CGF.getContext().hasSameUnqualifiedType(T,
384 E->getArg(0)->getType()->getPointeeType()));
385 assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
386
387 Address DestAddr = CheckAtomicAlignment(CGF, E);
388
389 llvm::IntegerType *IntType = llvm::IntegerType::get(
390 CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
391
392 llvm::Value *Val = CGF.EmitScalarExpr(E->getArg(1));
393 llvm::Type *ValueType = Val->getType();
394 Val = EmitToInt(CGF, Val, T, IntType);
395
396 llvm::Value *Result =
397 CGF.Builder.CreateAtomicRMW(Kind, DestAddr, Val, Ordering);
398 return EmitFromInt(CGF, Result, T, ValueType);
399}
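// Example of what this produces for __sync_fetch_and_add(&i, 1) on an int
// (pointer-typed operands additionally go through ptrtoint/inttoptr via
// EmitToInt/EmitFromInt); SSA names are illustrative:
//   %old = atomicrmw add ptr %i.addr, i32 1 seq_cst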
400
401static Value *EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E) {
402 Value *Val = CGF.EmitScalarExpr(E->getArg(0));
403 Address Addr = CGF.EmitPointerWithAlignment(E->getArg(1));
404
405 Val = CGF.EmitToMemory(Val, E->getArg(0)->getType());
406 LValue LV = CGF.MakeAddrLValue(Addr, E->getArg(0)->getType());
407 LV.setNontemporal(true);
408 CGF.EmitStoreOfScalar(Val, LV, false);
409 return nullptr;
410}
411
412static Value *EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E) {
413 Address Addr = CGF.EmitPointerWithAlignment(E->getArg(0));
414
415 LValue LV = CGF.MakeAddrLValue(Addr, E->getType());
416 LV.setNontemporal(true);
417 return CGF.EmitLoadOfScalar(LV, E->getExprLoc());
418}
419
420static RValue EmitBinaryAtomic(CodeGenFunction &CGF,
421 llvm::AtomicRMWInst::BinOp Kind,
422 const CallExpr *E) {
423 return RValue::get(MakeBinaryAtomicValue(CGF, Kind, E));
424}
425
426/// Utility to insert an atomic instruction based Intrinsic::ID and
427/// the expression node, where the return value is the result of the
428/// operation.
429static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
430 llvm::AtomicRMWInst::BinOp Kind,
431 const CallExpr *E,
432 Instruction::BinaryOps Op,
433 bool Invert = false) {
434 QualType T = E->getType();
435 assert(E->getArg(0)->getType()->isPointerType());
436 assert(CGF.getContext().hasSameUnqualifiedType(T,
437 E->getArg(0)->getType()->getPointeeType()));
438 assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
439
440 Address DestAddr = CheckAtomicAlignment(CGF, E);
441
442 llvm::IntegerType *IntType = llvm::IntegerType::get(
443 CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
444
445 llvm::Value *Val = CGF.EmitScalarExpr(E->getArg(1));
446 llvm::Type *ValueType = Val->getType();
447 Val = EmitToInt(CGF, Val, T, IntType);
448
449 llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
450 Kind, DestAddr, Val, llvm::AtomicOrdering::SequentiallyConsistent);
451 Result = CGF.Builder.CreateBinOp(Op, Result, Val);
452 if (Invert)
453 Result =
454 CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result,
455 llvm::ConstantInt::getAllOnesValue(IntType));
456 Result = EmitFromInt(CGF, Result, T, ValueType);
457 return RValue::get(Result);
458}
459
460/// Utility to insert an atomic cmpxchg instruction.
461///
462/// @param CGF The current codegen function.
463/// @param E Builtin call expression to convert to cmpxchg.
464/// arg0 - address to operate on
465/// arg1 - value to compare with
466/// arg2 - new value
467/// @param ReturnBool Specifies whether to return success flag of
468/// cmpxchg result or the old value.
469///
470/// @returns result of cmpxchg, according to ReturnBool
471///
472/// Note: in order to lower Microsoft's _InterlockedCompareExchange* intrinsics,
473/// invoke the function EmitAtomicCmpXchgForMSIntrin instead.
474static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E,
475 bool ReturnBool) {
476 QualType T = ReturnBool ? E->getArg(1)->getType() : E->getType();
477 Address DestAddr = CheckAtomicAlignment(CGF, E);
478
479 llvm::IntegerType *IntType = llvm::IntegerType::get(
480 CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
481
482 Value *Cmp = CGF.EmitScalarExpr(E->getArg(1));
483 llvm::Type *ValueType = Cmp->getType();
484 Cmp = EmitToInt(CGF, Cmp, T, IntType);
485 Value *New = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType);
486
487 Value *Pair = CGF.Builder.CreateAtomicCmpXchg(
488 DestAddr, Cmp, New, llvm::AtomicOrdering::SequentiallyConsistent,
489 llvm::AtomicOrdering::SequentiallyConsistent);
490 if (ReturnBool)
491 // Extract boolean success flag and zext it to int.
492 return CGF.Builder.CreateZExt(CGF.Builder.CreateExtractValue(Pair, 1),
493 CGF.ConvertType(E->getType()));
494 else
495 // Extract old value and emit it using the same type as compare value.
496 return EmitFromInt(CGF, CGF.Builder.CreateExtractValue(Pair, 0), T,
497 ValueType);
498}
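// Example: __sync_val_compare_and_swap(&i, old, new) maps to the sketch below;
// __sync_bool_compare_and_swap instead extracts field 1 and zero-extends it:
//   %pair = cmpxchg ptr %i.addr, i32 %old, i32 %new seq_cst seq_cst
//   %val  = extractvalue { i32, i1 } %pair, 0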
499
500/// This function should be invoked to emit atomic cmpxchg for Microsoft's
501/// _InterlockedCompareExchange* intrinsics which have the following signature:
502/// T _InterlockedCompareExchange(T volatile *Destination,
503/// T Exchange,
504/// T Comparand);
505///
506/// Whereas the llvm 'cmpxchg' instruction has the following syntax:
507/// cmpxchg *Destination, Comparand, Exchange.
508/// So we need to swap Comparand and Exchange when invoking
509/// CreateAtomicCmpXchg. That is the reason we could not use the above utility
510/// function MakeAtomicCmpXchgValue since it expects the arguments to be
511/// already swapped.
512
513static
514Value *EmitAtomicCmpXchgForMSIntrin(CodeGenFunction &CGF, const CallExpr *E,
515 AtomicOrdering SuccessOrdering = AtomicOrdering::SequentiallyConsistent) {
516 assert(E->getArg(0)->getType()->isPointerType());
517 assert(CGF.getContext().hasSameUnqualifiedType(
518 E->getType(), E->getArg(0)->getType()->getPointeeType()));
519 assert(CGF.getContext().hasSameUnqualifiedType(E->getType(),
520 E->getArg(1)->getType()));
521 assert(CGF.getContext().hasSameUnqualifiedType(E->getType(),
522 E->getArg(2)->getType()));
523
524 Address DestAddr = CheckAtomicAlignment(CGF, E);
525
526 auto *Exchange = CGF.EmitScalarExpr(E->getArg(1));
527 auto *RTy = Exchange->getType();
528
529 auto *Comparand = CGF.EmitScalarExpr(E->getArg(2));
530
531 if (RTy->isPointerTy()) {
532 Exchange = CGF.Builder.CreatePtrToInt(Exchange, CGF.IntPtrTy);
533 Comparand = CGF.Builder.CreatePtrToInt(Comparand, CGF.IntPtrTy);
534 }
535
536 // For Release ordering, the failure ordering should be Monotonic.
537 auto FailureOrdering = SuccessOrdering == AtomicOrdering::Release ?
538 AtomicOrdering::Monotonic :
539 SuccessOrdering;
540
541 // The atomic instruction is marked volatile for consistency with MSVC. This
542 // blocks the few atomics optimizations that LLVM has. If we want to optimize
543 // _Interlocked* operations in the future, we will have to remove the volatile
544 // marker.
545 auto *CmpXchg = CGF.Builder.CreateAtomicCmpXchg(
546 DestAddr, Comparand, Exchange, SuccessOrdering, FailureOrdering);
547 CmpXchg->setVolatile(true);
548
549 auto *Result = CGF.Builder.CreateExtractValue(CmpXchg, 0);
550 if (RTy->isPointerTy()) {
551 Result = CGF.Builder.CreateIntToPtr(Result, RTy);
552 }
553
554 return Result;
555}
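// Example of the operand swap described above: _InterlockedCompareExchange(
// &Dest, Exchange, Comparand) becomes, roughly,
//   %pair = cmpxchg volatile ptr %Dest, i32 %Comparand, i32 %Exchange seq_cst seq_cst
//   %old  = extractvalue { i32, i1 } %pair, 0   ; value returned to the caller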
556
557// 64-bit Microsoft platforms support 128 bit cmpxchg operations. They are
558// prototyped like this:
559//
560// unsigned char _InterlockedCompareExchange128...(
561// __int64 volatile * _Destination,
562// __int64 _ExchangeHigh,
563// __int64 _ExchangeLow,
564// __int64 * _ComparandResult);
565//
566// Note that Destination is assumed to be at least 16-byte aligned, despite
567// being typed int64.
568
569static Value *EmitAtomicCmpXchg128ForMSIntrin(CodeGenFunction &CGF,
570 const CallExpr *E,
571 AtomicOrdering SuccessOrdering) {
572 assert(E->getNumArgs() == 4);
573 llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
574 llvm::Value *ExchangeHigh = CGF.EmitScalarExpr(E->getArg(1));
575 llvm::Value *ExchangeLow = CGF.EmitScalarExpr(E->getArg(2));
576 Address ComparandAddr = CGF.EmitPointerWithAlignment(E->getArg(3));
577
578 assert(DestPtr->getType()->isPointerTy());
579 assert(!ExchangeHigh->getType()->isPointerTy());
580 assert(!ExchangeLow->getType()->isPointerTy());
581
582 // For Release ordering, the failure ordering should be Monotonic.
583 auto FailureOrdering = SuccessOrdering == AtomicOrdering::Release
584 ? AtomicOrdering::Monotonic
585 : SuccessOrdering;
586
587 // Convert to i128 pointers and values. Alignment is also overridden for
588 // destination pointer.
589 llvm::Type *Int128Ty = llvm::IntegerType::get(CGF.getLLVMContext(), 128);
590 Address DestAddr(DestPtr, Int128Ty,
591 CGF.getContext().toCharUnitsFromBits(128));
592 ComparandAddr = ComparandAddr.withElementType(Int128Ty);
593
594 // (((i128)hi) << 64) | ((i128)lo)
595 ExchangeHigh = CGF.Builder.CreateZExt(ExchangeHigh, Int128Ty);
596 ExchangeLow = CGF.Builder.CreateZExt(ExchangeLow, Int128Ty);
597 ExchangeHigh =
598 CGF.Builder.CreateShl(ExchangeHigh, llvm::ConstantInt::get(Int128Ty, 64));
599 llvm::Value *Exchange = CGF.Builder.CreateOr(ExchangeHigh, ExchangeLow);
600
601 // Load the comparand for the instruction.
602 llvm::Value *Comparand = CGF.Builder.CreateLoad(ComparandAddr);
603
604 auto *CXI = CGF.Builder.CreateAtomicCmpXchg(DestAddr, Comparand, Exchange,
605 SuccessOrdering, FailureOrdering);
606
607 // The atomic instruction is marked volatile for consistency with MSVC. This
608 // blocks the few atomics optimizations that LLVM has. If we want to optimize
609 // _Interlocked* operations in the future, we will have to remove the volatile
610 // marker.
611 CXI->setVolatile(true);
612
613 // Store the result as an outparameter.
614 CGF.Builder.CreateStore(CGF.Builder.CreateExtractValue(CXI, 0),
615 ComparandAddr);
616
617 // Get the success boolean and zero extend it to i8.
618 Value *Success = CGF.Builder.CreateExtractValue(CXI, 1);
619 return CGF.Builder.CreateZExt(Success, CGF.Int8Ty);
620}
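// Rough shape of the IR built above for _InterlockedCompareExchange128
// (operands shown folded inline for brevity; names are illustrative):
//   %exch = or i128 (shl (zext i64 %ExchangeHigh to i128), 64), (zext i64 %ExchangeLow to i128)
//   %cmp  = load i128, ptr %ComparandResult, align 16
//   %pair = cmpxchg volatile ptr %Destination, i128 %cmp, i128 %exch seq_cst seq_cst
//   store i128 (extractvalue %pair, 0), ptr %ComparandResult
//   %ret  = zext i1 (extractvalue %pair, 1) to i8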
621
622static Value *EmitAtomicIncrementValue(CodeGenFunction &CGF, const CallExpr *E,
623 AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
624 assert(E->getArg(0)->getType()->isPointerType());
625
626 auto *IntTy = CGF.ConvertType(E->getType());
627 Address DestAddr = CheckAtomicAlignment(CGF, E);
628 auto *Result = CGF.Builder.CreateAtomicRMW(
629 AtomicRMWInst::Add, DestAddr, ConstantInt::get(IntTy, 1), Ordering);
630 return CGF.Builder.CreateAdd(Result, ConstantInt::get(IntTy, 1));
631}
632
633static Value *EmitAtomicDecrementValue(
634 CodeGenFunction &CGF, const CallExpr *E,
635 AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
636 assert(E->getArg(0)->getType()->isPointerType());
637
638 auto *IntTy = CGF.ConvertType(E->getType());
639 Address DestAddr = CheckAtomicAlignment(CGF, E);
640 auto *Result = CGF.Builder.CreateAtomicRMW(
641 AtomicRMWInst::Sub, DestAddr, ConstantInt::get(IntTy, 1), Ordering);
642 return CGF.Builder.CreateSub(Result, ConstantInt::get(IntTy, 1));
643}
644
645// Build a plain volatile load.
646static Value *EmitISOVolatileLoad(CodeGenFunction &CGF, const CallExpr *E) {
647 Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
648 QualType ElTy = E->getArg(0)->getType()->getPointeeType();
649 CharUnits LoadSize = CGF.getContext().getTypeSizeInChars(ElTy);
650 llvm::Type *ITy =
651 llvm::IntegerType::get(CGF.getLLVMContext(), LoadSize.getQuantity() * 8);
652 llvm::LoadInst *Load = CGF.Builder.CreateAlignedLoad(ITy, Ptr, LoadSize);
653 Load->setVolatile(true);
654 return Load;
655}
656
657// Build a plain volatile store.
658static Value *EmitISOVolatileStore(CodeGenFunction &CGF, const CallExpr *E) {
659 Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
660 Value *Value = CGF.EmitScalarExpr(E->getArg(1));
661 QualType ElTy = E->getArg(0)->getType()->getPointeeType();
662 CharUnits StoreSize = CGF.getContext().getTypeSizeInChars(ElTy);
663 llvm::StoreInst *Store =
664 CGF.Builder.CreateAlignedStore(Value, Ptr, StoreSize);
665 Store->setVolatile(true);
666 return Store;
667}
668
669// Emit a simple mangled intrinsic that has 1 argument and a return type
670// matching the argument type. Depending on mode, this may be a constrained
671// floating-point intrinsic.
672static Value *emitUnaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
673 const CallExpr *E, unsigned IntrinsicID,
674 unsigned ConstrainedIntrinsicID) {
675 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
676
677 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
678 if (CGF.Builder.getIsFPConstrained()) {
679 Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());
680 return CGF.Builder.CreateConstrainedFPCall(F, { Src0 });
681 } else {
682 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
683 return CGF.Builder.CreateCall(F, Src0);
684 }
685}
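// Example for a unary FP builtin such as __builtin_sin(x): in the default
// environment this emits
//   %r = call double @llvm.sin.f64(double %x)
// while under strict floating-point semantics (e.g. #pragma STDC FENV_ACCESS ON)
// it emits the constrained form, roughly
//   %r = call double @llvm.experimental.constrained.sin.f64(
//            double %x, metadata !"round.dynamic", metadata !"fpexcept.strict")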
686
687// Emit an intrinsic that has 2 operands of the same type as its result.
688// Depending on mode, this may be a constrained floating-point intrinsic.
689static Value *emitBinaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
690 const CallExpr *E, unsigned IntrinsicID,
691 unsigned ConstrainedIntrinsicID) {
692 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
693 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
694
695 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
696 if (CGF.Builder.getIsFPConstrained()) {
697 Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());
698 return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1 });
699 } else {
700 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
701 return CGF.Builder.CreateCall(F, { Src0, Src1 });
702 }
703}
704
705// Emit an intrinsic where the second argument's type is also mangled into the
706// intrinsic name.
706static Value *emitBinaryExpMaybeConstrainedFPBuiltin(
707 CodeGenFunction &CGF, const CallExpr *E, llvm::Intrinsic::ID IntrinsicID,
708 llvm::Intrinsic::ID ConstrainedIntrinsicID) {
709 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
710 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
711
712 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
713 if (CGF.Builder.getIsFPConstrained()) {
714 Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID,
715 {Src0->getType(), Src1->getType()});
716 return CGF.Builder.CreateConstrainedFPCall(F, {Src0, Src1});
717 }
718
719 Function *F =
720 CGF.CGM.getIntrinsic(IntrinsicID, {Src0->getType(), Src1->getType()});
721 return CGF.Builder.CreateCall(F, {Src0, Src1});
722}
723
724// Emit an intrinsic that has 3 operands of the same type as its result.
725// Depending on mode, this may be a constrained floating-point intrinsic.
726static Value *emitTernaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
727 const CallExpr *E, unsigned IntrinsicID,
728 unsigned ConstrainedIntrinsicID) {
729 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
730 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
731 llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));
732
733 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
734 if (CGF.Builder.getIsFPConstrained()) {
735 Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());
736 return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1, Src2 });
737 } else {
738 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
739 return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 });
740 }
741}
742
743// Emit an intrinsic where all operands are of the same type as the result.
744// Depending on mode, this may be a constrained floating-point intrinsic.
745static Value *emitCallMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
746 unsigned IntrinsicID,
747 unsigned ConstrainedIntrinsicID,
748 llvm::Type *Ty,
749 ArrayRef<Value *> Args) {
750 Function *F;
751 if (CGF.Builder.getIsFPConstrained())
752 F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Ty);
753 else
754 F = CGF.CGM.getIntrinsic(IntrinsicID, Ty);
755
756 if (CGF.Builder.getIsFPConstrained())
757 return CGF.Builder.CreateConstrainedFPCall(F, Args);
758 else
759 return CGF.Builder.CreateCall(F, Args);
760}
761
762// Emit a simple intrinsic that has N scalar arguments and a return type
763// matching the argument type. It is assumed that only the first argument is
764// overloaded.
765template <unsigned N>
766static Value *emitBuiltinWithOneOverloadedType(CodeGenFunction &CGF,
767 const CallExpr *E,
768 unsigned IntrinsicID,
769 llvm::StringRef Name = "") {
770 static_assert(N, "expect non-empty argument");
771 SmallVector<Value *, N> Args;
772 for (unsigned I = 0; I < N; ++I)
773 Args.push_back(CGF.EmitScalarExpr(E->getArg(I)));
774 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Args[0]->getType());
775 return CGF.Builder.CreateCall(F, Args, Name);
776}
777
778// Emit an intrinsic that has 1 float or double operand, and 1 integer.
779static Value *emitFPIntBuiltin(CodeGenFunction &CGF,
780 const CallExpr *E,
781 unsigned IntrinsicID) {
782 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
783 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
784
785 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
786 return CGF.Builder.CreateCall(F, {Src0, Src1});
787}
788
789// Emit an intrinsic that has an overloaded integer result and an fp operand.
790static Value *
791emitMaybeConstrainedFPToIntRoundBuiltin(CodeGenFunction &CGF, const CallExpr *E,
792 unsigned IntrinsicID,
793 unsigned ConstrainedIntrinsicID) {
794 llvm::Type *ResultType = CGF.ConvertType(E->getType());
795 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
796
797 if (CGF.Builder.getIsFPConstrained()) {
798 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
799 Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID,
800 {ResultType, Src0->getType()});
801 return CGF.Builder.CreateConstrainedFPCall(F, {Src0});
802 } else {
803 Function *F =
804 CGF.CGM.getIntrinsic(IntrinsicID, {ResultType, Src0->getType()});
805 return CGF.Builder.CreateCall(F, Src0);
806 }
807}
808
809static Value *emitFrexpBuiltin(CodeGenFunction &CGF, const CallExpr *E,
810 llvm::Intrinsic::ID IntrinsicID) {
811 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
812 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
813
814 QualType IntPtrTy = E->getArg(1)->getType()->getPointeeType();
815 llvm::Type *IntTy = CGF.ConvertType(IntPtrTy);
816 llvm::Function *F =
817 CGF.CGM.getIntrinsic(IntrinsicID, {Src0->getType(), IntTy});
818 llvm::Value *Call = CGF.Builder.CreateCall(F, Src0);
819
820 llvm::Value *Exp = CGF.Builder.CreateExtractValue(Call, 1);
821 LValue LV = CGF.MakeNaturalAlignAddrLValue(Src1, IntPtrTy);
822 CGF.EmitStoreOfScalar(Exp, LV);
823
824 return CGF.Builder.CreateExtractValue(Call, 0);
825}
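// Example: __builtin_frexp(x, &e) lowers through llvm.frexp, which returns the
// fraction and the exponent as a pair (sketch, names illustrative):
//   %r    = call { double, i32 } @llvm.frexp.f64.i32(double %x)
//   %exp  = extractvalue { double, i32 } %r, 1
//   store i32 %exp, ptr %e
//   %frac = extractvalue { double, i32 } %r, 0   ; returned to the caller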
826
827static void emitSincosBuiltin(CodeGenFunction &CGF, const CallExpr *E,
828 llvm::Intrinsic::ID IntrinsicID) {
829 llvm::Value *Val = CGF.EmitScalarExpr(E->getArg(0));
830 llvm::Value *Dest0 = CGF.EmitScalarExpr(E->getArg(1));
831 llvm::Value *Dest1 = CGF.EmitScalarExpr(E->getArg(2));
832
833 llvm::Function *F = CGF.CGM.getIntrinsic(IntrinsicID, {Val->getType()});
834 llvm::Value *Call = CGF.Builder.CreateCall(F, Val);
835
836 llvm::Value *SinResult = CGF.Builder.CreateExtractValue(Call, 0);
837 llvm::Value *CosResult = CGF.Builder.CreateExtractValue(Call, 1);
838
839 QualType DestPtrType = E->getArg(1)->getType()->getPointeeType();
840 LValue SinLV = CGF.MakeNaturalAlignAddrLValue(Dest0, DestPtrType);
841 LValue CosLV = CGF.MakeNaturalAlignAddrLValue(Dest1, DestPtrType);
842
843 llvm::StoreInst *StoreSin =
844 CGF.Builder.CreateStore(SinResult, SinLV.getAddress());
845 llvm::StoreInst *StoreCos =
846 CGF.Builder.CreateStore(CosResult, CosLV.getAddress());
847
848 // Mark the two stores as non-aliasing with each other. The order of stores
849 // emitted by this builtin is arbitrary; enforcing a particular order would
850 // prevent optimizations later on.
851 llvm::MDBuilder MDHelper(CGF.getLLVMContext());
852 MDNode *Domain = MDHelper.createAnonymousAliasScopeDomain();
853 MDNode *AliasScope = MDHelper.createAnonymousAliasScope(Domain);
854 MDNode *AliasScopeList = MDNode::get(Call->getContext(), AliasScope);
855 StoreSin->setMetadata(LLVMContext::MD_alias_scope, AliasScopeList);
856 StoreCos->setMetadata(LLVMContext::MD_noalias, AliasScopeList);
857}
858
859/// EmitFAbs - Emit a call to @llvm.fabs().
860static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) {
861 Function *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType());
862 llvm::CallInst *Call = CGF.Builder.CreateCall(F, V);
863 Call->setDoesNotAccessMemory();
864 return Call;
865}
866
867/// Emit the computation of the sign bit for a floating point value. Returns
868/// the i1 sign bit value.
869static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) {
870 LLVMContext &C = CGF.CGM.getLLVMContext();
871
872 llvm::Type *Ty = V->getType();
873 int Width = Ty->getPrimitiveSizeInBits();
874 llvm::Type *IntTy = llvm::IntegerType::get(C, Width);
875 V = CGF.Builder.CreateBitCast(V, IntTy);
876 if (Ty->isPPC_FP128Ty()) {
877 // We want the sign bit of the higher-order double. The bitcast we just
878 // did works as if the double-double was stored to memory and then
879 // read as an i128. The "store" will put the higher-order double in the
880 // lower address in both little- and big-Endian modes, but the "load"
881 // will treat those bits as a different part of the i128: the low bits in
882 // little-Endian, the high bits in big-Endian. Therefore, on big-Endian
883 // we need to shift the high bits down to the low before truncating.
884 Width >>= 1;
885 if (CGF.getTarget().isBigEndian()) {
886 Value *ShiftCst = llvm::ConstantInt::get(IntTy, Width);
887 V = CGF.Builder.CreateLShr(V, ShiftCst);
888 }
889 // We are truncating value in order to extract the higher-order
890 // double, which we will be using to extract the sign from.
891 IntTy = llvm::IntegerType::get(C, Width);
892 V = CGF.Builder.CreateTrunc(V, IntTy);
893 }
894 Value *Zero = llvm::Constant::getNullValue(IntTy);
895 return CGF.Builder.CreateICmpSLT(V, Zero);
896}
897
898/// Checks no arguments or results are passed indirectly in the ABI (i.e. via a
899/// hidden pointer). This is used to check annotating FP libcalls (that could
900/// set `errno`) with "int" TBAA metadata is safe. If any floating-point
901/// arguments are passed indirectly, setup for the call could be incorrectly
902/// optimized out.
903static bool HasNoIndirectArgumentsOrResults(CGFunctionInfo const &FnInfo) {
904 auto IsIndirect = [&](ABIArgInfo const &info) {
905 return info.isIndirect() || info.isIndirectAliased() || info.isInAlloca();
906 };
907 return !IsIndirect(FnInfo.getReturnInfo()) &&
908 llvm::none_of(FnInfo.arguments(),
909 [&](CGFunctionInfoArgInfo const &ArgInfo) {
910 return IsIndirect(ArgInfo.info);
911 });
912}
913
914static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD,
915 const CallExpr *E, llvm::Constant *calleeValue) {
916 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
917 CGCallee callee = CGCallee::forDirect(calleeValue, GlobalDecl(FD));
918 llvm::CallBase *callOrInvoke = nullptr;
919 CGFunctionInfo const *FnInfo = nullptr;
920 RValue Call =
921 CGF.EmitCall(E->getCallee()->getType(), callee, E, ReturnValueSlot(),
922 /*Chain=*/nullptr, &callOrInvoke, &FnInfo);
923
924 if (unsigned BuiltinID = FD->getBuiltinID()) {
925 // Check whether this is an FP math builtin function, such as BI__builtin_expf.
926 ASTContext &Context = CGF.getContext();
927 bool ConstWithoutErrnoAndExceptions =
928 Context.BuiltinInfo.isConstWithoutErrnoAndExceptions(BuiltinID);
929 // Restrict this to targets where math functions set errno; macOS, for
930 // example, does not set errno.
931 // TODO: Support builtin functions with a complex return type, e.g. cacosh.
931 if (ConstWithoutErrnoAndExceptions && CGF.CGM.getLangOpts().MathErrno &&
932 !CGF.Builder.getIsFPConstrained() && Call.isScalar() &&
933 HasNoIndirectArgumentsOrResults(*FnInfo)) {
934 // Emit "int" TBAA metadata on FP math libcalls.
935 clang::QualType IntTy = Context.IntTy;
936 TBAAAccessInfo TBAAInfo = CGF.CGM.getTBAAAccessInfo(IntTy);
937 CGF.CGM.DecorateInstructionWithTBAA(callOrInvoke, TBAAInfo);
938 }
939 }
940 return Call;
941}
942
943/// Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.*
944/// depending on IntrinsicID.
945///
946/// \arg CGF The current codegen function.
947/// \arg IntrinsicID The ID for the Intrinsic we wish to generate.
948/// \arg X The first argument to the llvm.*.with.overflow.*.
949/// \arg Y The second argument to the llvm.*.with.overflow.*.
950/// \arg Carry The carry returned by the llvm.*.with.overflow.*.
951/// \returns The result (i.e. sum/product) returned by the intrinsic.
952static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF,
953 const llvm::Intrinsic::ID IntrinsicID,
954 llvm::Value *X, llvm::Value *Y,
955 llvm::Value *&Carry) {
956 // Make sure we have integers of the same width.
957 assert(X->getType() == Y->getType() &&
958 "Arguments must be the same type. (Did you forget to make sure both "
959 "arguments have the same integer width?)");
960
961 Function *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType());
962 llvm::Value *Tmp = CGF.Builder.CreateCall(Callee, {X, Y});
963 Carry = CGF.Builder.CreateExtractValue(Tmp, 1);
964 return CGF.Builder.CreateExtractValue(Tmp, 0);
965}
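// Example: for __builtin_sadd_overflow(a, b, &r) with 32-bit ints this becomes,
// roughly,
//   %pair = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
//   %sum  = extractvalue { i32, i1 } %pair, 0   ; stored to *r by the caller
//   %ovf  = extractvalue { i32, i1 } %pair, 1   ; returned as the overflow flag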
966
967static Value *emitRangedBuiltin(CodeGenFunction &CGF, unsigned IntrinsicID,
968 int low, int high) {
969 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, {});
970 llvm::CallInst *Call = CGF.Builder.CreateCall(F);
971 llvm::ConstantRange CR(APInt(32, low), APInt(32, high));
972 Call->addRangeRetAttr(CR);
973 Call->addRetAttr(llvm::Attribute::AttrKind::NoUndef);
974 return Call;
975}
976
977namespace {
978 struct WidthAndSignedness {
979 unsigned Width;
980 bool Signed;
981 };
982}
983
984static WidthAndSignedness
985getIntegerWidthAndSignedness(const clang::ASTContext &context,
986 const clang::QualType Type) {
987 assert(Type->isIntegerType() && "Given type is not an integer.");
988 unsigned Width = context.getIntWidth(Type);
989 bool Signed = Type->isSignedIntegerType();
990 return {Width, Signed};
991}
992
993// Given one or more integer types, this function produces an integer type that
994// encompasses them: any value in one of the given types could be expressed in
995// the encompassing type.
996static struct WidthAndSignedness
997EncompassingIntegerType(ArrayRef<struct WidthAndSignedness> Types) {
998 assert(Types.size() > 0 && "Empty list of types.");
999
1000 // If any of the given types is signed, we must return a signed type.
1001 bool Signed = false;
1002 for (const auto &Type : Types) {
1003 Signed |= Type.Signed;
1004 }
1005
1006 // The encompassing type must have a width greater than or equal to the width
1007 // of the specified types. Additionally, if the encompassing type is signed,
1008 // its width must be strictly greater than the width of any unsigned types
1009 // given.
1010 unsigned Width = 0;
1011 for (const auto &Type : Types) {
1012 unsigned MinWidth = Type.Width + (Signed && !Type.Signed);
1013 if (Width < MinWidth) {
1014 Width = MinWidth;
1015 }
1016 }
1017
1018 return {Width, Signed};
1019}
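// Worked example: encompassing {Width=32, Signed=true} and {Width=32,
// Signed=false} yields {Width=33, Signed=true}, since a signed type needs one
// extra bit to represent every 32-bit unsigned value.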
1020
1021Value *CodeGenFunction::EmitVAStartEnd(Value *ArgValue, bool IsStart) {
1022 Intrinsic::ID inst = IsStart ? Intrinsic::vastart : Intrinsic::vaend;
1023 return Builder.CreateCall(CGM.getIntrinsic(inst, {ArgValue->getType()}),
1024 ArgValue);
1025}
1026
1027/// Checks if using the result of __builtin_object_size(p, @p From) in place of
1028/// __builtin_object_size(p, @p To) is correct
1029static bool areBOSTypesCompatible(int From, int To) {
1030 // Note: Our __builtin_object_size implementation currently treats Type=0 and
1031 // Type=2 identically. Encoding this implementation detail here may make
1032 // improving __builtin_object_size difficult in the future, so it's omitted.
1033 return From == To || (From == 0 && To == 1) || (From == 3 && To == 2);
1034}
1035
1036static llvm::Value *
1037getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType) {
1038 return ConstantInt::get(ResType, (Type & 2) ? 0 : -1, /*isSigned=*/true);
1039}
1040
1041llvm::Value *
1042CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type,
1043 llvm::IntegerType *ResType,
1044 llvm::Value *EmittedE,
1045 bool IsDynamic) {
1046 uint64_t ObjectSize;
1047 if (!E->tryEvaluateObjectSize(ObjectSize, getContext(), Type))
1048 return emitBuiltinObjectSize(E, Type, ResType, EmittedE, IsDynamic);
1049 return ConstantInt::get(ResType, ObjectSize, /*isSigned=*/true);
1050}
1051
1052const FieldDecl *CodeGenFunction::FindFlexibleArrayMemberFieldAndOffset(
1053 ASTContext &Ctx, const RecordDecl *RD, const FieldDecl *FAMDecl,
1054 uint64_t &Offset) {
1055 const LangOptions::StrictFlexArraysLevelKind StrictFlexArraysLevel =
1056 getLangOpts().getStrictFlexArraysLevel();
1057 uint32_t FieldNo = 0;
1058
1059 if (RD->isImplicit())
1060 return nullptr;
1061
1062 for (const FieldDecl *FD : RD->fields()) {
1063 if ((!FAMDecl || FD == FAMDecl) &&
1064 Decl::isFlexibleArrayMemberLike(
1065 Ctx, FD, FD->getType(), StrictFlexArraysLevel,
1066 /*IgnoreTemplateOrMacroSubstitution=*/true)) {
1067 const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(RD);
1068 Offset += Layout.getFieldOffset(FieldNo);
1069 return FD;
1070 }
1071
1072 QualType Ty = FD->getType();
1073 if (Ty->isRecordType()) {
1074 if (const FieldDecl *Field = FindFlexibleArrayMemberFieldAndOffset(
1075 Ctx, Ty->getAsRecordDecl(), FAMDecl, Offset)) {
1076 const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(RD);
1077 Offset += Layout.getFieldOffset(FieldNo);
1078 return Field;
1079 }
1080 }
1081
1082 if (!RD->isUnion())
1083 ++FieldNo;
1084 }
1085
1086 return nullptr;
1087}
1088
1089static unsigned CountCountedByAttrs(const RecordDecl *RD) {
1090 unsigned Num = 0;
1091
1092 for (const FieldDecl *FD : RD->fields()) {
1093 if (FD->getType()->isCountAttributedType())
1094 return ++Num;
1095
1096 QualType Ty = FD->getType();
1097 if (Ty->isRecordType())
1098 Num += CountCountedByAttrs(Ty->getAsRecordDecl());
1099 }
1100
1101 return Num;
1102}
1103
1104llvm::Value *
1105CodeGenFunction::emitFlexibleArrayMemberSize(const Expr *E, unsigned Type,
1106 llvm::IntegerType *ResType) {
1107 // The code generated here calculates the size of a struct with a flexible
1108 // array member that uses the counted_by attribute. There are two instances
1109 // we handle:
1110 //
1111 // struct s {
1112 // unsigned long flags;
1113 // int count;
1114 // int array[] __attribute__((counted_by(count)));
1115 // }
1116 //
1117 // 1) bdos of the flexible array itself:
1118 //
1119 // __builtin_dynamic_object_size(p->array, 1) ==
1120 // p->count * sizeof(*p->array)
1121 //
1122 // 2) bdos of a pointer into the flexible array:
1123 //
1124 // __builtin_dynamic_object_size(&p->array[42], 1) ==
1125 // (p->count - 42) * sizeof(*p->array)
1126 //
1127 // 3) bdos of the whole struct, including the flexible array:
1128 //
1129 // __builtin_dynamic_object_size(p, 1) ==
1130 // max(sizeof(struct s),
1131 // offsetof(struct s, array) + p->count * sizeof(*p->array))
1132 //
1133 ASTContext &Ctx = getContext();
1134 const Expr *Base = E->IgnoreParenImpCasts();
1135 const Expr *Idx = nullptr;
1136
1137 if (const auto *UO = dyn_cast<UnaryOperator>(Base);
1138 UO && UO->getOpcode() == UO_AddrOf) {
1139 Expr *SubExpr = UO->getSubExpr()->IgnoreParenImpCasts();
1140 if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(SubExpr)) {
1141 Base = ASE->getBase()->IgnoreParenImpCasts();
1142 Idx = ASE->getIdx()->IgnoreParenImpCasts();
1143
1144 if (const auto *IL = dyn_cast<IntegerLiteral>(Idx)) {
1145 int64_t Val = IL->getValue().getSExtValue();
1146 if (Val < 0)
1147 return getDefaultBuiltinObjectSizeResult(Type, ResType);
1148
1149 if (Val == 0)
1150 // The index is 0, so we don't need to take it into account.
1151 Idx = nullptr;
1152 }
1153 } else {
1154 // Potential pointer to another element in the struct.
1155 Base = SubExpr;
1156 }
1157 }
1158
1159 // Get the flexible array member Decl.
1160 const RecordDecl *OuterRD = nullptr;
1161 const FieldDecl *FAMDecl = nullptr;
1162 if (const auto *ME = dyn_cast<MemberExpr>(Base)) {
1163 // Check if \p Base is referencing the FAM itself.
1164 const ValueDecl *VD = ME->getMemberDecl();
1165 OuterRD = VD->getDeclContext()->getOuterLexicalRecordContext();
1166 FAMDecl = dyn_cast<FieldDecl>(VD);
1167 if (!FAMDecl)
1168 return nullptr;
1169 } else if (const auto *DRE = dyn_cast<DeclRefExpr>(Base)) {
1170 // Check if we're pointing to the whole struct.
1171 QualType Ty = DRE->getDecl()->getType();
1172 if (Ty->isPointerType())
1173 Ty = Ty->getPointeeType();
1174 OuterRD = Ty->getAsRecordDecl();
1175
1176 // If we have a situation like this:
1177 //
1178 // struct union_of_fams {
1179 // int flags;
1180 // union {
1181 // signed char normal_field;
1182 // struct {
1183 // int count1;
1184 // int arr1[] __counted_by(count1);
1185 // };
1186 // struct {
1187 // signed char count2;
1188 // int arr2[] __counted_by(count2);
1189 // };
1190 // };
1191 // };
1192 //
1193 // We don't know which 'count' to use in this scenario:
1194 //
1195 // size_t get_size(struct union_of_fams *p) {
1196 // return __builtin_dynamic_object_size(p, 1);
1197 // }
1198 //
1199 // Instead of calculating a wrong number, we give up.
1200 if (OuterRD && CountCountedByAttrs(OuterRD) > 1)
1201 return nullptr;
1202 }
1203
1204 if (!OuterRD)
1205 return nullptr;
1206
1207 // We call FindFlexibleArrayMemberFieldAndOffset even if FAMDecl is non-null to
1208 // get its offset.
1209 uint64_t Offset = 0;
1210 FAMDecl =
1211 FindFlexibleArrayMemberFieldAndOffset(Ctx, OuterRD, FAMDecl, Offset);
1212 Offset = Ctx.toCharUnitsFromBits(Offset).getQuantity();
1213
1214 if (!FAMDecl || !FAMDecl->getType()->isCountAttributedType())
1215 // No flexible array member found or it doesn't have the "counted_by"
1216 // attribute.
1217 return nullptr;
1218
1219 const FieldDecl *CountedByFD = FAMDecl->findCountedByField();
1220 if (!CountedByFD)
1221 // Can't find the field referenced by the "counted_by" attribute.
1222 return nullptr;
1223
1224 if (isa<DeclRefExpr>(Base))
1225 // The whole struct is specified in the __bdos. The calculation of the
1226 // whole size of the structure can be done in two ways:
1227 //
1228 // 1) sizeof(struct S) + count * sizeof(typeof(fam))
1229 // 2) offsetof(struct S, fam) + count * sizeof(typeof(fam))
1230 //
1231 // The first will add additional padding after the end of the array
1232 // allocation, while the second method is more precise but perhaps not
1233 // what programmers expect. See
1234 // https://lore.kernel.org/lkml/ZvV6X5FPBBW7CO1f@archlinux/ for a
1235 // discussion of the topic.
1236 //
1237 // GCC isn't (currently) able to calculate __bdos on a pointer to the whole
1238 // structure. Therefore, because of the above issue, we'll choose to match
1239 // what GCC does for consistency's sake.
1240 return nullptr;
1241
1242 // Build a load of the counted_by field.
1243 bool IsSigned = CountedByFD->getType()->isSignedIntegerType();
1244 Value *CountedByInst = EmitLoadOfCountedByField(Base, FAMDecl, CountedByFD);
1245 if (!CountedByInst)
1246 return getDefaultBuiltinObjectSizeResult(Type, ResType);
1247
1248 CountedByInst = Builder.CreateIntCast(CountedByInst, ResType, IsSigned);
1249
1250 // Build a load of the index and subtract it from the count.
1251 Value *IdxInst = nullptr;
1252 if (Idx) {
1253 if (Idx->HasSideEffects(getContext()))
1254 // We can't have side-effects.
1255 return getDefaultBuiltinObjectSizeResult(Type, ResType);
1256
1257 bool IdxSigned = Idx->getType()->isSignedIntegerType();
1258 IdxInst = EmitAnyExprToTemp(Idx).getScalarVal();
1259 IdxInst = Builder.CreateIntCast(IdxInst, ResType, IdxSigned);
1260
1261 // We go ahead with the calculation here. If the index turns out to be
1262 // negative, we'll catch it at the end.
1263 CountedByInst =
1264 Builder.CreateSub(CountedByInst, IdxInst, "", !IsSigned, IsSigned);
1265 }
1266
1267 // Calculate how large the flexible array member is in bytes.
1268 const ArrayType *ArrayTy = Ctx.getAsArrayType(FAMDecl->getType());
1269 CharUnits Size = Ctx.getTypeSizeInChars(ArrayTy->getElementType());
1270 llvm::Constant *ElemSize =
1271 llvm::ConstantInt::get(ResType, Size.getQuantity(), IsSigned);
1272 Value *Res =
1273 Builder.CreateMul(CountedByInst, ElemSize, "", !IsSigned, IsSigned);
1274 Res = Builder.CreateIntCast(Res, ResType, IsSigned);
1275
1276 // A negative \p IdxInst or \p CountedByInst means that the index lands
1277 // outside of the flexible array member. If that's the case, we want to
1278 // return 0.
1279 Value *Cmp = Builder.CreateIsNotNeg(CountedByInst);
1280 if (IdxInst)
1281 Cmp = Builder.CreateAnd(Builder.CreateIsNotNeg(IdxInst), Cmp);
1282
1283 return Builder.CreateSelect(Cmp, Res, ConstantInt::get(ResType, 0, IsSigned));
1284}
1285
1286/// Returns a Value corresponding to the size of the given expression.
1287/// This Value may be either of the following:
1288/// - A llvm::Argument (if E is a param with the pass_object_size attribute on
1289/// it)
1290/// - A call to the @llvm.objectsize intrinsic
1291///
1292/// EmittedE is the result of emitting `E` as a scalar expr. If it's non-null
1293/// and we wouldn't otherwise try to reference a pass_object_size parameter,
1294/// we'll call @llvm.objectsize on EmittedE, rather than emitting E.
1295llvm::Value *
1296CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type,
1297 llvm::IntegerType *ResType,
1298 llvm::Value *EmittedE, bool IsDynamic) {
1299 // We need to reference an argument if the pointer is a parameter with the
1300 // pass_object_size attribute.
1301 if (auto *D = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts())) {
1302 auto *Param = dyn_cast<ParmVarDecl>(D->getDecl());
1303 auto *PS = D->getDecl()->getAttr<PassObjectSizeAttr>();
1304 if (Param != nullptr && PS != nullptr &&
1305 areBOSTypesCompatible(PS->getType(), Type)) {
1306 auto Iter = SizeArguments.find(Param);
1307 assert(Iter != SizeArguments.end());
1308
1309 const ImplicitParamDecl *D = Iter->second;
1310 auto DIter = LocalDeclMap.find(D);
1311 assert(DIter != LocalDeclMap.end());
1312
1313 return EmitLoadOfScalar(DIter->second, /*Volatile=*/false,
1314 getContext().getSizeType(), E->getBeginLoc());
1315 }
1316 }
1317
1318 if (IsDynamic) {
1319 // Emit special code for a flexible array member with the "counted_by"
1320 // attribute.
1321 if (Value *V = emitFlexibleArrayMemberSize(E, Type, ResType))
1322 return V;
1323 }
1324
1325 // LLVM can't handle Type=3 appropriately, and __builtin_object_size shouldn't
1326 // evaluate E for side-effects. In either case, we shouldn't lower to
1327 // @llvm.objectsize.
1328 if (Type == 3 || (!EmittedE && E->HasSideEffects(getContext())))
1329 return getDefaultBuiltinObjectSizeResult(Type, ResType);
1330
1331 Value *Ptr = EmittedE ? EmittedE : EmitScalarExpr(E);
1332 assert(Ptr->getType()->isPointerTy() &&
1333 "Non-pointer passed to __builtin_object_size?");
1334
1335 Function *F =
1336 CGM.getIntrinsic(Intrinsic::objectsize, {ResType, Ptr->getType()});
1337
1338 // LLVM only supports 0 and 2; make sure we pass that along as a boolean.
1339 Value *Min = Builder.getInt1((Type & 2) != 0);
1340 // For GCC compatibility, __builtin_object_size treats NULL as unknown size.
1341 Value *NullIsUnknown = Builder.getTrue();
1342 Value *Dynamic = Builder.getInt1(IsDynamic);
1343 return Builder.CreateCall(F, {Ptr, Min, NullIsUnknown, Dynamic});
1344}
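// Example: __builtin_dynamic_object_size(p, 0) on a pointer that cannot be
// folded ends up as (sketch)
//   %size = call i64 @llvm.objectsize.i64.p0(ptr %p, i1 false, i1 true, i1 true)
// where the i1 arguments are (min, nullunknown, dynamic) as set up above.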
1345
1346namespace {
1347/// A struct to generically describe a bit test intrinsic.
1348struct BitTest {
1349 enum ActionKind : uint8_t { TestOnly, Complement, Reset, Set };
1350 enum InterlockingKind : uint8_t {
1351 Unlocked,
1352 Sequential,
1353 Acquire,
1354 Release,
1355 NoFence
1356 };
1357
1358 ActionKind Action;
1359 InterlockingKind Interlocking;
1360 bool Is64Bit;
1361
1362 static BitTest decodeBitTestBuiltin(unsigned BuiltinID);
1363};
1364
1365} // namespace
1366
1367BitTest BitTest::decodeBitTestBuiltin(unsigned BuiltinID) {
1368 switch (BuiltinID) {
1369 // Main portable variants.
1370 case Builtin::BI_bittest:
1371 return {TestOnly, Unlocked, false};
1372 case Builtin::BI_bittestandcomplement:
1373 return {Complement, Unlocked, false};
1374 case Builtin::BI_bittestandreset:
1375 return {Reset, Unlocked, false};
1376 case Builtin::BI_bittestandset:
1377 return {Set, Unlocked, false};
1378 case Builtin::BI_interlockedbittestandreset:
1379 return {Reset, Sequential, false};
1380 case Builtin::BI_interlockedbittestandset:
1381 return {Set, Sequential, false};
1382
1383 // X86-specific 64-bit variants.
1384 case Builtin::BI_bittest64:
1385 return {TestOnly, Unlocked, true};
1386 case Builtin::BI_bittestandcomplement64:
1387 return {Complement, Unlocked, true};
1388 case Builtin::BI_bittestandreset64:
1389 return {Reset, Unlocked, true};
1390 case Builtin::BI_bittestandset64:
1391 return {Set, Unlocked, true};
1392 case Builtin::BI_interlockedbittestandreset64:
1393 return {Reset, Sequential, true};
1394 case Builtin::BI_interlockedbittestandset64:
1395 return {Set, Sequential, true};
1396
1397 // ARM/AArch64-specific ordering variants.
1398 case Builtin::BI_interlockedbittestandset_acq:
1399 return {Set, Acquire, false};
1400 case Builtin::BI_interlockedbittestandset_rel:
1401 return {Set, Release, false};
1402 case Builtin::BI_interlockedbittestandset_nf:
1403 return {Set, NoFence, false};
1404 case Builtin::BI_interlockedbittestandreset_acq:
1405 return {Reset, Acquire, false};
1406 case Builtin::BI_interlockedbittestandreset_rel:
1407 return {Reset, Release, false};
1408 case Builtin::BI_interlockedbittestandreset_nf:
1409 return {Reset, NoFence, false};
1410 }
1411 llvm_unreachable("expected only bittest intrinsics");
1412}
1413
1414static char bitActionToX86BTCode(BitTest::ActionKind A) {
1415 switch (A) {
1416 case BitTest::TestOnly: return '\0';
1417 case BitTest::Complement: return 'c';
1418 case BitTest::Reset: return 'r';
1419 case BitTest::Set: return 's';
1420 }
1421 llvm_unreachable("invalid action");
1422}
1423
1424static llvm::Value *EmitX86BitTestIntrinsic(CodeGenFunction &CGF,
1425 BitTest BT,
1426 const CallExpr *E, Value *BitBase,
1427 Value *BitPos) {
1428 char Action = bitActionToX86BTCode(BT.Action);
1429 char SizeSuffix = BT.Is64Bit ? 'q' : 'l';
1430
1431 // Build the assembly.
1432 SmallString<64> Asm;
1433 raw_svector_ostream AsmOS(Asm);
1434 if (BT.Interlocking != BitTest::Unlocked)
1435 AsmOS << "lock ";
1436 AsmOS << "bt";
1437 if (Action)
1438 AsmOS << Action;
1439 AsmOS << SizeSuffix << " $2, ($1)";
1440
1441 // Build the constraints. FIXME: We should support immediates when possible.
1442 std::string Constraints = "={@ccc},r,r,~{cc},~{memory}";
1443 std::string_view MachineClobbers = CGF.getTarget().getClobbers();
1444 if (!MachineClobbers.empty()) {
1445 Constraints += ',';
1446 Constraints += MachineClobbers;
1447 }
1448 llvm::IntegerType *IntType = llvm::IntegerType::get(
1449 CGF.getLLVMContext(),
1450 CGF.getContext().getTypeSize(E->getArg(1)->getType()));
1451 llvm::FunctionType *FTy =
1452 llvm::FunctionType::get(CGF.Int8Ty, {CGF.UnqualPtrTy, IntType}, false);
1453
1454 llvm::InlineAsm *IA =
1455 llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true);
1456 return CGF.Builder.CreateCall(IA, {BitBase, BitPos});
1457}
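// Example: on x86-64, _interlockedbittestandset(&bits, 5) produces an
// inline-asm call roughly equivalent to
//   call i8 asm sideeffect "lock btsl $2, ($1)",
//        "={@ccc},r,r,~{cc},~{memory}"(ptr %bits, i32 5)
// with the carry flag returned as the old value of the bit.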
1458
1459static llvm::AtomicOrdering
1460getBitTestAtomicOrdering(BitTest::InterlockingKind I) {
1461 switch (I) {
1462 case BitTest::Unlocked: return llvm::AtomicOrdering::NotAtomic;
1463 case BitTest::Sequential: return llvm::AtomicOrdering::SequentiallyConsistent;
1464 case BitTest::Acquire: return llvm::AtomicOrdering::Acquire;
1465 case BitTest::Release: return llvm::AtomicOrdering::Release;
1466 case BitTest::NoFence: return llvm::AtomicOrdering::Monotonic;
1467 }
1468 llvm_unreachable("invalid interlocking");
1469}
1470
1471/// Emit a _bittest* intrinsic. These intrinsics take a pointer to an array of
1472/// bits and a bit position and read and optionally modify the bit at that
1473/// position. The position index can be arbitrarily large, i.e. it can be larger
1474/// than 31 or 63, so we need an indexed load in the general case.
1475static llvm::Value *EmitBitTestIntrinsic(CodeGenFunction &CGF,
1476 unsigned BuiltinID,
1477 const CallExpr *E) {
1478 Value *BitBase = CGF.EmitScalarExpr(E->getArg(0));
1479 Value *BitPos = CGF.EmitScalarExpr(E->getArg(1));
1480
1481 BitTest BT = BitTest::decodeBitTestBuiltin(BuiltinID);
1482
1483 // X86 has special BT, BTC, BTR, and BTS instructions that handle the array
1484 // indexing operation internally. Use them if possible.
1485 if (CGF.getTarget().getTriple().isX86())
1486 return EmitX86BitTestIntrinsic(CGF, BT, E, BitBase, BitPos);
1487
1488 // Otherwise, use generic code to load one byte and test the bit. Use all but
1489 // the bottom three bits as the array index, and the bottom three bits to form
1490 // a mask.
1491 // Bit = BitBaseI8[BitPos >> 3] & (1 << (BitPos & 0x7)) != 0;
1492 Value *ByteIndex = CGF.Builder.CreateAShr(
1493 BitPos, llvm::ConstantInt::get(BitPos->getType(), 3), "bittest.byteidx");
1494 Address ByteAddr(CGF.Builder.CreateInBoundsGEP(CGF.Int8Ty, BitBase, ByteIndex,
1495 "bittest.byteaddr"),
1496 CGF.Int8Ty, CharUnits::One());
1497 Value *PosLow =
1498 CGF.Builder.CreateAnd(CGF.Builder.CreateTrunc(BitPos, CGF.Int8Ty),
1499 llvm::ConstantInt::get(CGF.Int8Ty, 0x7));
1500
1501 // The updating instructions will need a mask.
1502 Value *Mask = nullptr;
1503 if (BT.Action != BitTest::TestOnly) {
1504 Mask = CGF.Builder.CreateShl(llvm::ConstantInt::get(CGF.Int8Ty, 1), PosLow,
1505 "bittest.mask");
1506 }
1507
1508 // Check the action and ordering of the interlocked intrinsics.
1509 llvm::AtomicOrdering Ordering = getBitTestAtomicOrdering(BT.Interlocking);
1510
1511 Value *OldByte = nullptr;
1512 if (Ordering != llvm::AtomicOrdering::NotAtomic) {
1513 // Emit a combined atomicrmw load/store operation for the interlocked
1514 // intrinsics.
1515 llvm::AtomicRMWInst::BinOp RMWOp = llvm::AtomicRMWInst::Or;
1516 if (BT.Action == BitTest::Reset) {
1517 Mask = CGF.Builder.CreateNot(Mask);
1518 RMWOp = llvm::AtomicRMWInst::And;
1519 }
1520 OldByte = CGF.Builder.CreateAtomicRMW(RMWOp, ByteAddr, Mask, Ordering);
1521 } else {
1522 // Emit a plain load for the non-interlocked intrinsics.
1523 OldByte = CGF.Builder.CreateLoad(ByteAddr, "bittest.byte");
1524 Value *NewByte = nullptr;
1525 switch (BT.Action) {
1526 case BitTest::TestOnly:
1527 // Don't store anything.
1528 break;
1529 case BitTest::Complement:
1530 NewByte = CGF.Builder.CreateXor(OldByte, Mask);
1531 break;
1532 case BitTest::Reset:
1533 NewByte = CGF.Builder.CreateAnd(OldByte, CGF.Builder.CreateNot(Mask));
1534 break;
1535 case BitTest::Set:
1536 NewByte = CGF.Builder.CreateOr(OldByte, Mask);
1537 break;
1538 }
1539 if (NewByte)
1540 CGF.Builder.CreateStore(NewByte, ByteAddr);
1541 }
1542
1543 // However we loaded the old byte, either by plain load or atomicrmw, shift
1544 // the bit into the low position and mask it to 0 or 1.
1545 Value *ShiftedByte = CGF.Builder.CreateLShr(OldByte, PosLow, "bittest.shr");
1546 return CGF.Builder.CreateAnd(
1547 ShiftedByte, llvm::ConstantInt::get(CGF.Int8Ty, 1), "bittest.res");
1548}
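// Illustration only: the non-x86 path above for _bittestandset(Base, Pos) is
// roughly equivalent to the following C, with the load/store pair replaced by
// a single atomicrmw for the interlocked variants:
//   unsigned char *Bytes = (unsigned char *)Base;
//   unsigned char Old = Bytes[Pos >> 3];
//   Bytes[Pos >> 3] = Old | (unsigned char)(1 << (Pos & 7));
//   unsigned char Result = (Old >> (Pos & 7)) & 1;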
1549
1550static llvm::Value *emitPPCLoadReserveIntrinsic(CodeGenFunction &CGF,
1551                                                unsigned BuiltinID,
1552                                                const CallExpr *E) {
1553 Value *Addr = CGF.EmitScalarExpr(E->getArg(0));
1554
1555  SmallString<64> Asm;
1556  raw_svector_ostream AsmOS(Asm);
1557 llvm::IntegerType *RetType = CGF.Int32Ty;
1558
1559 switch (BuiltinID) {
1560 case clang::PPC::BI__builtin_ppc_ldarx:
1561 AsmOS << "ldarx ";
1562 RetType = CGF.Int64Ty;
1563 break;
1564 case clang::PPC::BI__builtin_ppc_lwarx:
1565 AsmOS << "lwarx ";
1566 RetType = CGF.Int32Ty;
1567 break;
1568 case clang::PPC::BI__builtin_ppc_lharx:
1569 AsmOS << "lharx ";
1570 RetType = CGF.Int16Ty;
1571 break;
1572 case clang::PPC::BI__builtin_ppc_lbarx:
1573 AsmOS << "lbarx ";
1574 RetType = CGF.Int8Ty;
1575 break;
1576 default:
1577 llvm_unreachable("Expected only PowerPC load reserve intrinsics");
1578 }
1579
1580 AsmOS << "$0, ${1:y}";
1581
1582 std::string Constraints = "=r,*Z,~{memory}";
1583 std::string_view MachineClobbers = CGF.getTarget().getClobbers();
1584 if (!MachineClobbers.empty()) {
1585 Constraints += ',';
1586 Constraints += MachineClobbers;
1587 }
1588
1589 llvm::Type *PtrType = CGF.UnqualPtrTy;
1590 llvm::FunctionType *FTy = llvm::FunctionType::get(RetType, {PtrType}, false);
1591
1592 llvm::InlineAsm *IA =
1593 llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true);
1594 llvm::CallInst *CI = CGF.Builder.CreateCall(IA, {Addr});
1595 CI->addParamAttr(
1596 0, Attribute::get(CGF.getLLVMContext(), Attribute::ElementType, RetType));
1597 return CI;
1598}
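// Illustration only: __builtin_ppc_lwarx(p) goes down the lwarx case above and
// becomes roughly
//   %v = call i32 asm sideeffect "lwarx $0, ${1:y}",
//            "=r,*Z,~{memory}"(ptr elementtype(i32) %p)
// with the elementtype attribute added on the pointer operand so the backend
// knows the width of the reservation.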
1599
1600namespace {
1601enum class MSVCSetJmpKind {
1602 _setjmpex,
1603 _setjmp3,
1604 _setjmp
1605};
1606}
1607
1608/// MSVC handles setjmp a bit differently on different platforms. On every
1609/// architecture except 32-bit x86, the frame address is passed. On x86, extra
1610/// parameters can be passed as variadic arguments, but we always pass none.
1611static RValue EmitMSVCRTSetJmp(CodeGenFunction &CGF, MSVCSetJmpKind SJKind,
1612 const CallExpr *E) {
1613 llvm::Value *Arg1 = nullptr;
1614 llvm::Type *Arg1Ty = nullptr;
1615 StringRef Name;
1616 bool IsVarArg = false;
1617 if (SJKind == MSVCSetJmpKind::_setjmp3) {
1618 Name = "_setjmp3";
1619 Arg1Ty = CGF.Int32Ty;
1620 Arg1 = llvm::ConstantInt::get(CGF.IntTy, 0);
1621 IsVarArg = true;
1622 } else {
1623 Name = SJKind == MSVCSetJmpKind::_setjmp ? "_setjmp" : "_setjmpex";
1624 Arg1Ty = CGF.Int8PtrTy;
1625 if (CGF.getTarget().getTriple().getArch() == llvm::Triple::aarch64) {
1626 Arg1 = CGF.Builder.CreateCall(
1627 CGF.CGM.getIntrinsic(Intrinsic::sponentry, CGF.AllocaInt8PtrTy));
1628 } else
1629 Arg1 = CGF.Builder.CreateCall(
1630 CGF.CGM.getIntrinsic(Intrinsic::frameaddress, CGF.AllocaInt8PtrTy),
1631 llvm::ConstantInt::get(CGF.Int32Ty, 0));
1632 }
1633
1634 // Mark the call site and declaration with ReturnsTwice.
1635 llvm::Type *ArgTypes[2] = {CGF.Int8PtrTy, Arg1Ty};
1636 llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get(
1637 CGF.getLLVMContext(), llvm::AttributeList::FunctionIndex,
1638 llvm::Attribute::ReturnsTwice);
1639 llvm::FunctionCallee SetJmpFn = CGF.CGM.CreateRuntimeFunction(
1640 llvm::FunctionType::get(CGF.IntTy, ArgTypes, IsVarArg), Name,
1641 ReturnsTwiceAttr, /*Local=*/true);
1642
1643 llvm::Value *Buf = CGF.Builder.CreateBitOrPointerCast(
1644 CGF.EmitScalarExpr(E->getArg(0)), CGF.Int8PtrTy);
1645 llvm::Value *Args[] = {Buf, Arg1};
1646 llvm::CallBase *CB = CGF.EmitRuntimeCallOrInvoke(SetJmpFn, Args);
1647 CB->setAttributes(ReturnsTwiceAttr);
1648 return RValue::get(CB);
1649}
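// Illustration only: for MSVCSetJmpKind::_setjmp3 the call above has the shape
//   %r = call returns_twice i32 (ptr, i32, ...) @_setjmp3(ptr %buf, i32 0)
// while for _setjmp/_setjmpex the second argument is the frame address (or the
// result of llvm.sponentry on AArch64):
//   %fp = call ptr @llvm.frameaddress.p0(i32 0)
//   %r  = call returns_twice i32 @_setjmpex(ptr %buf, ptr %fp)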
1650
1651// Many MSVC builtins are available on x64, ARM and AArch64; to avoid
1652// repeating code, we handle them here.
1653enum class CodeGenFunction::MSVCIntrin {
1654  _BitScanForward,
1655  _BitScanReverse,
1656  _InterlockedAnd,
1657  _InterlockedAnd_acq,
1658  _InterlockedAnd_rel,
1659  _InterlockedAnd_nf,
1660  _InterlockedCompareExchange,
1661  _InterlockedCompareExchange_acq,
1662  _InterlockedCompareExchange_rel,
1663  _InterlockedCompareExchange_nf,
1664  _InterlockedCompareExchange128,
1665  _InterlockedCompareExchange128_acq,
1666  _InterlockedCompareExchange128_rel,
1667  _InterlockedCompareExchange128_nf,
1668  _InterlockedDecrement,
1669  _InterlockedDecrement_acq,
1670  _InterlockedDecrement_rel,
1671  _InterlockedDecrement_nf,
1672  _InterlockedExchange,
1673  _InterlockedExchange_acq,
1674  _InterlockedExchange_rel,
1675  _InterlockedExchange_nf,
1676  _InterlockedExchangeAdd,
1677  _InterlockedExchangeAdd_acq,
1678  _InterlockedExchangeAdd_rel,
1679  _InterlockedExchangeAdd_nf,
1680  _InterlockedExchangeSub,
1681  _InterlockedIncrement,
1682  _InterlockedIncrement_acq,
1683  _InterlockedIncrement_rel,
1684  _InterlockedIncrement_nf,
1685  _InterlockedOr,
1686  _InterlockedOr_acq,
1687  _InterlockedOr_rel,
1688  _InterlockedOr_nf,
1689  _InterlockedXor,
1690  _InterlockedXor_acq,
1691  _InterlockedXor_rel,
1692  _InterlockedXor_nf,
1693  __fastfail,
1694};
1695
1696static std::optional<CodeGenFunction::MSVCIntrin>
1697translateArmToMsvcIntrin(unsigned BuiltinID) {
1698 using MSVCIntrin = CodeGenFunction::MSVCIntrin;
1699 switch (BuiltinID) {
1700 default:
1701 return std::nullopt;
1702 case clang::ARM::BI_BitScanForward:
1703 case clang::ARM::BI_BitScanForward64:
1704 return MSVCIntrin::_BitScanForward;
1705 case clang::ARM::BI_BitScanReverse:
1706 case clang::ARM::BI_BitScanReverse64:
1707 return MSVCIntrin::_BitScanReverse;
1708 case clang::ARM::BI_InterlockedAnd64:
1709 return MSVCIntrin::_InterlockedAnd;
1710 case clang::ARM::BI_InterlockedExchange64:
1711 return MSVCIntrin::_InterlockedExchange;
1712 case clang::ARM::BI_InterlockedExchangeAdd64:
1713 return MSVCIntrin::_InterlockedExchangeAdd;
1714 case clang::ARM::BI_InterlockedExchangeSub64:
1715 return MSVCIntrin::_InterlockedExchangeSub;
1716 case clang::ARM::BI_InterlockedOr64:
1717 return MSVCIntrin::_InterlockedOr;
1718 case clang::ARM::BI_InterlockedXor64:
1719 return MSVCIntrin::_InterlockedXor;
1720 case clang::ARM::BI_InterlockedDecrement64:
1721 return MSVCIntrin::_InterlockedDecrement;
1722 case clang::ARM::BI_InterlockedIncrement64:
1723 return MSVCIntrin::_InterlockedIncrement;
1724 case clang::ARM::BI_InterlockedExchangeAdd8_acq:
1725 case clang::ARM::BI_InterlockedExchangeAdd16_acq:
1726 case clang::ARM::BI_InterlockedExchangeAdd_acq:
1727 case clang::ARM::BI_InterlockedExchangeAdd64_acq:
1728 return MSVCIntrin::_InterlockedExchangeAdd_acq;
1729 case clang::ARM::BI_InterlockedExchangeAdd8_rel:
1730 case clang::ARM::BI_InterlockedExchangeAdd16_rel:
1731 case clang::ARM::BI_InterlockedExchangeAdd_rel:
1732 case clang::ARM::BI_InterlockedExchangeAdd64_rel:
1733 return MSVCIntrin::_InterlockedExchangeAdd_rel;
1734 case clang::ARM::BI_InterlockedExchangeAdd8_nf:
1735 case clang::ARM::BI_InterlockedExchangeAdd16_nf:
1736 case clang::ARM::BI_InterlockedExchangeAdd_nf:
1737 case clang::ARM::BI_InterlockedExchangeAdd64_nf:
1738 return MSVCIntrin::_InterlockedExchangeAdd_nf;
1739 case clang::ARM::BI_InterlockedExchange8_acq:
1740 case clang::ARM::BI_InterlockedExchange16_acq:
1741 case clang::ARM::BI_InterlockedExchange_acq:
1742 case clang::ARM::BI_InterlockedExchange64_acq:
1743 case clang::ARM::BI_InterlockedExchangePointer_acq:
1744 return MSVCIntrin::_InterlockedExchange_acq;
1745 case clang::ARM::BI_InterlockedExchange8_rel:
1746 case clang::ARM::BI_InterlockedExchange16_rel:
1747 case clang::ARM::BI_InterlockedExchange_rel:
1748 case clang::ARM::BI_InterlockedExchange64_rel:
1749 case clang::ARM::BI_InterlockedExchangePointer_rel:
1750 return MSVCIntrin::_InterlockedExchange_rel;
1751 case clang::ARM::BI_InterlockedExchange8_nf:
1752 case clang::ARM::BI_InterlockedExchange16_nf:
1753 case clang::ARM::BI_InterlockedExchange_nf:
1754 case clang::ARM::BI_InterlockedExchange64_nf:
1755 case clang::ARM::BI_InterlockedExchangePointer_nf:
1756 return MSVCIntrin::_InterlockedExchange_nf;
1757 case clang::ARM::BI_InterlockedCompareExchange8_acq:
1758 case clang::ARM::BI_InterlockedCompareExchange16_acq:
1759 case clang::ARM::BI_InterlockedCompareExchange_acq:
1760 case clang::ARM::BI_InterlockedCompareExchange64_acq:
1761 case clang::ARM::BI_InterlockedCompareExchangePointer_acq:
1762 return MSVCIntrin::_InterlockedCompareExchange_acq;
1763 case clang::ARM::BI_InterlockedCompareExchange8_rel:
1764 case clang::ARM::BI_InterlockedCompareExchange16_rel:
1765 case clang::ARM::BI_InterlockedCompareExchange_rel:
1766 case clang::ARM::BI_InterlockedCompareExchange64_rel:
1767 case clang::ARM::BI_InterlockedCompareExchangePointer_rel:
1768 return MSVCIntrin::_InterlockedCompareExchange_rel;
1769 case clang::ARM::BI_InterlockedCompareExchange8_nf:
1770 case clang::ARM::BI_InterlockedCompareExchange16_nf:
1771 case clang::ARM::BI_InterlockedCompareExchange_nf:
1772 case clang::ARM::BI_InterlockedCompareExchange64_nf:
1773 return MSVCIntrin::_InterlockedCompareExchange_nf;
1774 case clang::ARM::BI_InterlockedOr8_acq:
1775 case clang::ARM::BI_InterlockedOr16_acq:
1776 case clang::ARM::BI_InterlockedOr_acq:
1777 case clang::ARM::BI_InterlockedOr64_acq:
1778 return MSVCIntrin::_InterlockedOr_acq;
1779 case clang::ARM::BI_InterlockedOr8_rel:
1780 case clang::ARM::BI_InterlockedOr16_rel:
1781 case clang::ARM::BI_InterlockedOr_rel:
1782 case clang::ARM::BI_InterlockedOr64_rel:
1783 return MSVCIntrin::_InterlockedOr_rel;
1784 case clang::ARM::BI_InterlockedOr8_nf:
1785 case clang::ARM::BI_InterlockedOr16_nf:
1786 case clang::ARM::BI_InterlockedOr_nf:
1787 case clang::ARM::BI_InterlockedOr64_nf:
1788 return MSVCIntrin::_InterlockedOr_nf;
1789 case clang::ARM::BI_InterlockedXor8_acq:
1790 case clang::ARM::BI_InterlockedXor16_acq:
1791 case clang::ARM::BI_InterlockedXor_acq:
1792 case clang::ARM::BI_InterlockedXor64_acq:
1793 return MSVCIntrin::_InterlockedXor_acq;
1794 case clang::ARM::BI_InterlockedXor8_rel:
1795 case clang::ARM::BI_InterlockedXor16_rel:
1796 case clang::ARM::BI_InterlockedXor_rel:
1797 case clang::ARM::BI_InterlockedXor64_rel:
1798 return MSVCIntrin::_InterlockedXor_rel;
1799 case clang::ARM::BI_InterlockedXor8_nf:
1800 case clang::ARM::BI_InterlockedXor16_nf:
1801 case clang::ARM::BI_InterlockedXor_nf:
1802 case clang::ARM::BI_InterlockedXor64_nf:
1803 return MSVCIntrin::_InterlockedXor_nf;
1804 case clang::ARM::BI_InterlockedAnd8_acq:
1805 case clang::ARM::BI_InterlockedAnd16_acq:
1806 case clang::ARM::BI_InterlockedAnd_acq:
1807 case clang::ARM::BI_InterlockedAnd64_acq:
1808 return MSVCIntrin::_InterlockedAnd_acq;
1809 case clang::ARM::BI_InterlockedAnd8_rel:
1810 case clang::ARM::BI_InterlockedAnd16_rel:
1811 case clang::ARM::BI_InterlockedAnd_rel:
1812 case clang::ARM::BI_InterlockedAnd64_rel:
1813 return MSVCIntrin::_InterlockedAnd_rel;
1814 case clang::ARM::BI_InterlockedAnd8_nf:
1815 case clang::ARM::BI_InterlockedAnd16_nf:
1816 case clang::ARM::BI_InterlockedAnd_nf:
1817 case clang::ARM::BI_InterlockedAnd64_nf:
1818 return MSVCIntrin::_InterlockedAnd_nf;
1819 case clang::ARM::BI_InterlockedIncrement16_acq:
1820 case clang::ARM::BI_InterlockedIncrement_acq:
1821 case clang::ARM::BI_InterlockedIncrement64_acq:
1822 return MSVCIntrin::_InterlockedIncrement_acq;
1823 case clang::ARM::BI_InterlockedIncrement16_rel:
1824 case clang::ARM::BI_InterlockedIncrement_rel:
1825 case clang::ARM::BI_InterlockedIncrement64_rel:
1826 return MSVCIntrin::_InterlockedIncrement_rel;
1827 case clang::ARM::BI_InterlockedIncrement16_nf:
1828 case clang::ARM::BI_InterlockedIncrement_nf:
1829 case clang::ARM::BI_InterlockedIncrement64_nf:
1830 return MSVCIntrin::_InterlockedIncrement_nf;
1831 case clang::ARM::BI_InterlockedDecrement16_acq:
1832 case clang::ARM::BI_InterlockedDecrement_acq:
1833 case clang::ARM::BI_InterlockedDecrement64_acq:
1834 return MSVCIntrin::_InterlockedDecrement_acq;
1835 case clang::ARM::BI_InterlockedDecrement16_rel:
1836 case clang::ARM::BI_InterlockedDecrement_rel:
1837 case clang::ARM::BI_InterlockedDecrement64_rel:
1838 return MSVCIntrin::_InterlockedDecrement_rel;
1839 case clang::ARM::BI_InterlockedDecrement16_nf:
1840 case clang::ARM::BI_InterlockedDecrement_nf:
1841 case clang::ARM::BI_InterlockedDecrement64_nf:
1842 return MSVCIntrin::_InterlockedDecrement_nf;
1843 }
1844 llvm_unreachable("must return from switch");
1845}
1846
1847static std::optional<CodeGenFunction::MSVCIntrin>
1848translateAarch64ToMsvcIntrin(unsigned BuiltinID) {
1849 using MSVCIntrin = CodeGenFunction::MSVCIntrin;
1850 switch (BuiltinID) {
1851 default:
1852 return std::nullopt;
1853 case clang::AArch64::BI_BitScanForward:
1854 case clang::AArch64::BI_BitScanForward64:
1855 return MSVCIntrin::_BitScanForward;
1856 case clang::AArch64::BI_BitScanReverse:
1857 case clang::AArch64::BI_BitScanReverse64:
1858 return MSVCIntrin::_BitScanReverse;
1859 case clang::AArch64::BI_InterlockedAnd64:
1860 return MSVCIntrin::_InterlockedAnd;
1861 case clang::AArch64::BI_InterlockedExchange64:
1862 return MSVCIntrin::_InterlockedExchange;
1863 case clang::AArch64::BI_InterlockedExchangeAdd64:
1864 return MSVCIntrin::_InterlockedExchangeAdd;
1865 case clang::AArch64::BI_InterlockedExchangeSub64:
1866 return MSVCIntrin::_InterlockedExchangeSub;
1867 case clang::AArch64::BI_InterlockedOr64:
1868 return MSVCIntrin::_InterlockedOr;
1869 case clang::AArch64::BI_InterlockedXor64:
1870 return MSVCIntrin::_InterlockedXor;
1871 case clang::AArch64::BI_InterlockedDecrement64:
1872 return MSVCIntrin::_InterlockedDecrement;
1873 case clang::AArch64::BI_InterlockedIncrement64:
1874 return MSVCIntrin::_InterlockedIncrement;
1875 case clang::AArch64::BI_InterlockedExchangeAdd8_acq:
1876 case clang::AArch64::BI_InterlockedExchangeAdd16_acq:
1877 case clang::AArch64::BI_InterlockedExchangeAdd_acq:
1878 case clang::AArch64::BI_InterlockedExchangeAdd64_acq:
1879 return MSVCIntrin::_InterlockedExchangeAdd_acq;
1880 case clang::AArch64::BI_InterlockedExchangeAdd8_rel:
1881 case clang::AArch64::BI_InterlockedExchangeAdd16_rel:
1882 case clang::AArch64::BI_InterlockedExchangeAdd_rel:
1883 case clang::AArch64::BI_InterlockedExchangeAdd64_rel:
1884 return MSVCIntrin::_InterlockedExchangeAdd_rel;
1885 case clang::AArch64::BI_InterlockedExchangeAdd8_nf:
1886 case clang::AArch64::BI_InterlockedExchangeAdd16_nf:
1887 case clang::AArch64::BI_InterlockedExchangeAdd_nf:
1888 case clang::AArch64::BI_InterlockedExchangeAdd64_nf:
1889 return MSVCIntrin::_InterlockedExchangeAdd_nf;
1890 case clang::AArch64::BI_InterlockedExchange8_acq:
1891 case clang::AArch64::BI_InterlockedExchange16_acq:
1892 case clang::AArch64::BI_InterlockedExchange_acq:
1893 case clang::AArch64::BI_InterlockedExchange64_acq:
1894 case clang::AArch64::BI_InterlockedExchangePointer_acq:
1895 return MSVCIntrin::_InterlockedExchange_acq;
1896 case clang::AArch64::BI_InterlockedExchange8_rel:
1897 case clang::AArch64::BI_InterlockedExchange16_rel:
1898 case clang::AArch64::BI_InterlockedExchange_rel:
1899 case clang::AArch64::BI_InterlockedExchange64_rel:
1900 case clang::AArch64::BI_InterlockedExchangePointer_rel:
1901 return MSVCIntrin::_InterlockedExchange_rel;
1902 case clang::AArch64::BI_InterlockedExchange8_nf:
1903 case clang::AArch64::BI_InterlockedExchange16_nf:
1904 case clang::AArch64::BI_InterlockedExchange_nf:
1905 case clang::AArch64::BI_InterlockedExchange64_nf:
1906 case clang::AArch64::BI_InterlockedExchangePointer_nf:
1907 return MSVCIntrin::_InterlockedExchange_nf;
1908 case clang::AArch64::BI_InterlockedCompareExchange8_acq:
1909 case clang::AArch64::BI_InterlockedCompareExchange16_acq:
1910 case clang::AArch64::BI_InterlockedCompareExchange_acq:
1911 case clang::AArch64::BI_InterlockedCompareExchange64_acq:
1912 case clang::AArch64::BI_InterlockedCompareExchangePointer_acq:
1913 return MSVCIntrin::_InterlockedCompareExchange_acq;
1914 case clang::AArch64::BI_InterlockedCompareExchange8_rel:
1915 case clang::AArch64::BI_InterlockedCompareExchange16_rel:
1916 case clang::AArch64::BI_InterlockedCompareExchange_rel:
1917 case clang::AArch64::BI_InterlockedCompareExchange64_rel:
1918 case clang::AArch64::BI_InterlockedCompareExchangePointer_rel:
1919 return MSVCIntrin::_InterlockedCompareExchange_rel;
1920 case clang::AArch64::BI_InterlockedCompareExchange8_nf:
1921 case clang::AArch64::BI_InterlockedCompareExchange16_nf:
1922 case clang::AArch64::BI_InterlockedCompareExchange_nf:
1923 case clang::AArch64::BI_InterlockedCompareExchange64_nf:
1924 return MSVCIntrin::_InterlockedCompareExchange_nf;
1925 case clang::AArch64::BI_InterlockedCompareExchange128:
1926 return MSVCIntrin::_InterlockedCompareExchange128;
1927 case clang::AArch64::BI_InterlockedCompareExchange128_acq:
1928 return MSVCIntrin::_InterlockedCompareExchange128_acq;
1929 case clang::AArch64::BI_InterlockedCompareExchange128_nf:
1930 return MSVCIntrin::_InterlockedCompareExchange128_nf;
1931 case clang::AArch64::BI_InterlockedCompareExchange128_rel:
1932 return MSVCIntrin::_InterlockedCompareExchange128_rel;
1933 case clang::AArch64::BI_InterlockedOr8_acq:
1934 case clang::AArch64::BI_InterlockedOr16_acq:
1935 case clang::AArch64::BI_InterlockedOr_acq:
1936 case clang::AArch64::BI_InterlockedOr64_acq:
1937 return MSVCIntrin::_InterlockedOr_acq;
1938 case clang::AArch64::BI_InterlockedOr8_rel:
1939 case clang::AArch64::BI_InterlockedOr16_rel:
1940 case clang::AArch64::BI_InterlockedOr_rel:
1941 case clang::AArch64::BI_InterlockedOr64_rel:
1942 return MSVCIntrin::_InterlockedOr_rel;
1943 case clang::AArch64::BI_InterlockedOr8_nf:
1944 case clang::AArch64::BI_InterlockedOr16_nf:
1945 case clang::AArch64::BI_InterlockedOr_nf:
1946 case clang::AArch64::BI_InterlockedOr64_nf:
1947 return MSVCIntrin::_InterlockedOr_nf;
1948 case clang::AArch64::BI_InterlockedXor8_acq:
1949 case clang::AArch64::BI_InterlockedXor16_acq:
1950 case clang::AArch64::BI_InterlockedXor_acq:
1951 case clang::AArch64::BI_InterlockedXor64_acq:
1952 return MSVCIntrin::_InterlockedXor_acq;
1953 case clang::AArch64::BI_InterlockedXor8_rel:
1954 case clang::AArch64::BI_InterlockedXor16_rel:
1955 case clang::AArch64::BI_InterlockedXor_rel:
1956 case clang::AArch64::BI_InterlockedXor64_rel:
1957 return MSVCIntrin::_InterlockedXor_rel;
1958 case clang::AArch64::BI_InterlockedXor8_nf:
1959 case clang::AArch64::BI_InterlockedXor16_nf:
1960 case clang::AArch64::BI_InterlockedXor_nf:
1961 case clang::AArch64::BI_InterlockedXor64_nf:
1962 return MSVCIntrin::_InterlockedXor_nf;
1963 case clang::AArch64::BI_InterlockedAnd8_acq:
1964 case clang::AArch64::BI_InterlockedAnd16_acq:
1965 case clang::AArch64::BI_InterlockedAnd_acq:
1966 case clang::AArch64::BI_InterlockedAnd64_acq:
1967 return MSVCIntrin::_InterlockedAnd_acq;
1968 case clang::AArch64::BI_InterlockedAnd8_rel:
1969 case clang::AArch64::BI_InterlockedAnd16_rel:
1970 case clang::AArch64::BI_InterlockedAnd_rel:
1971 case clang::AArch64::BI_InterlockedAnd64_rel:
1972 return MSVCIntrin::_InterlockedAnd_rel;
1973 case clang::AArch64::BI_InterlockedAnd8_nf:
1974 case clang::AArch64::BI_InterlockedAnd16_nf:
1975 case clang::AArch64::BI_InterlockedAnd_nf:
1976 case clang::AArch64::BI_InterlockedAnd64_nf:
1977 return MSVCIntrin::_InterlockedAnd_nf;
1978 case clang::AArch64::BI_InterlockedIncrement16_acq:
1979 case clang::AArch64::BI_InterlockedIncrement_acq:
1980 case clang::AArch64::BI_InterlockedIncrement64_acq:
1981 return MSVCIntrin::_InterlockedIncrement_acq;
1982 case clang::AArch64::BI_InterlockedIncrement16_rel:
1983 case clang::AArch64::BI_InterlockedIncrement_rel:
1984 case clang::AArch64::BI_InterlockedIncrement64_rel:
1985 return MSVCIntrin::_InterlockedIncrement_rel;
1986 case clang::AArch64::BI_InterlockedIncrement16_nf:
1987 case clang::AArch64::BI_InterlockedIncrement_nf:
1988 case clang::AArch64::BI_InterlockedIncrement64_nf:
1989 return MSVCIntrin::_InterlockedIncrement_nf;
1990 case clang::AArch64::BI_InterlockedDecrement16_acq:
1991 case clang::AArch64::BI_InterlockedDecrement_acq:
1992 case clang::AArch64::BI_InterlockedDecrement64_acq:
1993 return MSVCIntrin::_InterlockedDecrement_acq;
1994 case clang::AArch64::BI_InterlockedDecrement16_rel:
1995 case clang::AArch64::BI_InterlockedDecrement_rel:
1996 case clang::AArch64::BI_InterlockedDecrement64_rel:
1997 return MSVCIntrin::_InterlockedDecrement_rel;
1998 case clang::AArch64::BI_InterlockedDecrement16_nf:
1999 case clang::AArch64::BI_InterlockedDecrement_nf:
2000 case clang::AArch64::BI_InterlockedDecrement64_nf:
2001 return MSVCIntrin::_InterlockedDecrement_nf;
2002 }
2003 llvm_unreachable("must return from switch");
2004}
2005
2006static std::optional<CodeGenFunction::MSVCIntrin>
2007translateX86ToMsvcIntrin(unsigned BuiltinID) {
2008 using MSVCIntrin = CodeGenFunction::MSVCIntrin;
2009 switch (BuiltinID) {
2010 default:
2011 return std::nullopt;
2012 case clang::X86::BI_BitScanForward:
2013 case clang::X86::BI_BitScanForward64:
2014 return MSVCIntrin::_BitScanForward;
2015 case clang::X86::BI_BitScanReverse:
2016 case clang::X86::BI_BitScanReverse64:
2017 return MSVCIntrin::_BitScanReverse;
2018 case clang::X86::BI_InterlockedAnd64:
2019 return MSVCIntrin::_InterlockedAnd;
2020 case clang::X86::BI_InterlockedCompareExchange128:
2021 return MSVCIntrin::_InterlockedCompareExchange128;
2022 case clang::X86::BI_InterlockedExchange64:
2023 return MSVCIntrin::_InterlockedExchange;
2024 case clang::X86::BI_InterlockedExchangeAdd64:
2025 return MSVCIntrin::_InterlockedExchangeAdd;
2026 case clang::X86::BI_InterlockedExchangeSub64:
2027 return MSVCIntrin::_InterlockedExchangeSub;
2028 case clang::X86::BI_InterlockedOr64:
2029 return MSVCIntrin::_InterlockedOr;
2030 case clang::X86::BI_InterlockedXor64:
2031 return MSVCIntrin::_InterlockedXor;
2032 case clang::X86::BI_InterlockedDecrement64:
2033 return MSVCIntrin::_InterlockedDecrement;
2034 case clang::X86::BI_InterlockedIncrement64:
2035 return MSVCIntrin::_InterlockedIncrement;
2036 }
2037 llvm_unreachable("must return from switch");
2038}
2039
2040// Emit an MSVC intrinsic. Assumes that arguments have *not* been evaluated.
2041Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID,
2042 const CallExpr *E) {
2043 switch (BuiltinID) {
2044 case MSVCIntrin::_BitScanForward:
2045 case MSVCIntrin::_BitScanReverse: {
2046 Address IndexAddress(EmitPointerWithAlignment(E->getArg(0)));
2047 Value *ArgValue = EmitScalarExpr(E->getArg(1));
2048
2049 llvm::Type *ArgType = ArgValue->getType();
2050 llvm::Type *IndexType = IndexAddress.getElementType();
2051 llvm::Type *ResultType = ConvertType(E->getType());
2052
2053 Value *ArgZero = llvm::Constant::getNullValue(ArgType);
2054 Value *ResZero = llvm::Constant::getNullValue(ResultType);
2055 Value *ResOne = llvm::ConstantInt::get(ResultType, 1);
2056
2057 BasicBlock *Begin = Builder.GetInsertBlock();
2058 BasicBlock *End = createBasicBlock("bitscan_end", this->CurFn);
2059 Builder.SetInsertPoint(End);
2060 PHINode *Result = Builder.CreatePHI(ResultType, 2, "bitscan_result");
2061
2062 Builder.SetInsertPoint(Begin);
2063 Value *IsZero = Builder.CreateICmpEQ(ArgValue, ArgZero);
2064 BasicBlock *NotZero = createBasicBlock("bitscan_not_zero", this->CurFn);
2065 Builder.CreateCondBr(IsZero, End, NotZero);
2066 Result->addIncoming(ResZero, Begin);
2067
2068 Builder.SetInsertPoint(NotZero);
2069
2070 if (BuiltinID == MSVCIntrin::_BitScanForward) {
2071 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
2072 Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
2073 ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
2074 Builder.CreateStore(ZeroCount, IndexAddress, false);
2075 } else {
2076 unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
2077 Value *ArgTypeLastIndex = llvm::ConstantInt::get(IndexType, ArgWidth - 1);
2078
2079 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
2080 Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
2081 ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
2082 Value *Index = Builder.CreateNSWSub(ArgTypeLastIndex, ZeroCount);
2083 Builder.CreateStore(Index, IndexAddress, false);
2084 }
2085 Builder.CreateBr(End);
2086 Result->addIncoming(ResOne, NotZero);
2087
2088 Builder.SetInsertPoint(End);
2089 return Result;
2090 }
2091 case MSVCIntrin::_InterlockedAnd:
2092 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E);
2093 case MSVCIntrin::_InterlockedExchange:
2094 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E);
2095 case MSVCIntrin::_InterlockedExchangeAdd:
2096 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E);
2097 case MSVCIntrin::_InterlockedExchangeSub:
2098 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Sub, E);
2099 case MSVCIntrin::_InterlockedOr:
2100 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E);
2101 case MSVCIntrin::_InterlockedXor:
2102 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E);
2103 case MSVCIntrin::_InterlockedExchangeAdd_acq:
2104 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
2105 AtomicOrdering::Acquire);
2106 case MSVCIntrin::_InterlockedExchangeAdd_rel:
2107 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
2108 AtomicOrdering::Release);
2109 case MSVCIntrin::_InterlockedExchangeAdd_nf:
2110 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
2111 AtomicOrdering::Monotonic);
2112 case MSVCIntrin::_InterlockedExchange_acq:
2113 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
2114 AtomicOrdering::Acquire);
2115 case MSVCIntrin::_InterlockedExchange_rel:
2116 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
2117 AtomicOrdering::Release);
2118 case MSVCIntrin::_InterlockedExchange_nf:
2119 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
2120 AtomicOrdering::Monotonic);
2121 case MSVCIntrin::_InterlockedCompareExchange:
2122 return EmitAtomicCmpXchgForMSIntrin(*this, E);
2123 case MSVCIntrin::_InterlockedCompareExchange_acq:
2124 return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Acquire);
2125 case MSVCIntrin::_InterlockedCompareExchange_rel:
2126 return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Release);
2127 case MSVCIntrin::_InterlockedCompareExchange_nf:
2128 return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Monotonic);
2129 case MSVCIntrin::_InterlockedCompareExchange128:
2130    return EmitAtomicCmpXchg128ForMSIntrin(
2131        *this, E, AtomicOrdering::SequentiallyConsistent);
2132 case MSVCIntrin::_InterlockedCompareExchange128_acq:
2133 return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Acquire);
2134 case MSVCIntrin::_InterlockedCompareExchange128_rel:
2135 return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Release);
2136 case MSVCIntrin::_InterlockedCompareExchange128_nf:
2137 return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Monotonic);
2138 case MSVCIntrin::_InterlockedOr_acq:
2139 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
2140 AtomicOrdering::Acquire);
2141 case MSVCIntrin::_InterlockedOr_rel:
2142 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
2143 AtomicOrdering::Release);
2144 case MSVCIntrin::_InterlockedOr_nf:
2145 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
2146 AtomicOrdering::Monotonic);
2147 case MSVCIntrin::_InterlockedXor_acq:
2148 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
2149 AtomicOrdering::Acquire);
2150 case MSVCIntrin::_InterlockedXor_rel:
2151 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
2152 AtomicOrdering::Release);
2153 case MSVCIntrin::_InterlockedXor_nf:
2154 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
2155 AtomicOrdering::Monotonic);
2156 case MSVCIntrin::_InterlockedAnd_acq:
2157 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
2158 AtomicOrdering::Acquire);
2159 case MSVCIntrin::_InterlockedAnd_rel:
2160 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
2161 AtomicOrdering::Release);
2162 case MSVCIntrin::_InterlockedAnd_nf:
2163 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
2164 AtomicOrdering::Monotonic);
2165 case MSVCIntrin::_InterlockedIncrement_acq:
2166 return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Acquire);
2167 case MSVCIntrin::_InterlockedIncrement_rel:
2168 return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Release);
2169 case MSVCIntrin::_InterlockedIncrement_nf:
2170 return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Monotonic);
2171 case MSVCIntrin::_InterlockedDecrement_acq:
2172 return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Acquire);
2173 case MSVCIntrin::_InterlockedDecrement_rel:
2174 return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Release);
2175 case MSVCIntrin::_InterlockedDecrement_nf:
2176 return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Monotonic);
2177
2178 case MSVCIntrin::_InterlockedDecrement:
2179 return EmitAtomicDecrementValue(*this, E);
2180 case MSVCIntrin::_InterlockedIncrement:
2181 return EmitAtomicIncrementValue(*this, E);
2182
2183 case MSVCIntrin::__fastfail: {
2184 // Request immediate process termination from the kernel. The instruction
2185 // sequences to do this are documented on MSDN:
2186 // https://msdn.microsoft.com/en-us/library/dn774154.aspx
2187 llvm::Triple::ArchType ISA = getTarget().getTriple().getArch();
2188 StringRef Asm, Constraints;
2189 switch (ISA) {
2190 default:
2191 ErrorUnsupported(E, "__fastfail call for this architecture");
2192 break;
2193 case llvm::Triple::x86:
2194 case llvm::Triple::x86_64:
2195 Asm = "int $$0x29";
2196 Constraints = "{cx}";
2197 break;
2198 case llvm::Triple::thumb:
2199 Asm = "udf #251";
2200 Constraints = "{r0}";
2201 break;
2202 case llvm::Triple::aarch64:
2203 Asm = "brk #0xF003";
2204 Constraints = "{w0}";
2205 }
2206 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, {Int32Ty}, false);
2207 llvm::InlineAsm *IA =
2208 llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true);
2209 llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
2210 getLLVMContext(), llvm::AttributeList::FunctionIndex,
2211 llvm::Attribute::NoReturn);
2212 llvm::CallInst *CI = Builder.CreateCall(IA, EmitScalarExpr(E->getArg(0)));
2213 CI->setAttributes(NoReturnAttr);
2214 return CI;
2215 }
2216 }
2217 llvm_unreachable("Incorrect MSVC intrinsic!");
2218}
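// Illustration only: the _BitScanForward handling above produces IR of roughly
// this shape (value names invented for readability):
//   %iszero = icmp eq i32 %arg, 0
//   br i1 %iszero, label %bitscan_end, label %bitscan_not_zero
// bitscan_not_zero:
//   %idx = call i32 @llvm.cttz.i32(i32 %arg, i1 true)
//   store i32 %idx, ptr %index
//   br label %bitscan_end
// bitscan_end:
//   %bitscan_result = phi i8 [ 0, %entry ], [ 1, %bitscan_not_zero ]
// i.e. the builtin returns 0 and leaves *Index untouched when the argument is
// zero, and otherwise stores the bit index and returns 1.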
2219
2220namespace {
2221// ARC cleanup for __builtin_os_log_format
2222struct CallObjCArcUse final : EHScopeStack::Cleanup {
2223 CallObjCArcUse(llvm::Value *object) : object(object) {}
2224 llvm::Value *object;
2225
2226 void Emit(CodeGenFunction &CGF, Flags flags) override {
2227 CGF.EmitARCIntrinsicUse(object);
2228 }
2229};
2230}
2231
2232Value *CodeGenFunction::EmitCheckedArgForBuiltin(const Expr *E,
2233                                                 BuiltinCheckKind Kind) {
2234 assert((Kind == BCK_CLZPassedZero || Kind == BCK_CTZPassedZero) &&
2235 "Unsupported builtin check kind");
2236
2237 Value *ArgValue = EmitScalarExpr(E);
2238 if (!SanOpts.has(SanitizerKind::Builtin))
2239 return ArgValue;
2240
2241 SanitizerScope SanScope(this);
2242 Value *Cond = Builder.CreateICmpNE(
2243 ArgValue, llvm::Constant::getNullValue(ArgValue->getType()));
2244 EmitCheck(std::make_pair(Cond, SanitizerKind::SO_Builtin),
2245 SanitizerHandler::InvalidBuiltin,
2246            {EmitCheckSourceLocation(E->getExprLoc()),
2247             llvm::ConstantInt::get(Builder.getInt8Ty(), Kind)},
2248 {});
2249 return ArgValue;
2250}
2251
2252Value *CodeGenFunction::EmitCheckedArgForAssume(const Expr *E) {
2253  Value *ArgValue = EvaluateExprAsBool(E);
2254 if (!SanOpts.has(SanitizerKind::Builtin))
2255 return ArgValue;
2256
2257 SanitizerScope SanScope(this);
2258 EmitCheck(
2259 std::make_pair(ArgValue, SanitizerKind::SO_Builtin),
2260 SanitizerHandler::InvalidBuiltin,
2261      {EmitCheckSourceLocation(E->getExprLoc()),
2262       llvm::ConstantInt::get(Builder.getInt8Ty(), BCK_AssumePassedFalse)},
2263 std::nullopt);
2264 return ArgValue;
2265}
2266
2267static Value *EmitAbs(CodeGenFunction &CGF, Value *ArgValue, bool HasNSW) {
2268 return CGF.Builder.CreateBinaryIntrinsic(
2269 Intrinsic::abs, ArgValue,
2270 ConstantInt::get(CGF.Builder.getInt1Ty(), HasNSW));
2271}
2272
2273static Value *EmitOverflowCheckedAbs(CodeGenFunction &CGF, const CallExpr *E,
2274                                     bool SanitizeOverflow) {
2275 Value *ArgValue = CGF.EmitScalarExpr(E->getArg(0));
2276
2277 // Try to eliminate overflow check.
2278 if (const auto *VCI = dyn_cast<llvm::ConstantInt>(ArgValue)) {
2279 if (!VCI->isMinSignedValue())
2280 return EmitAbs(CGF, ArgValue, true);
2281 }
2282
2283 CodeGenFunction::SanitizerScope SanScope(&CGF);
2284
2285 Constant *Zero = Constant::getNullValue(ArgValue->getType());
2286 Value *ResultAndOverflow = CGF.Builder.CreateBinaryIntrinsic(
2287 Intrinsic::ssub_with_overflow, Zero, ArgValue);
2288 Value *Result = CGF.Builder.CreateExtractValue(ResultAndOverflow, 0);
2289 Value *NotOverflow = CGF.Builder.CreateNot(
2290 CGF.Builder.CreateExtractValue(ResultAndOverflow, 1));
2291
2292 // TODO: support -ftrapv-handler.
2293 if (SanitizeOverflow) {
2294 CGF.EmitCheck({{NotOverflow, SanitizerKind::SO_SignedIntegerOverflow}},
2295 SanitizerHandler::NegateOverflow,
2296 {CGF.EmitCheckSourceLocation(E->getArg(0)->getExprLoc()),
2297                   CGF.EmitCheckTypeDescriptor(E->getType())},
2298                  {ArgValue});
2299 } else
2300 CGF.EmitTrapCheck(NotOverflow, SanitizerHandler::SubOverflow);
2301
2302 Value *CmpResult = CGF.Builder.CreateICmpSLT(ArgValue, Zero, "abscond");
2303 return CGF.Builder.CreateSelect(CmpResult, Result, ArgValue, "abs");
2304}
2305
2306/// Get the argument type for arguments to os_log_helper.
2307static CanQualType getOSLogArgType(ASTContext &C, int Size) {
2308  QualType UnsignedTy = C.getIntTypeForBitwidth(Size * 8, /*Signed=*/false);
2309 return C.getCanonicalType(UnsignedTy);
2310}
2311
2312llvm::Function *CodeGenFunction::generateBuiltinOSLogHelperFunction(
2313    const analyze_os_log::OSLogBufferLayout &Layout,
2314    CharUnits BufferAlignment) {
2315 ASTContext &Ctx = getContext();
2316
2317  SmallString<64> Name;
2318  {
2319 raw_svector_ostream OS(Name);
2320 OS << "__os_log_helper";
2321 OS << "_" << BufferAlignment.getQuantity();
2322 OS << "_" << int(Layout.getSummaryByte());
2323 OS << "_" << int(Layout.getNumArgsByte());
2324 for (const auto &Item : Layout.Items)
2325 OS << "_" << int(Item.getSizeByte()) << "_"
2326 << int(Item.getDescriptorByte());
2327 }
2328
2329 if (llvm::Function *F = CGM.getModule().getFunction(Name))
2330 return F;
2331
2332  llvm::SmallVector<QualType, 4> ArgTys;
2333  FunctionArgList Args;
2334 Args.push_back(ImplicitParamDecl::Create(
2335 Ctx, nullptr, SourceLocation(), &Ctx.Idents.get("buffer"), Ctx.VoidPtrTy,
2336      ImplicitParamKind::Other));
2337  ArgTys.emplace_back(Ctx.VoidPtrTy);
2338
2339 for (unsigned int I = 0, E = Layout.Items.size(); I < E; ++I) {
2340 char Size = Layout.Items[I].getSizeByte();
2341 if (!Size)
2342 continue;
2343
2344 QualType ArgTy = getOSLogArgType(Ctx, Size);
2345 Args.push_back(ImplicitParamDecl::Create(
2346 Ctx, nullptr, SourceLocation(),
2347 &Ctx.Idents.get(std::string("arg") + llvm::to_string(I)), ArgTy,
2348        ImplicitParamKind::Other));
2349    ArgTys.emplace_back(ArgTy);
2350 }
2351
2352 QualType ReturnTy = Ctx.VoidTy;
2353
2354 // The helper function has linkonce_odr linkage to enable the linker to merge
2355 // identical functions. To ensure the merging always happens, 'noinline' is
2356 // attached to the function when compiling with -Oz.
2357 const CGFunctionInfo &FI =
2358      CGM.getTypes().arrangeBuiltinFunctionDeclaration(ReturnTy, Args);
2359  llvm::FunctionType *FuncTy = CGM.getTypes().GetFunctionType(FI);
2360 llvm::Function *Fn = llvm::Function::Create(
2361 FuncTy, llvm::GlobalValue::LinkOnceODRLinkage, Name, &CGM.getModule());
2362 Fn->setVisibility(llvm::GlobalValue::HiddenVisibility);
2363 CGM.SetLLVMFunctionAttributes(GlobalDecl(), FI, Fn, /*IsThunk=*/false);
2364  CGM.SetLLVMFunctionAttributesForDefinition(nullptr, Fn);
2365  Fn->setDoesNotThrow();
2366
2367 // Attach 'noinline' at -Oz.
2368 if (CGM.getCodeGenOpts().OptimizeSize == 2)
2369 Fn->addFnAttr(llvm::Attribute::NoInline);
2370
2371 auto NL = ApplyDebugLocation::CreateEmpty(*this);
2372 StartFunction(GlobalDecl(), ReturnTy, Fn, FI, Args);
2373
2374 // Create a scope with an artificial location for the body of this function.
2375 auto AL = ApplyDebugLocation::CreateArtificial(*this);
2376
2377 CharUnits Offset;
2378  Address BufAddr = makeNaturalAddressForPointer(
2379      Builder.CreateLoad(GetAddrOfLocalVar(Args[0]), "buf"), Ctx.VoidTy,
2380 BufferAlignment);
2381 Builder.CreateStore(Builder.getInt8(Layout.getSummaryByte()),
2382 Builder.CreateConstByteGEP(BufAddr, Offset++, "summary"));
2383 Builder.CreateStore(Builder.getInt8(Layout.getNumArgsByte()),
2384 Builder.CreateConstByteGEP(BufAddr, Offset++, "numArgs"));
2385
2386 unsigned I = 1;
2387 for (const auto &Item : Layout.Items) {
2388    Builder.CreateStore(
2389        Builder.getInt8(Item.getDescriptorByte()),
2390 Builder.CreateConstByteGEP(BufAddr, Offset++, "argDescriptor"));
2391    Builder.CreateStore(
2392        Builder.getInt8(Item.getSizeByte()),
2393 Builder.CreateConstByteGEP(BufAddr, Offset++, "argSize"));
2394
2395 CharUnits Size = Item.size();
2396 if (!Size.getQuantity())
2397 continue;
2398
2399 Address Arg = GetAddrOfLocalVar(Args[I]);
2400 Address Addr = Builder.CreateConstByteGEP(BufAddr, Offset, "argData");
2401 Addr = Addr.withElementType(Arg.getElementType());
2402    Builder.CreateStore(Builder.CreateLoad(Arg), Addr);
2403    Offset += Size;
2404 ++I;
2405 }
2406
2407  FinishFunction();
2408
2409 return Fn;
2410}
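// Illustration only: the helper emitted above writes the os_log buffer in the
// layout
//   [summary][numArgs]([descriptor][size][data...])*
// so for a single 4-byte argument it behaves roughly like
//   void __os_log_helper_<layout>(char *buf, unsigned arg0) {
//     buf[0] = summary; buf[1] = numArgs;
//     buf[2] = descriptor0; buf[3] = 4;
//     memcpy(buf + 4, &arg0, 4);
//   }
// where <layout>, summary, numArgs and descriptor0 stand for the constants
// encoded in the helper's name and in the OSLogBufferLayout.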
2411
2412RValue CodeGenFunction::emitBuiltinOSLogFormat(const CallExpr &E) {
2413  assert(E.getNumArgs() >= 2 &&
2414 "__builtin_os_log_format takes at least 2 arguments");
2415 ASTContext &Ctx = getContext();
2416  analyze_os_log::OSLogBufferLayout Layout;
2417  analyze_os_log::computeOSLogBufferLayout(Ctx, &E, Layout);
2418  Address BufAddr = EmitPointerWithAlignment(E.getArg(0));
2419 llvm::SmallVector<llvm::Value *, 4> RetainableOperands;
2420
2421 // Ignore argument 1, the format string. It is not currently used.
2422 CallArgList Args;
2423 Args.add(RValue::get(BufAddr.emitRawPointer(*this)), Ctx.VoidPtrTy);
2424
2425 for (const auto &Item : Layout.Items) {
2426 int Size = Item.getSizeByte();
2427 if (!Size)
2428 continue;
2429
2430 llvm::Value *ArgVal;
2431
2432 if (Item.getKind() == analyze_os_log::OSLogBufferItem::MaskKind) {
2433 uint64_t Val = 0;
2434 for (unsigned I = 0, E = Item.getMaskType().size(); I < E; ++I)
2435 Val |= ((uint64_t)Item.getMaskType()[I]) << I * 8;
2436 ArgVal = llvm::Constant::getIntegerValue(Int64Ty, llvm::APInt(64, Val));
2437 } else if (const Expr *TheExpr = Item.getExpr()) {
2438 ArgVal = EmitScalarExpr(TheExpr, /*Ignore*/ false);
2439
2440 // If a temporary object that requires destruction after the full
2441 // expression is passed, push a lifetime-extended cleanup to extend its
2442 // lifetime to the end of the enclosing block scope.
2443 auto LifetimeExtendObject = [&](const Expr *E) {
2444 E = E->IgnoreParenCasts();
2445 // Extend lifetimes of objects returned by function calls and message
2446 // sends.
2447
2448 // FIXME: We should do this in other cases in which temporaries are
2449 // created including arguments of non-ARC types (e.g., C++
2450 // temporaries).
2451 if (isa<CallExpr>(E) || isa<ObjCMessageExpr>(E))
2452 return true;
2453 return false;
2454 };
2455
2456 if (TheExpr->getType()->isObjCRetainableType() &&
2457 getLangOpts().ObjCAutoRefCount && LifetimeExtendObject(TheExpr)) {
2458 assert(getEvaluationKind(TheExpr->getType()) == TEK_Scalar &&
2459 "Only scalar can be a ObjC retainable type");
2460 if (!isa<Constant>(ArgVal)) {
2461 CleanupKind Cleanup = getARCCleanupKind();
2462 QualType Ty = TheExpr->getType();
2463          RawAddress Alloca = RawAddress::invalid();
2464          RawAddress Addr = CreateMemTemp(Ty, "os.log.arg", &Alloca);
2465 ArgVal = EmitARCRetain(Ty, ArgVal);
2466 Builder.CreateStore(ArgVal, Addr);
2467 pushLifetimeExtendedDestroy(Cleanup, Alloca, Ty,
2468                                      CodeGenFunction::destroyARCStrongPrecise,
2469                                      Cleanup & EHCleanup);
2470
2471 // Push a clang.arc.use call to ensure ARC optimizer knows that the
2472 // argument has to be alive.
2473 if (CGM.getCodeGenOpts().OptimizationLevel != 0)
2474 pushCleanupAfterFullExpr<CallObjCArcUse>(Cleanup, ArgVal);
2475 }
2476 }
2477 } else {
2478 ArgVal = Builder.getInt32(Item.getConstValue().getQuantity());
2479 }
2480
2481 unsigned ArgValSize =
2482 CGM.getDataLayout().getTypeSizeInBits(ArgVal->getType());
2483 llvm::IntegerType *IntTy = llvm::Type::getIntNTy(getLLVMContext(),
2484 ArgValSize);
2485 ArgVal = Builder.CreateBitOrPointerCast(ArgVal, IntTy);
2486 CanQualType ArgTy = getOSLogArgType(Ctx, Size);
2487 // If ArgVal has type x86_fp80, zero-extend ArgVal.
2488 ArgVal = Builder.CreateZExtOrBitCast(ArgVal, ConvertType(ArgTy));
2489 Args.add(RValue::get(ArgVal), ArgTy);
2490 }
2491
2492 const CGFunctionInfo &FI =
2493      CGM.getTypes().arrangeBuiltinFunctionCall(Ctx.VoidTy, Args);
2494  llvm::Function *F = CodeGenFunction(CGM).generateBuiltinOSLogHelperFunction(
2495      Layout, BufAddr.getAlignment());
2496  EmitCall(FI, CGCallee::forDirect(F), ReturnValueSlot(), Args);
2497  return RValue::get(BufAddr, *this);
2498}
2499
2500static bool isSpecialUnsignedMultiplySignedResult(
2501    unsigned BuiltinID, WidthAndSignedness Op1Info, WidthAndSignedness Op2Info,
2502 WidthAndSignedness ResultInfo) {
2503 return BuiltinID == Builtin::BI__builtin_mul_overflow &&
2504 Op1Info.Width == Op2Info.Width && Op2Info.Width == ResultInfo.Width &&
2505 !Op1Info.Signed && !Op2Info.Signed && ResultInfo.Signed;
2506}
2507
2508static RValue EmitCheckedUnsignedMultiplySignedResult(
2509    CodeGenFunction &CGF, const clang::Expr *Op1, WidthAndSignedness Op1Info,
2510 const clang::Expr *Op2, WidthAndSignedness Op2Info,
2511 const clang::Expr *ResultArg, QualType ResultQTy,
2512 WidthAndSignedness ResultInfo) {
2513  assert(isSpecialUnsignedMultiplySignedResult(
2514      Builtin::BI__builtin_mul_overflow, Op1Info, Op2Info, ResultInfo) &&
2515 "Cannot specialize this multiply");
2516
2517 llvm::Value *V1 = CGF.EmitScalarExpr(Op1);
2518 llvm::Value *V2 = CGF.EmitScalarExpr(Op2);
2519
2520 llvm::Value *HasOverflow;
2521 llvm::Value *Result = EmitOverflowIntrinsic(
2522 CGF, llvm::Intrinsic::umul_with_overflow, V1, V2, HasOverflow);
2523
2524  // The intrinsic call will detect overflow when the value is > UINT_MAX;
2525 // however, since the original builtin had a signed result, we need to report
2526 // an overflow when the result is greater than INT_MAX.
2527 auto IntMax = llvm::APInt::getSignedMaxValue(ResultInfo.Width);
2528 llvm::Value *IntMaxValue = llvm::ConstantInt::get(Result->getType(), IntMax);
2529
2530 llvm::Value *IntMaxOverflow = CGF.Builder.CreateICmpUGT(Result, IntMaxValue);
2531 HasOverflow = CGF.Builder.CreateOr(HasOverflow, IntMaxOverflow);
2532
2533 bool isVolatile =
2534 ResultArg->getType()->getPointeeType().isVolatileQualified();
2535 Address ResultPtr = CGF.EmitPointerWithAlignment(ResultArg);
2536 CGF.Builder.CreateStore(CGF.EmitToMemory(Result, ResultQTy), ResultPtr,
2537 isVolatile);
2538 return RValue::get(HasOverflow);
2539}
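// Worked example for the check above, with 32-bit operands and a signed 32-bit
// result: 0x8000u * 0x10000u = 0x80000000 does not overflow the unsigned
// multiply, but it exceeds INT_MAX, so IntMaxOverflow makes the builtin report
// overflow; 0x10000u * 0x10000u overflows the unsigned multiply itself and is
// caught by HasOverflow.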
2540
2541/// Determine if a binop is a checked mixed-sign multiply we can specialize.
2542static bool isSpecialMixedSignMultiply(unsigned BuiltinID,
2543 WidthAndSignedness Op1Info,
2544 WidthAndSignedness Op2Info,
2545 WidthAndSignedness ResultInfo) {
2546 return BuiltinID == Builtin::BI__builtin_mul_overflow &&
2547 std::max(Op1Info.Width, Op2Info.Width) >= ResultInfo.Width &&
2548 Op1Info.Signed != Op2Info.Signed;
2549}
2550
2551/// Emit a checked mixed-sign multiply. This is a cheaper specialization of
2552/// the generic checked-binop irgen.
2553static RValue
2554EmitCheckedMixedSignMultiply(CodeGenFunction &CGF, const clang::Expr *Op1,
2555                             WidthAndSignedness Op1Info, const clang::Expr *Op2,
2556 WidthAndSignedness Op2Info,
2557 const clang::Expr *ResultArg, QualType ResultQTy,
2558 WidthAndSignedness ResultInfo) {
2559 assert(isSpecialMixedSignMultiply(Builtin::BI__builtin_mul_overflow, Op1Info,
2560 Op2Info, ResultInfo) &&
2561         "Not a mixed-sign multiplication we can specialize");
2562
2563 // Emit the signed and unsigned operands.
2564 const clang::Expr *SignedOp = Op1Info.Signed ? Op1 : Op2;
2565 const clang::Expr *UnsignedOp = Op1Info.Signed ? Op2 : Op1;
2566 llvm::Value *Signed = CGF.EmitScalarExpr(SignedOp);
2567 llvm::Value *Unsigned = CGF.EmitScalarExpr(UnsignedOp);
2568 unsigned SignedOpWidth = Op1Info.Signed ? Op1Info.Width : Op2Info.Width;
2569 unsigned UnsignedOpWidth = Op1Info.Signed ? Op2Info.Width : Op1Info.Width;
2570
2571 // One of the operands may be smaller than the other. If so, [s|z]ext it.
2572 if (SignedOpWidth < UnsignedOpWidth)
2573 Signed = CGF.Builder.CreateSExt(Signed, Unsigned->getType(), "op.sext");
2574 if (UnsignedOpWidth < SignedOpWidth)
2575 Unsigned = CGF.Builder.CreateZExt(Unsigned, Signed->getType(), "op.zext");
2576
2577 llvm::Type *OpTy = Signed->getType();
2578 llvm::Value *Zero = llvm::Constant::getNullValue(OpTy);
2579 Address ResultPtr = CGF.EmitPointerWithAlignment(ResultArg);
2580 llvm::Type *ResTy = ResultPtr.getElementType();
2581 unsigned OpWidth = std::max(Op1Info.Width, Op2Info.Width);
2582
2583 // Take the absolute value of the signed operand.
2584 llvm::Value *IsNegative = CGF.Builder.CreateICmpSLT(Signed, Zero);
2585 llvm::Value *AbsOfNegative = CGF.Builder.CreateSub(Zero, Signed);
2586 llvm::Value *AbsSigned =
2587 CGF.Builder.CreateSelect(IsNegative, AbsOfNegative, Signed);
2588
2589 // Perform a checked unsigned multiplication.
2590 llvm::Value *UnsignedOverflow;
2591 llvm::Value *UnsignedResult =
2592 EmitOverflowIntrinsic(CGF, llvm::Intrinsic::umul_with_overflow, AbsSigned,
2593 Unsigned, UnsignedOverflow);
2594
2595 llvm::Value *Overflow, *Result;
2596 if (ResultInfo.Signed) {
2597    // Signed overflow occurs if the result is greater than INT_MAX or less
2598    // than INT_MIN, i.e. when |Result| > (INT_MAX + IsNegative).
2599 auto IntMax =
2600 llvm::APInt::getSignedMaxValue(ResultInfo.Width).zext(OpWidth);
2601 llvm::Value *MaxResult =
2602 CGF.Builder.CreateAdd(llvm::ConstantInt::get(OpTy, IntMax),
2603 CGF.Builder.CreateZExt(IsNegative, OpTy));
2604 llvm::Value *SignedOverflow =
2605 CGF.Builder.CreateICmpUGT(UnsignedResult, MaxResult);
2606 Overflow = CGF.Builder.CreateOr(UnsignedOverflow, SignedOverflow);
2607
2608 // Prepare the signed result (possibly by negating it).
2609 llvm::Value *NegativeResult = CGF.Builder.CreateNeg(UnsignedResult);
2610 llvm::Value *SignedResult =
2611 CGF.Builder.CreateSelect(IsNegative, NegativeResult, UnsignedResult);
2612 Result = CGF.Builder.CreateTrunc(SignedResult, ResTy);
2613 } else {
2614 // Unsigned overflow occurs if the result is < 0 or greater than UINT_MAX.
2615 llvm::Value *Underflow = CGF.Builder.CreateAnd(
2616 IsNegative, CGF.Builder.CreateIsNotNull(UnsignedResult));
2617 Overflow = CGF.Builder.CreateOr(UnsignedOverflow, Underflow);
2618 if (ResultInfo.Width < OpWidth) {
2619 auto IntMax =
2620 llvm::APInt::getMaxValue(ResultInfo.Width).zext(OpWidth);
2621 llvm::Value *TruncOverflow = CGF.Builder.CreateICmpUGT(
2622 UnsignedResult, llvm::ConstantInt::get(OpTy, IntMax));
2623 Overflow = CGF.Builder.CreateOr(Overflow, TruncOverflow);
2624 }
2625
2626 // Negate the product if it would be negative in infinite precision.
2627 Result = CGF.Builder.CreateSelect(
2628 IsNegative, CGF.Builder.CreateNeg(UnsignedResult), UnsignedResult);
2629
2630 Result = CGF.Builder.CreateTrunc(Result, ResTy);
2631 }
2632 assert(Overflow && Result && "Missing overflow or result");
2633
2634 bool isVolatile =
2635 ResultArg->getType()->getPointeeType().isVolatileQualified();
2636 CGF.Builder.CreateStore(CGF.EmitToMemory(Result, ResultQTy), ResultPtr,
2637 isVolatile);
2638 return RValue::get(Overflow);
2639}
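// Worked example for the mixed-sign path above, with 32-bit operands: for
// (-3) * 5u into an unsigned result, AbsSigned * Unsigned = 15 with no
// unsigned overflow, but IsNegative is true and the product is nonzero, so
// Underflow fires and the negated value 0xFFFFFFF1 is stored as the
// (overflowing) result. For (-2) * 3u into a signed result, 6 <= INT_MAX + 1,
// so the select simply yields -6 with no overflow reported.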
2640
2641static bool
2642TypeRequiresBuiltinLaunderImp(const ASTContext &Ctx, QualType Ty,
2643                              llvm::SmallPtrSetImpl<const Decl *> &Seen) {
2644 if (const auto *Arr = Ctx.getAsArrayType(Ty))
2645 Ty = Ctx.getBaseElementType(Arr);
2646
2647 const auto *Record = Ty->getAsCXXRecordDecl();
2648 if (!Record)
2649 return false;
2650
2651 // We've already checked this type, or are in the process of checking it.
2652 if (!Seen.insert(Record).second)
2653 return false;
2654
2655 assert(Record->hasDefinition() &&
2656 "Incomplete types should already be diagnosed");
2657
2658 if (Record->isDynamicClass())
2659 return true;
2660
2661 for (FieldDecl *F : Record->fields()) {
2662 if (TypeRequiresBuiltinLaunderImp(Ctx, F->getType(), Seen))
2663 return true;
2664 }
2665 return false;
2666}
2667
2668/// Determine if the specified type requires laundering by checking if it is a
2669/// dynamic class type or contains a subobject which is a dynamic class type.
2670static bool TypeRequiresBuiltinLaunder(CodeGenModule &CGM, QualType Ty) {
2671  if (!CGM.getCodeGenOpts().StrictVTablePointers)
2672 return false;
2673  llvm::SmallPtrSet<const Decl *, 16> Seen;
2674  return TypeRequiresBuiltinLaunderImp(CGM.getContext(), Ty, Seen);
2675}
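// Illustration only (relevant under -fstrict-vtable-pointers): laundering is
// required exactly when the type is dynamic or transitively contains one.
//   struct Poly { virtual ~Poly(); };   // dynamic class -> requires laundering
//   struct Holder { Poly Arr[2]; };     // contains one  -> requires laundering
//   struct Plain { int X; };            // neither       -> no laundering needed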
2676
2677RValue CodeGenFunction::emitRotate(const CallExpr *E, bool IsRotateRight) {
2678 llvm::Value *Src = EmitScalarExpr(E->getArg(0));
2679 llvm::Value *ShiftAmt = EmitScalarExpr(E->getArg(1));
2680
2681 // The builtin's shift arg may have a different type than the source arg and
2682 // result, but the LLVM intrinsic uses the same type for all values.
2683 llvm::Type *Ty = Src->getType();
2684 ShiftAmt = Builder.CreateIntCast(ShiftAmt, Ty, false);
2685
2686 // Rotate is a special case of LLVM funnel shift - 1st 2 args are the same.
2687 unsigned IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
2688 Function *F = CGM.getIntrinsic(IID, Ty);
2689 return RValue::get(Builder.CreateCall(F, { Src, Src, ShiftAmt }));
2690}
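// Illustration only: a rotate such as __builtin_rotateleft32(X, N) therefore
// lowers to @llvm.fshl.i32(X, X, N), and the right-rotate forms lower to
// @llvm.fshr.i32(X, X, N), after the shift amount has been cast to the value
// type.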
2691
2692// Map math builtins for long-double to f128 version.
2693static unsigned mutateLongDoubleBuiltin(unsigned BuiltinID) {
2694 switch (BuiltinID) {
2695#define MUTATE_LDBL(func) \
2696 case Builtin::BI__builtin_##func##l: \
2697 return Builtin::BI__builtin_##func##f128;
2728 MUTATE_LDBL(nans)
2729 MUTATE_LDBL(inf)
2748 MUTATE_LDBL(huge_val)
2758#undef MUTATE_LDBL
2759 default:
2760 return BuiltinID;
2761 }
2762}
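// Illustration only: with MUTATE_LDBL(inf) and MUTATE_LDBL(huge_val) above,
// Builtin::BI__builtin_infl becomes Builtin::BI__builtin_inff128 and
// Builtin::BI__builtin_huge_vall becomes Builtin::BI__builtin_huge_valf128;
// the caller below applies this rewrite only when the PPC64 long double format
// is IEEE quad.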
2763
2764static Value *tryUseTestFPKind(CodeGenFunction &CGF, unsigned BuiltinID,
2765 Value *V) {
2766 if (CGF.Builder.getIsFPConstrained() &&
2767 CGF.Builder.getDefaultConstrainedExcept() != fp::ebIgnore) {
2768 if (Value *Result =
2769 CGF.getTargetHooks().testFPKind(V, BuiltinID, CGF.Builder, CGF.CGM))
2770 return Result;
2771 }
2772 return nullptr;
2773}
2774
2775static RValue EmitHipStdParUnsupportedBuiltin(CodeGenFunction *CGF,
2776                                              const FunctionDecl *FD) {
2777 auto Name = FD->getNameAsString() + "__hipstdpar_unsupported";
2778 auto FnTy = CGF->CGM.getTypes().GetFunctionType(FD);
2779 auto UBF = CGF->CGM.getModule().getOrInsertFunction(Name, FnTy);
2780
2781  SmallVector<Value *, 16> Args;
2782  for (auto &&FormalTy : FnTy->params())
2783 Args.push_back(llvm::PoisonValue::get(FormalTy));
2784
2785 return RValue::get(CGF->Builder.CreateCall(UBF, Args));
2786}
2787
2788RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
2789 const CallExpr *E,
2790 ReturnValueSlot ReturnValue) {
2791 assert(!getContext().BuiltinInfo.isImmediate(BuiltinID) &&
2792 "Should not codegen for consteval builtins");
2793
2794 const FunctionDecl *FD = GD.getDecl()->getAsFunction();
2795 // See if we can constant fold this builtin. If so, don't emit it at all.
2796 // TODO: Extend this handling to all builtin calls that we can constant-fold.
2797  Expr::EvalResult Result;
2798  if (E->isPRValue() && E->EvaluateAsRValue(Result, CGM.getContext()) &&
2799      !Result.hasSideEffects()) {
2800 if (Result.Val.isInt())
2801 return RValue::get(llvm::ConstantInt::get(getLLVMContext(),
2802 Result.Val.getInt()));
2803 if (Result.Val.isFloat())
2804 return RValue::get(llvm::ConstantFP::get(getLLVMContext(),
2805 Result.Val.getFloat()));
2806 }
2807
2808 // If current long-double semantics is IEEE 128-bit, replace math builtins
2809 // of long-double with f128 equivalent.
2810  // TODO: This mutation should also be applied to targets other than PPC,
2811 // after backend supports IEEE 128-bit style libcalls.
2812 if (getTarget().getTriple().isPPC64() &&
2813 &getTarget().getLongDoubleFormat() == &llvm::APFloat::IEEEquad())
2814 BuiltinID = mutateLongDoubleBuiltin(BuiltinID);
2815
2816 // If the builtin has been declared explicitly with an assembler label,
2817 // disable the specialized emitting below. Ideally we should communicate the
2818 // rename in IR, or at least avoid generating the intrinsic calls that are
2819 // likely to get lowered to the renamed library functions.
2820 const unsigned BuiltinIDIfNoAsmLabel =
2821 FD->hasAttr<AsmLabelAttr>() ? 0 : BuiltinID;
2822
2823 std::optional<bool> ErrnoOverriden;
2824  // ErrnoOverriden is true if math-errno is overridden via the
2825 // '#pragma float_control(precise, on)'. This pragma disables fast-math,
2826 // which implies math-errno.
2827 if (E->hasStoredFPFeatures()) {
2828 FPOptionsOverride OP = E->getFPFeatures();
2829 if (OP.hasMathErrnoOverride())
2830 ErrnoOverriden = OP.getMathErrnoOverride();
2831 }
2832  // True if '__attribute__((optnone))' is used. This attribute overrides
2833  // fast-math, which implies math-errno.
2834 bool OptNone = CurFuncDecl && CurFuncDecl->hasAttr<OptimizeNoneAttr>();
2835
2836  // True if we are compiling with optimization and errno has been disabled
2837  // via '#pragma float_control(precise, off)', and
2838  // '__attribute__((optnone))' has not been used.
2839 bool ErrnoOverridenToFalseWithOpt =
2840 ErrnoOverriden.has_value() && !ErrnoOverriden.value() && !OptNone &&
2841 CGM.getCodeGenOpts().OptimizationLevel != 0;
2842
2843 // There are LLVM math intrinsics/instructions corresponding to math library
2844 // functions except the LLVM op will never set errno while the math library
2845 // might. Also, math builtins have the same semantics as their math library
2846 // twins. Thus, we can transform math library and builtin calls to their
2847 // LLVM counterparts if the call is marked 'const' (known to never set errno).
2848 // In case FP exceptions are enabled, the experimental versions of the
2849 // intrinsics model those.
2850 bool ConstAlways =
2851 getContext().BuiltinInfo.isConst(BuiltinID);
2852
2853  // There's a special case with the fma builtins where they are always const
2854  // if the target environment is GNU or the target OS is Windows and we're
2855  // targeting the MSVCRT.dll environment.
2856  // FIXME: This list can become outdated. Need to find a way to get it some
2857  // other way.
2858 switch (BuiltinID) {
2859 case Builtin::BI__builtin_fma:
2860 case Builtin::BI__builtin_fmaf:
2861 case Builtin::BI__builtin_fmal:
2862 case Builtin::BI__builtin_fmaf16:
2863 case Builtin::BIfma:
2864 case Builtin::BIfmaf:
2865 case Builtin::BIfmal: {
2866 auto &Trip = CGM.getTriple();
2867 if (Trip.isGNUEnvironment() || Trip.isOSMSVCRT())
2868 ConstAlways = true;
2869 break;
2870 }
2871 default:
2872 break;
2873 }
2874
2875 bool ConstWithoutErrnoAndExceptions =
2876      getContext().BuiltinInfo.isConstWithoutErrnoAndExceptions(BuiltinID);
2877  bool ConstWithoutExceptions =
2878      getContext().BuiltinInfo.isConstWithoutExceptions(BuiltinID);
2879
2880 // ConstAttr is enabled in fast-math mode. In fast-math mode, math-errno is
2881 // disabled.
2882 // Math intrinsics are generated only when math-errno is disabled. Any pragmas
2883 // or attributes that affect math-errno should prevent or allow math
2884  // intrinsics to be generated. Intrinsics are generated:
2885  // 1- In fast math mode, unless math-errno is overridden
2886  //    via '#pragma float_control(precise, on)', or via an
2887  //    '__attribute__((optnone))'.
2888  // 2- If math-errno was enabled on the command line but overridden
2889  //    to false via '#pragma float_control(precise, off)' and
2890  //    '__attribute__((optnone))' hasn't been used.
2891  // 3- If we are compiling with optimization and errno has been disabled
2892  //    via '#pragma float_control(precise, off)', and
2893  //    '__attribute__((optnone))' hasn't been used.
2894
2895 bool ConstWithoutErrnoOrExceptions =
2896 ConstWithoutErrnoAndExceptions || ConstWithoutExceptions;
2897 bool GenerateIntrinsics =
2898 (ConstAlways && !OptNone) ||
2899 (!getLangOpts().MathErrno &&
2900 !(ErrnoOverriden.has_value() && ErrnoOverriden.value()) && !OptNone);
2901 if (!GenerateIntrinsics) {
2902 GenerateIntrinsics =
2903 ConstWithoutErrnoOrExceptions && !ConstWithoutErrnoAndExceptions;
2904 if (!GenerateIntrinsics)
2905 GenerateIntrinsics =
2906 ConstWithoutErrnoOrExceptions &&
2907 (!getLangOpts().MathErrno &&
2908 !(ErrnoOverriden.has_value() && ErrnoOverriden.value()) && !OptNone);
2909 if (!GenerateIntrinsics)
2910 GenerateIntrinsics =
2911 ConstWithoutErrnoOrExceptions && ErrnoOverridenToFalseWithOpt;
2912 }
2913 if (GenerateIntrinsics) {
2914 switch (BuiltinIDIfNoAsmLabel) {
2915 case Builtin::BIacos:
2916 case Builtin::BIacosf:
2917 case Builtin::BIacosl:
2918 case Builtin::BI__builtin_acos:
2919 case Builtin::BI__builtin_acosf:
2920 case Builtin::BI__builtin_acosf16:
2921 case Builtin::BI__builtin_acosl:
2922 case Builtin::BI__builtin_acosf128:
2923      return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
2924          *this, E, Intrinsic::acos, Intrinsic::experimental_constrained_acos));
2925
2926 case Builtin::BIasin:
2927 case Builtin::BIasinf:
2928 case Builtin::BIasinl:
2929 case Builtin::BI__builtin_asin:
2930 case Builtin::BI__builtin_asinf:
2931 case Builtin::BI__builtin_asinf16:
2932 case Builtin::BI__builtin_asinl:
2933 case Builtin::BI__builtin_asinf128:
2934      return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
2935          *this, E, Intrinsic::asin, Intrinsic::experimental_constrained_asin));
2936
2937 case Builtin::BIatan:
2938 case Builtin::BIatanf:
2939 case Builtin::BIatanl:
2940 case Builtin::BI__builtin_atan:
2941 case Builtin::BI__builtin_atanf:
2942 case Builtin::BI__builtin_atanf16:
2943 case Builtin::BI__builtin_atanl:
2944 case Builtin::BI__builtin_atanf128:
2945      return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
2946          *this, E, Intrinsic::atan, Intrinsic::experimental_constrained_atan));
2947
2948 case Builtin::BIatan2:
2949 case Builtin::BIatan2f:
2950 case Builtin::BIatan2l:
2951 case Builtin::BI__builtin_atan2:
2952 case Builtin::BI__builtin_atan2f:
2953 case Builtin::BI__builtin_atan2f16:
2954 case Builtin::BI__builtin_atan2l:
2955 case Builtin::BI__builtin_atan2f128:
2956      return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(
2957          *this, E, Intrinsic::atan2,
2958 Intrinsic::experimental_constrained_atan2));
2959
2960 case Builtin::BIceil:
2961 case Builtin::BIceilf:
2962 case Builtin::BIceill:
2963 case Builtin::BI__builtin_ceil:
2964 case Builtin::BI__builtin_ceilf:
2965 case Builtin::BI__builtin_ceilf16:
2966 case Builtin::BI__builtin_ceill:
2967 case Builtin::BI__builtin_ceilf128:
2968      return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2969                                       Intrinsic::ceil,
2970 Intrinsic::experimental_constrained_ceil));
2971
2972 case Builtin::BIcopysign:
2973 case Builtin::BIcopysignf:
2974 case Builtin::BIcopysignl:
2975 case Builtin::BI__builtin_copysign:
2976 case Builtin::BI__builtin_copysignf:
2977 case Builtin::BI__builtin_copysignf16:
2978 case Builtin::BI__builtin_copysignl:
2979 case Builtin::BI__builtin_copysignf128:
2980 return RValue::get(
2981 emitBuiltinWithOneOverloadedType<2>(*this, E, Intrinsic::copysign));
2982
2983 case Builtin::BIcos:
2984 case Builtin::BIcosf:
2985 case Builtin::BIcosl:
2986 case Builtin::BI__builtin_cos:
2987 case Builtin::BI__builtin_cosf:
2988 case Builtin::BI__builtin_cosf16:
2989 case Builtin::BI__builtin_cosl:
2990 case Builtin::BI__builtin_cosf128:
2991 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2992 Intrinsic::cos,
2993 Intrinsic::experimental_constrained_cos));
2994
2995 case Builtin::BIcosh:
2996 case Builtin::BIcoshf:
2997 case Builtin::BIcoshl:
2998 case Builtin::BI__builtin_cosh:
2999 case Builtin::BI__builtin_coshf:
3000 case Builtin::BI__builtin_coshf16:
3001 case Builtin::BI__builtin_coshl:
3002 case Builtin::BI__builtin_coshf128:
3003 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
3004 *this, E, Intrinsic::cosh, Intrinsic::experimental_constrained_cosh));
3005
3006 case Builtin::BIexp:
3007 case Builtin::BIexpf:
3008 case Builtin::BIexpl:
3009 case Builtin::BI__builtin_exp:
3010 case Builtin::BI__builtin_expf:
3011 case Builtin::BI__builtin_expf16:
3012 case Builtin::BI__builtin_expl:
3013 case Builtin::BI__builtin_expf128:
3014 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
3015 Intrinsic::exp,
3016 Intrinsic::experimental_constrained_exp));
3017
3018 case Builtin::BIexp2:
3019 case Builtin::BIexp2f:
3020 case Builtin::BIexp2l:
3021 case Builtin::BI__builtin_exp2:
3022 case Builtin::BI__builtin_exp2f:
3023 case Builtin::BI__builtin_exp2f16:
3024 case Builtin::BI__builtin_exp2l:
3025 case Builtin::BI__builtin_exp2f128:
3026 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
3027 Intrinsic::exp2,
3028 Intrinsic::experimental_constrained_exp2));
3029 case Builtin::BI__builtin_exp10:
3030 case Builtin::BI__builtin_exp10f:
3031 case Builtin::BI__builtin_exp10f16:
3032 case Builtin::BI__builtin_exp10l:
3033 case Builtin::BI__builtin_exp10f128: {
3034 // TODO: strictfp support
3035 if (Builder.getIsFPConstrained())
3036 break;
3037 return RValue::get(
3038 emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::exp10));
3039 }
3040 case Builtin::BIfabs:
3041 case Builtin::BIfabsf:
3042 case Builtin::BIfabsl:
3043 case Builtin::BI__builtin_fabs:
3044 case Builtin::BI__builtin_fabsf:
3045 case Builtin::BI__builtin_fabsf16:
3046 case Builtin::BI__builtin_fabsl:
3047 case Builtin::BI__builtin_fabsf128:
3048 return RValue::get(
3049 emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::fabs));
3050
3051 case Builtin::BIfloor:
3052 case Builtin::BIfloorf:
3053 case Builtin::BIfloorl:
3054 case Builtin::BI__builtin_floor:
3055 case Builtin::BI__builtin_floorf:
3056 case Builtin::BI__builtin_floorf16:
3057 case Builtin::BI__builtin_floorl:
3058 case Builtin::BI__builtin_floorf128:
3059 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
3060 Intrinsic::floor,
3061 Intrinsic::experimental_constrained_floor));
3062
3063 case Builtin::BIfma:
3064 case Builtin::BIfmaf:
3065 case Builtin::BIfmal:
3066 case Builtin::BI__builtin_fma:
3067 case Builtin::BI__builtin_fmaf:
3068 case Builtin::BI__builtin_fmaf16:
3069 case Builtin::BI__builtin_fmal:
3070 case Builtin::BI__builtin_fmaf128:
3071 return RValue::get(emitTernaryMaybeConstrainedFPBuiltin(*this, E,
3072 Intrinsic::fma,
3073 Intrinsic::experimental_constrained_fma));
3074
3075 case Builtin::BIfmax:
3076 case Builtin::BIfmaxf:
3077 case Builtin::BIfmaxl:
3078 case Builtin::BI__builtin_fmax:
3079 case Builtin::BI__builtin_fmaxf:
3080 case Builtin::BI__builtin_fmaxf16:
3081 case Builtin::BI__builtin_fmaxl:
3082 case Builtin::BI__builtin_fmaxf128:
3083 return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E,
3084 Intrinsic::maxnum,
3085 Intrinsic::experimental_constrained_maxnum));
3086
3087 case Builtin::BIfmin:
3088 case Builtin::BIfminf:
3089 case Builtin::BIfminl:
3090 case Builtin::BI__builtin_fmin:
3091 case Builtin::BI__builtin_fminf:
3092 case Builtin::BI__builtin_fminf16:
3093 case Builtin::BI__builtin_fminl:
3094 case Builtin::BI__builtin_fminf128:
3095 return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E,
3096 Intrinsic::minnum,
3097 Intrinsic::experimental_constrained_minnum));
3098
3099 case Builtin::BIfmaximum_num:
3100 case Builtin::BIfmaximum_numf:
3101 case Builtin::BIfmaximum_numl:
3102 case Builtin::BI__builtin_fmaximum_num:
3103 case Builtin::BI__builtin_fmaximum_numf:
3104 case Builtin::BI__builtin_fmaximum_numf16:
3105 case Builtin::BI__builtin_fmaximum_numl:
3106 case Builtin::BI__builtin_fmaximum_numf128:
3107 return RValue::get(
3108 emitBuiltinWithOneOverloadedType<2>(*this, E, Intrinsic::maximumnum));
3109
3110 case Builtin::BIfminimum_num:
3111 case Builtin::BIfminimum_numf:
3112 case Builtin::BIfminimum_numl:
3113 case Builtin::BI__builtin_fminimum_num:
3114 case Builtin::BI__builtin_fminimum_numf:
3115 case Builtin::BI__builtin_fminimum_numf16:
3116 case Builtin::BI__builtin_fminimum_numl:
3117 case Builtin::BI__builtin_fminimum_numf128:
3118 return RValue::get(
3119 emitBuiltinWithOneOverloadedType<2>(*this, E, Intrinsic::minimumnum));
3120
3121 // fmod() is a special-case. It maps to the frem instruction rather than an
3122 // LLVM intrinsic.
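// For example, 'fmod(x, y)' on doubles is emitted roughly as
//   %fmod = frem double %x, %y
// and, like fmod(), frem produces a remainder with the same sign as x.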
3123 case Builtin::BIfmod:
3124 case Builtin::BIfmodf:
3125 case Builtin::BIfmodl:
3126 case Builtin::BI__builtin_fmod:
3127 case Builtin::BI__builtin_fmodf:
3128 case Builtin::BI__builtin_fmodf16:
3129 case Builtin::BI__builtin_fmodl:
3130 case Builtin::BI__builtin_fmodf128:
3131 case Builtin::BI__builtin_elementwise_fmod: {
3132 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3133 Value *Arg1 = EmitScalarExpr(E->getArg(0));
3134 Value *Arg2 = EmitScalarExpr(E->getArg(1));
3135 return RValue::get(Builder.CreateFRem(Arg1, Arg2, "fmod"));
3136 }
3137
3138 case Builtin::BIlog:
3139 case Builtin::BIlogf:
3140 case Builtin::BIlogl:
3141 case Builtin::BI__builtin_log:
3142 case Builtin::BI__builtin_logf:
3143 case Builtin::BI__builtin_logf16:
3144 case Builtin::BI__builtin_logl:
3145 case Builtin::BI__builtin_logf128:
3146 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
3147 Intrinsic::log,
3148 Intrinsic::experimental_constrained_log));
3149
3150 case Builtin::BIlog10:
3151 case Builtin::BIlog10f:
3152 case Builtin::BIlog10l:
3153 case Builtin::BI__builtin_log10:
3154 case Builtin::BI__builtin_log10f:
3155 case Builtin::BI__builtin_log10f16:
3156 case Builtin::BI__builtin_log10l:
3157 case Builtin::BI__builtin_log10f128:
3158 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
3159 Intrinsic::log10,
3160 Intrinsic::experimental_constrained_log10));
3161
3162 case Builtin::BIlog2:
3163 case Builtin::BIlog2f:
3164 case Builtin::BIlog2l:
3165 case Builtin::BI__builtin_log2:
3166 case Builtin::BI__builtin_log2f:
3167 case Builtin::BI__builtin_log2f16:
3168 case Builtin::BI__builtin_log2l:
3169 case Builtin::BI__builtin_log2f128:
3170 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
3171 Intrinsic::log2,
3172 Intrinsic::experimental_constrained_log2));
3173
3174 case Builtin::BInearbyint:
3175 case Builtin::BInearbyintf:
3176 case Builtin::BInearbyintl:
3177 case Builtin::BI__builtin_nearbyint:
3178 case Builtin::BI__builtin_nearbyintf:
3179 case Builtin::BI__builtin_nearbyintl:
3180 case Builtin::BI__builtin_nearbyintf128:
3181 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
3182 Intrinsic::nearbyint,
3183 Intrinsic::experimental_constrained_nearbyint));
3184
3185 case Builtin::BIpow:
3186 case Builtin::BIpowf:
3187 case Builtin::BIpowl:
3188 case Builtin::BI__builtin_pow:
3189 case Builtin::BI__builtin_powf:
3190 case Builtin::BI__builtin_powf16:
3191 case Builtin::BI__builtin_powl:
3192 case Builtin::BI__builtin_powf128:
3193 return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E,
3194 Intrinsic::pow,
3195 Intrinsic::experimental_constrained_pow));
3196
3197 case Builtin::BIrint:
3198 case Builtin::BIrintf:
3199 case Builtin::BIrintl:
3200 case Builtin::BI__builtin_rint:
3201 case Builtin::BI__builtin_rintf:
3202 case Builtin::BI__builtin_rintf16:
3203 case Builtin::BI__builtin_rintl:
3204 case Builtin::BI__builtin_rintf128:
3205 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
3206 Intrinsic::rint,
3207 Intrinsic::experimental_constrained_rint));
3208
3209 case Builtin::BIround:
3210 case Builtin::BIroundf:
3211 case Builtin::BIroundl:
3212 case Builtin::BI__builtin_round:
3213 case Builtin::BI__builtin_roundf:
3214 case Builtin::BI__builtin_roundf16:
3215 case Builtin::BI__builtin_roundl:
3216 case Builtin::BI__builtin_roundf128:
3217 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
3218 Intrinsic::round,
3219 Intrinsic::experimental_constrained_round));
3220
3221 case Builtin::BIroundeven:
3222 case Builtin::BIroundevenf:
3223 case Builtin::BIroundevenl:
3224 case Builtin::BI__builtin_roundeven:
3225 case Builtin::BI__builtin_roundevenf:
3226 case Builtin::BI__builtin_roundevenf16:
3227 case Builtin::BI__builtin_roundevenl:
3228 case Builtin::BI__builtin_roundevenf128:
3229 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
3230 Intrinsic::roundeven,
3231 Intrinsic::experimental_constrained_roundeven));
3232
3233 case Builtin::BIsin:
3234 case Builtin::BIsinf:
3235 case Builtin::BIsinl:
3236 case Builtin::BI__builtin_sin:
3237 case Builtin::BI__builtin_sinf:
3238 case Builtin::BI__builtin_sinf16:
3239 case Builtin::BI__builtin_sinl:
3240 case Builtin::BI__builtin_sinf128:
3241 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
3242 Intrinsic::sin,
3243 Intrinsic::experimental_constrained_sin));
3244
3245 case Builtin::BIsinh:
3246 case Builtin::BIsinhf:
3247 case Builtin::BIsinhl:
3248 case Builtin::BI__builtin_sinh:
3249 case Builtin::BI__builtin_sinhf:
3250 case Builtin::BI__builtin_sinhf16:
3251 case Builtin::BI__builtin_sinhl:
3252 case Builtin::BI__builtin_sinhf128:
3253 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
3254 *this, E, Intrinsic::sinh, Intrinsic::experimental_constrained_sinh));
3255
3256 case Builtin::BI__builtin_sincos:
3257 case Builtin::BI__builtin_sincosf:
3258 case Builtin::BI__builtin_sincosf16:
3259 case Builtin::BI__builtin_sincosl:
3260 case Builtin::BI__builtin_sincosf128:
3261 emitSincosBuiltin(*this, E, Intrinsic::sincos);
3262 return RValue::get(nullptr);
3263
3264 case Builtin::BIsqrt:
3265 case Builtin::BIsqrtf:
3266 case Builtin::BIsqrtl:
3267 case Builtin::BI__builtin_sqrt:
3268 case Builtin::BI__builtin_sqrtf:
3269 case Builtin::BI__builtin_sqrtf16:
3270 case Builtin::BI__builtin_sqrtl:
3271 case Builtin::BI__builtin_sqrtf128:
3272 case Builtin::BI__builtin_elementwise_sqrt: {
3273 llvm::Value *Call = emitUnaryMaybeConstrainedFPBuiltin(
3274 *this, E, Intrinsic::sqrt, Intrinsic::experimental_constrained_sqrt);
3275 SetSqrtFPAccuracy(Call);
3276 return RValue::get(Call);
3277 }
3278
3279 case Builtin::BItan:
3280 case Builtin::BItanf:
3281 case Builtin::BItanl:
3282 case Builtin::BI__builtin_tan:
3283 case Builtin::BI__builtin_tanf:
3284 case Builtin::BI__builtin_tanf16:
3285 case Builtin::BI__builtin_tanl:
3286 case Builtin::BI__builtin_tanf128:
3287 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
3288 *this, E, Intrinsic::tan, Intrinsic::experimental_constrained_tan));
3289
3290 case Builtin::BItanh:
3291 case Builtin::BItanhf:
3292 case Builtin::BItanhl:
3293 case Builtin::BI__builtin_tanh:
3294 case Builtin::BI__builtin_tanhf:
3295 case Builtin::BI__builtin_tanhf16:
3296 case Builtin::BI__builtin_tanhl:
3297 case Builtin::BI__builtin_tanhf128:
3298 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
3299 *this, E, Intrinsic::tanh, Intrinsic::experimental_constrained_tanh));
3300
3301 case Builtin::BItrunc:
3302 case Builtin::BItruncf:
3303 case Builtin::BItruncl:
3304 case Builtin::BI__builtin_trunc:
3305 case Builtin::BI__builtin_truncf:
3306 case Builtin::BI__builtin_truncf16:
3307 case Builtin::BI__builtin_truncl:
3308 case Builtin::BI__builtin_truncf128:
3309 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
3310 Intrinsic::trunc,
3311 Intrinsic::experimental_constrained_trunc));
3312
3313 case Builtin::BIlround:
3314 case Builtin::BIlroundf:
3315 case Builtin::BIlroundl:
3316 case Builtin::BI__builtin_lround:
3317 case Builtin::BI__builtin_lroundf:
3318 case Builtin::BI__builtin_lroundl:
3319 case Builtin::BI__builtin_lroundf128:
3320 return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
3321 *this, E, Intrinsic::lround,
3322 Intrinsic::experimental_constrained_lround));
3323
3324 case Builtin::BIllround:
3325 case Builtin::BIllroundf:
3326 case Builtin::BIllroundl:
3327 case Builtin::BI__builtin_llround:
3328 case Builtin::BI__builtin_llroundf:
3329 case Builtin::BI__builtin_llroundl:
3330 case Builtin::BI__builtin_llroundf128:
3331 return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
3332 *this, E, Intrinsic::llround,
3333 Intrinsic::experimental_constrained_llround));
3334
3335 case Builtin::BIlrint:
3336 case Builtin::BIlrintf:
3337 case Builtin::BIlrintl:
3338 case Builtin::BI__builtin_lrint:
3339 case Builtin::BI__builtin_lrintf:
3340 case Builtin::BI__builtin_lrintl:
3341 case Builtin::BI__builtin_lrintf128:
3342 return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
3343 *this, E, Intrinsic::lrint,
3344 Intrinsic::experimental_constrained_lrint));
3345
3346 case Builtin::BIllrint:
3347 case Builtin::BIllrintf:
3348 case Builtin::BIllrintl:
3349 case Builtin::BI__builtin_llrint:
3350 case Builtin::BI__builtin_llrintf:
3351 case Builtin::BI__builtin_llrintl:
3352 case Builtin::BI__builtin_llrintf128:
3353 return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
3354 *this, E, Intrinsic::llrint,
3355 Intrinsic::experimental_constrained_llrint));
3356 case Builtin::BI__builtin_ldexp:
3357 case Builtin::BI__builtin_ldexpf:
3358 case Builtin::BI__builtin_ldexpl:
3359 case Builtin::BI__builtin_ldexpf16:
3360 case Builtin::BI__builtin_ldexpf128: {
3361 return RValue::get(emitBinaryExpMaybeConstrainedFPBuiltin(
3362 *this, E, Intrinsic::ldexp,
3363 Intrinsic::experimental_constrained_ldexp));
3364 }
3365 default:
3366 break;
3367 }
3368 }
3369
3370 // Check NonnullAttribute/NullabilityArg and Alignment.
3371 auto EmitArgCheck = [&](TypeCheckKind Kind, Address A, const Expr *Arg,
3372 unsigned ParmNum) {
3373 Value *Val = A.emitRawPointer(*this);
3374 EmitNonNullArgCheck(RValue::get(Val), Arg->getType(), Arg->getExprLoc(), FD,
3375 ParmNum);
3376
3377 if (SanOpts.has(SanitizerKind::Alignment)) {
3378 SanitizerSet SkippedChecks;
3379 SkippedChecks.set(SanitizerKind::All);
3380 SkippedChecks.clear(SanitizerKind::Alignment);
3381 SourceLocation Loc = Arg->getExprLoc();
3382 // Strip an implicit cast.
3383 if (auto *CE = dyn_cast<ImplicitCastExpr>(Arg))
3384 if (CE->getCastKind() == CK_BitCast)
3385 Arg = CE->getSubExpr();
3386 EmitTypeCheck(Kind, Loc, Val, Arg->getType(), A.getAlignment(),
3387 SkippedChecks);
3388 }
3389 };
3390
3391 switch (BuiltinIDIfNoAsmLabel) {
3392 default: break;
3393 case Builtin::BI__builtin___CFStringMakeConstantString:
3394 case Builtin::BI__builtin___NSStringMakeConstantString:
3395 return RValue::get(ConstantEmitter(*this).emitAbstract(E, E->getType()));
3396 case Builtin::BI__builtin_stdarg_start:
3397 case Builtin::BI__builtin_va_start:
3398 case Builtin::BI__va_start:
3399 case Builtin::BI__builtin_va_end:
3400 EmitVAStartEnd(BuiltinID == Builtin::BI__va_start
3401 ? EmitScalarExpr(E->getArg(0))
3402 : EmitVAListRef(E->getArg(0)).emitRawPointer(*this),
3403 BuiltinID != Builtin::BI__builtin_va_end);
3404 return RValue::get(nullptr);
3405 case Builtin::BI__builtin_va_copy: {
3406 Value *DstPtr = EmitVAListRef(E->getArg(0)).emitRawPointer(*this);
3407 Value *SrcPtr = EmitVAListRef(E->getArg(1)).emitRawPointer(*this);
3408 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy, {DstPtr->getType()}),
3409 {DstPtr, SrcPtr});
3410 return RValue::get(nullptr);
3411 }
3412 case Builtin::BIabs:
3413 case Builtin::BIlabs:
3414 case Builtin::BIllabs:
3415 case Builtin::BI__builtin_abs:
3416 case Builtin::BI__builtin_labs:
3417 case Builtin::BI__builtin_llabs: {
3418 bool SanitizeOverflow = SanOpts.has(SanitizerKind::SignedIntegerOverflow);
3419
3420 Value *Result;
3421 switch (getLangOpts().getSignedOverflowBehavior()) {
3422 case LangOptions::SOB_Defined:
3423 Result = EmitAbs(*this, EmitScalarExpr(E->getArg(0)), false);
3424 break;
3425 case LangOptions::SOB_Undefined:
3426 if (!SanitizeOverflow) {
3427 Result = EmitAbs(*this, EmitScalarExpr(E->getArg(0)), true);
3428 break;
3429 }
3430 [[fallthrough]];
3431 case LangOptions::SOB_Trapping:
3432 // TODO: Somehow handle the corner case when the address of abs is taken.
3433 Result = EmitOverflowCheckedAbs(*this, E, SanitizeOverflow);
3434 break;
3435 }
3436 return RValue::get(Result);
3437 }
3438 case Builtin::BI__builtin_complex: {
3439 Value *Real = EmitScalarExpr(E->getArg(0));
3440 Value *Imag = EmitScalarExpr(E->getArg(1));
3441 return RValue::getComplex({Real, Imag});
3442 }
3443 case Builtin::BI__builtin_conj:
3444 case Builtin::BI__builtin_conjf:
3445 case Builtin::BI__builtin_conjl:
3446 case Builtin::BIconj:
3447 case Builtin::BIconjf:
3448 case Builtin::BIconjl: {
3449 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
3450 Value *Real = ComplexVal.first;
3451 Value *Imag = ComplexVal.second;
3452 Imag = Builder.CreateFNeg(Imag, "neg");
3453 return RValue::getComplex(std::make_pair(Real, Imag));
3454 }
3455 case Builtin::BI__builtin_creal:
3456 case Builtin::BI__builtin_crealf:
3457 case Builtin::BI__builtin_creall:
3458 case Builtin::BIcreal:
3459 case Builtin::BIcrealf:
3460 case Builtin::BIcreall: {
3461 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
3462 return RValue::get(ComplexVal.first);
3463 }
3464
3465 case Builtin::BI__builtin_preserve_access_index: {
3466 // Only enable the preserved access index region when debug info is
3467 // available, as debug info is needed to preserve the user-level access
3468 // pattern.
3469 if (!getDebugInfo()) {
3470 CGM.Error(E->getExprLoc(), "using builtin_preserve_access_index() without -g");
3471 return RValue::get(EmitScalarExpr(E->getArg(0)));
3472 }
3473
3474 // Nested builtin_preserve_access_index() not supported
3475 if (IsInPreservedAIRegion) {
3476 CGM.Error(E->getExprLoc(), "nested builtin_preserve_access_index() not supported");
3477 return RValue::get(EmitScalarExpr(E->getArg(0)));
3478 }
3479
3480 IsInPreservedAIRegion = true;
3481 Value *Res = EmitScalarExpr(E->getArg(0));
3482 IsInPreservedAIRegion = false;
3483 return RValue::get(Res);
3484 }
3485
3486 case Builtin::BI__builtin_cimag:
3487 case Builtin::BI__builtin_cimagf:
3488 case Builtin::BI__builtin_cimagl:
3489 case Builtin::BIcimag:
3490 case Builtin::BIcimagf:
3491 case Builtin::BIcimagl: {
3492 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
3493 return RValue::get(ComplexVal.second);
3494 }
3495
3496 case Builtin::BI__builtin_clrsb:
3497 case Builtin::BI__builtin_clrsbl:
3498 case Builtin::BI__builtin_clrsbll: {
3499 // clrsb(x) -> clz(x < 0 ? ~x : x) - 1
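// For example, on a 32-bit int, __builtin_clrsb(255) == 23: the sign bit is 0
// and the next 23 bits repeat it, matching ctlz(255) - 1 == 24 - 1.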
3500 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3501
3502 llvm::Type *ArgType = ArgValue->getType();
3503 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
3504
3505 llvm::Type *ResultType = ConvertType(E->getType());
3506 Value *Zero = llvm::Constant::getNullValue(ArgType);
3507 Value *IsNeg = Builder.CreateICmpSLT(ArgValue, Zero, "isneg");
3508 Value *Inverse = Builder.CreateNot(ArgValue, "not");
3509 Value *Tmp = Builder.CreateSelect(IsNeg, Inverse, ArgValue);
3510 Value *Ctlz = Builder.CreateCall(F, {Tmp, Builder.getFalse()});
3511 Value *Result = Builder.CreateSub(Ctlz, llvm::ConstantInt::get(ArgType, 1));
3512 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3513 "cast");
3514 return RValue::get(Result);
3515 }
3516 case Builtin::BI__builtin_ctzs:
3517 case Builtin::BI__builtin_ctz:
3518 case Builtin::BI__builtin_ctzl:
3519 case Builtin::BI__builtin_ctzll:
3520 case Builtin::BI__builtin_ctzg: {
3521 bool HasFallback = BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_ctzg &&
3522 E->getNumArgs() > 1;
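// With the two-argument form the zero case is well defined; e.g.
// __builtin_ctzg(0u, 32) evaluates to the fallback value 32, whereas the
// one-argument forms are undefined for a zero input.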
3523
3524 Value *ArgValue =
3525 HasFallback ? EmitScalarExpr(E->getArg(0))
3526 : EmitCheckedArgForBuiltin(E->getArg(0), BCK_CTZPassedZero);
3527
3528 llvm::Type *ArgType = ArgValue->getType();
3529 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
3530
3531 llvm::Type *ResultType = ConvertType(E->getType());
3532 Value *ZeroUndef =
3533 Builder.getInt1(HasFallback || getTarget().isCLZForZeroUndef());
3534 Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
3535 if (Result->getType() != ResultType)
3536 Result =
3537 Builder.CreateIntCast(Result, ResultType, /*isSigned*/ false, "cast");
3538 if (!HasFallback)
3539 return RValue::get(Result);
3540
3541 Value *Zero = Constant::getNullValue(ArgType);
3542 Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
3543 Value *FallbackValue = EmitScalarExpr(E->getArg(1));
3544 Value *ResultOrFallback =
3545 Builder.CreateSelect(IsZero, FallbackValue, Result, "ctzg");
3546 return RValue::get(ResultOrFallback);
3547 }
3548 case Builtin::BI__builtin_clzs:
3549 case Builtin::BI__builtin_clz:
3550 case Builtin::BI__builtin_clzl:
3551 case Builtin::BI__builtin_clzll:
3552 case Builtin::BI__builtin_clzg: {
3553 bool HasFallback = BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_clzg &&
3554 E->getNumArgs() > 1;
3555
3556 Value *ArgValue =
3557 HasFallback ? EmitScalarExpr(E->getArg(0))
3558 : EmitCheckedArgForBuiltin(E->getArg(0), BCK_CLZPassedZero);
3559
3560 llvm::Type *ArgType = ArgValue->getType();
3561 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
3562
3563 llvm::Type *ResultType = ConvertType(E->getType());
3564 Value *ZeroUndef =
3565 Builder.getInt1(HasFallback || getTarget().isCLZForZeroUndef());
3566 Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
3567 if (Result->getType() != ResultType)
3568 Result =
3569 Builder.CreateIntCast(Result, ResultType, /*isSigned*/ false, "cast");
3570 if (!HasFallback)
3571 return RValue::get(Result);
3572
3573 Value *Zero = Constant::getNullValue(ArgType);
3574 Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
3575 Value *FallbackValue = EmitScalarExpr(E->getArg(1));
3576 Value *ResultOrFallback =
3577 Builder.CreateSelect(IsZero, FallbackValue, Result, "clzg");
3578 return RValue::get(ResultOrFallback);
3579 }
3580 case Builtin::BI__builtin_ffs:
3581 case Builtin::BI__builtin_ffsl:
3582 case Builtin::BI__builtin_ffsll: {
3583 // ffs(x) -> x ? cttz(x) + 1 : 0
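// For example, __builtin_ffs(8) == 4 (the lowest set bit, 1-based) and
// __builtin_ffs(0) == 0.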
3584 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3585
3586 llvm::Type *ArgType = ArgValue->getType();
3587 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
3588
3589 llvm::Type *ResultType = ConvertType(E->getType());
3590 Value *Tmp =
3591 Builder.CreateAdd(Builder.CreateCall(F, {ArgValue, Builder.getTrue()}),
3592 llvm::ConstantInt::get(ArgType, 1));
3593 Value *Zero = llvm::Constant::getNullValue(ArgType);
3594 Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
3595 Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs");
3596 if (Result->getType() != ResultType)
3597 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3598 "cast");
3599 return RValue::get(Result);
3600 }
3601 case Builtin::BI__builtin_parity:
3602 case Builtin::BI__builtin_parityl:
3603 case Builtin::BI__builtin_parityll: {
3604 // parity(x) -> ctpop(x) & 1
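// For example, __builtin_parity(7) == 1 because 7 has an odd number (three)
// of set bits.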
3605 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3606
3607 llvm::Type *ArgType = ArgValue->getType();
3608 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
3609
3610 llvm::Type *ResultType = ConvertType(E->getType());
3611 Value *Tmp = Builder.CreateCall(F, ArgValue);
3612 Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
3613 if (Result->getType() != ResultType)
3614 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3615 "cast");
3616 return RValue::get(Result);
3617 }
3618 case Builtin::BI__lzcnt16:
3619 case Builtin::BI__lzcnt:
3620 case Builtin::BI__lzcnt64: {
3621 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3622
3623 llvm::Type *ArgType = ArgValue->getType();
3624 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
3625
3626 llvm::Type *ResultType = ConvertType(E->getType());
3627 Value *Result = Builder.CreateCall(F, {ArgValue, Builder.getFalse()});
3628 if (Result->getType() != ResultType)
3629 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3630 "cast");
3631 return RValue::get(Result);
3632 }
3633 case Builtin::BI__popcnt16:
3634 case Builtin::BI__popcnt:
3635 case Builtin::BI__popcnt64:
3636 case Builtin::BI__builtin_popcount:
3637 case Builtin::BI__builtin_popcountl:
3638 case Builtin::BI__builtin_popcountll:
3639 case Builtin::BI__builtin_popcountg: {
3640 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3641
3642 llvm::Type *ArgType = ArgValue->getType();
3643 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
3644
3645 llvm::Type *ResultType = ConvertType(E->getType());
3646 Value *Result = Builder.CreateCall(F, ArgValue);
3647 if (Result->getType() != ResultType)
3648 Result =
3649 Builder.CreateIntCast(Result, ResultType, /*isSigned*/ false, "cast");
3650 return RValue::get(Result);
3651 }
3652 case Builtin::BI__builtin_unpredictable: {
3653 // Always return the argument of __builtin_unpredictable. LLVM does not
3654 // handle this builtin. Metadata for this builtin should be added directly
3655 // to instructions such as branches or switches that use it.
3656 return RValue::get(EmitScalarExpr(E->getArg(0)));
3657 }
3658 case Builtin::BI__builtin_expect: {
3659 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3660 llvm::Type *ArgType = ArgValue->getType();
3661
3662 Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
3663 // Don't generate llvm.expect on -O0 as the backend won't use it for
3664 // anything.
3665 // Note, we still IRGen ExpectedValue because it could have side-effects.
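// For example, 'if (__builtin_expect(err, 0))' at -O1 and above emits
// '%expval = call i64 @llvm.expect.i64(i64 %err, i64 0)' (on an LP64 target),
// which later passes translate into branch weights.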
3666 if (CGM.getCodeGenOpts().OptimizationLevel == 0)
3667 return RValue::get(ArgValue);
3668
3669 Function *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType);
3670 Value *Result =
3671 Builder.CreateCall(FnExpect, {ArgValue, ExpectedValue}, "expval");
3672 return RValue::get(Result);
3673 }
3674 case Builtin::BI__builtin_expect_with_probability: {
3675 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3676 llvm::Type *ArgType = ArgValue->getType();
3677
3678 Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
3679 llvm::APFloat Probability(0.0);
3680 const Expr *ProbArg = E->getArg(2);
3681 bool EvalSucceed = ProbArg->EvaluateAsFloat(Probability, CGM.getContext());
3682 assert(EvalSucceed && "probability should be able to evaluate as float");
3683 (void)EvalSucceed;
3684 bool LoseInfo = false;
3685 Probability.convert(llvm::APFloat::IEEEdouble(),
3686 llvm::RoundingMode::Dynamic, &LoseInfo);
3687 llvm::Type *Ty = ConvertType(ProbArg->getType());
3688 Constant *Confidence = ConstantFP::get(Ty, Probability);
3689 // Don't generate llvm.expect.with.probability on -O0 as the backend
3690 // won't use it for anything.
3691 // Note, we still IRGen ExpectedValue because it could have side-effects.
3692 if (CGM.getCodeGenOpts().OptimizationLevel == 0)
3693 return RValue::get(ArgValue);
3694
3695 Function *FnExpect =
3696 CGM.getIntrinsic(Intrinsic::expect_with_probability, ArgType);
3697 Value *Result = Builder.CreateCall(
3698 FnExpect, {ArgValue, ExpectedValue, Confidence}, "expval");
3699 return RValue::get(Result);
3700 }
3701 case Builtin::BI__builtin_assume_aligned: {
3702 const Expr *Ptr = E->getArg(0);
3703 Value *PtrValue = EmitScalarExpr(Ptr);
3704 Value *OffsetValue =
3705 (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : nullptr;
3706
3707 Value *AlignmentValue = EmitScalarExpr(E->getArg(1));
3708 ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue);
3709 if (AlignmentCI->getValue().ugt(llvm::Value::MaximumAlignment))
3710 AlignmentCI = ConstantInt::get(AlignmentCI->getIntegerType(),
3711 llvm::Value::MaximumAlignment);
3712
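// The assumption below is emitted as an llvm.assume carrying an "align"
// operand bundle; e.g. 'p = __builtin_assume_aligned(p, 64)' yields roughly
//   call void @llvm.assume(i1 true) [ "align"(ptr %p, i64 64) ]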
3713 emitAlignmentAssumption(PtrValue, Ptr,
3714 /*The expr loc is sufficient.*/ SourceLocation(),
3715 AlignmentCI, OffsetValue);
3716 return RValue::get(PtrValue);
3717 }
3718 case Builtin::BI__assume:
3719 case Builtin::BI__builtin_assume: {
3720 if (E->getArg(0)->HasSideEffects(getContext()))
3721 return RValue::get(nullptr);
3722
3723 Value *ArgValue = EmitCheckedArgForAssume(E->getArg(0));
3724 Function *FnAssume = CGM.getIntrinsic(Intrinsic::assume);
3725 Builder.CreateCall(FnAssume, ArgValue);
3726 return RValue::get(nullptr);
3727 }
3728 case Builtin::BI__builtin_assume_separate_storage: {
3729 const Expr *Arg0 = E->getArg(0);
3730 const Expr *Arg1 = E->getArg(1);
3731
3732 Value *Value0 = EmitScalarExpr(Arg0);
3733 Value *Value1 = EmitScalarExpr(Arg1);
3734
3735 Value *Values[] = {Value0, Value1};
3736 OperandBundleDefT<Value *> OBD("separate_storage", Values);
3737 Builder.CreateAssumption(ConstantInt::getTrue(getLLVMContext()), {OBD});
3738 return RValue::get(nullptr);
3739 }
3740 case Builtin::BI__builtin_allow_runtime_check: {
3741 StringRef Kind =
3742 cast<StringLiteral>(E->getArg(0)->IgnoreParenCasts())->getString();
3743 LLVMContext &Ctx = CGM.getLLVMContext();
3744 llvm::Value *Allow = Builder.CreateCall(
3745 CGM.getIntrinsic(llvm::Intrinsic::allow_runtime_check),
3746 llvm::MetadataAsValue::get(Ctx, llvm::MDString::get(Ctx, Kind)));
3747 return RValue::get(Allow);
3748 }
3749 case Builtin::BI__arithmetic_fence: {
3750 // Create the builtin call if FastMath is selected, and the target
3751 // supports the builtin, otherwise just return the argument.
3752 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3753 llvm::FastMathFlags FMF = Builder.getFastMathFlags();
3754 bool isArithmeticFenceEnabled =
3755 FMF.allowReassoc() &&
3756 getContext().getTargetInfo().checkArithmeticFenceSupported();
3757 QualType ArgType = E->getArg(0)->getType();
3758 if (ArgType->isComplexType()) {
3759 if (isArithmeticFenceEnabled) {
3760 QualType ElementType = ArgType->castAs<ComplexType>()->getElementType();
3761 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
3762 Value *Real = Builder.CreateArithmeticFence(ComplexVal.first,
3763 ConvertType(ElementType));
3764 Value *Imag = Builder.CreateArithmeticFence(ComplexVal.second,
3765 ConvertType(ElementType));
3766 return RValue::getComplex(std::make_pair(Real, Imag));
3767 }
3768 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
3769 Value *Real = ComplexVal.first;
3770 Value *Imag = ComplexVal.second;
3771 return RValue::getComplex(std::make_pair(Real, Imag));
3772 }
3773 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3774 if (isArithmeticFenceEnabled)
3775 return RValue::get(
3776 Builder.CreateArithmeticFence(ArgValue, ConvertType(ArgType)));
3777 return RValue::get(ArgValue);
3778 }
3779 case Builtin::BI__builtin_bswap16:
3780 case Builtin::BI__builtin_bswap32:
3781 case Builtin::BI__builtin_bswap64:
3782 case Builtin::BI_byteswap_ushort:
3783 case Builtin::BI_byteswap_ulong:
3784 case Builtin::BI_byteswap_uint64: {
3785 return RValue::get(
3786 emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::bswap));
3787 }
3788 case Builtin::BI__builtin_bitreverse8:
3789 case Builtin::BI__builtin_bitreverse16:
3790 case Builtin::BI__builtin_bitreverse32:
3791 case Builtin::BI__builtin_bitreverse64: {
3792 return RValue::get(
3793 emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::bitreverse));
3794 }
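// The rotate builtins below go through emitRotate(), which lowers them to the
// llvm.fshl/llvm.fshr funnel-shift intrinsics with the rotated value used for
// both value operands.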
3795 case Builtin::BI__builtin_rotateleft8:
3796 case Builtin::BI__builtin_rotateleft16:
3797 case Builtin::BI__builtin_rotateleft32:
3798 case Builtin::BI__builtin_rotateleft64:
3799 case Builtin::BI_rotl8: // Microsoft variants of rotate left
3800 case Builtin::BI_rotl16:
3801 case Builtin::BI_rotl:
3802 case Builtin::BI_lrotl:
3803 case Builtin::BI_rotl64:
3804 return emitRotate(E, false);
3805
3806 case Builtin::BI__builtin_rotateright8:
3807 case Builtin::BI__builtin_rotateright16:
3808 case Builtin::BI__builtin_rotateright32:
3809 case Builtin::BI__builtin_rotateright64:
3810 case Builtin::BI_rotr8: // Microsoft variants of rotate right
3811 case Builtin::BI_rotr16:
3812 case Builtin::BI_rotr:
3813 case Builtin::BI_lrotr:
3814 case Builtin::BI_rotr64:
3815 return emitRotate(E, true);
3816
3817 case Builtin::BI__builtin_constant_p: {
3818 llvm::Type *ResultType = ConvertType(E->getType());
3819
3820 const Expr *Arg = E->getArg(0);
3821 QualType ArgType = Arg->getType();
3822 // FIXME: The allowance for Obj-C pointers and block pointers is historical
3823 // and likely a mistake.
3824 if (!ArgType->isIntegralOrEnumerationType() && !ArgType->isFloatingType() &&
3825 !ArgType->isObjCObjectPointerType() && !ArgType->isBlockPointerType())
3826 // Per the GCC documentation, only numeric constants are recognized after
3827 // inlining.
3828 return RValue::get(ConstantInt::get(ResultType, 0));
3829
3830 if (Arg->HasSideEffects(getContext()))
3831 // The argument is unevaluated, so be conservative if it might have
3832 // side-effects.
3833 return RValue::get(ConstantInt::get(ResultType, 0));
3834
3835 Value *ArgValue = EmitScalarExpr(Arg);
3836 if (ArgType->isObjCObjectPointerType()) {
3837 // Convert Objective-C objects to id because we cannot distinguish between
3838 // LLVM types for Obj-C classes as they are opaque.
3839 ArgType = CGM.getContext().getObjCIdType();
3840 ArgValue = Builder.CreateBitCast(ArgValue, ConvertType(ArgType));
3841 }
3842 Function *F =
3843 CGM.getIntrinsic(Intrinsic::is_constant, ConvertType(ArgType));
3844 Value *Result = Builder.CreateCall(F, ArgValue);
3845 if (Result->getType() != ResultType)
3846 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/false);
3847 return RValue::get(Result);
3848 }
3849 case Builtin::BI__builtin_dynamic_object_size:
3850 case Builtin::BI__builtin_object_size: {
3851 unsigned Type =
3852 E->getArg(1)->EvaluateKnownConstInt(getContext()).getZExtValue();
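// 'Type' carries the GCC-style flags: bit 1 selects the minimum (types 2/3)
// rather than the maximum (types 0/1) remaining size, and bit 0 restricts the
// computation to the closest enclosing subobject.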
3853 auto *ResType = cast<llvm::IntegerType>(ConvertType(E->getType()));
3854
3855 // We pass this builtin onto the optimizer so that it can figure out the
3856 // object size in more complex cases.
3857 bool IsDynamic = BuiltinID == Builtin::BI__builtin_dynamic_object_size;
3858 return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType,
3859 /*EmittedE=*/nullptr, IsDynamic));
3860 }
3861 case Builtin::BI__builtin_counted_by_ref: {
3862 // Default to returning '(void *) 0'.
3863 llvm::Value *Result = llvm::ConstantPointerNull::get(
3864 llvm::PointerType::getUnqual(getLLVMContext()));
3865
3866 const Expr *Arg = E->getArg(0)->IgnoreParenImpCasts();
3867
3868 if (auto *UO = dyn_cast<UnaryOperator>(Arg);
3869 UO && UO->getOpcode() == UO_AddrOf) {
3870 Arg = UO->getSubExpr()->IgnoreParenImpCasts();
3871
3872 if (auto *ASE = dyn_cast<ArraySubscriptExpr>(Arg))
3873 Arg = ASE->getBase()->IgnoreParenImpCasts();
3874 }
3875
3876 if (const MemberExpr *ME = dyn_cast_if_present<MemberExpr>(Arg)) {
3877 if (auto *CATy =
3878 ME->getMemberDecl()->getType()->getAs<CountAttributedType>();
3879 CATy && CATy->getKind() == CountAttributedType::CountedBy) {
3880 const auto *FAMDecl = cast<FieldDecl>(ME->getMemberDecl());
3881 if (const FieldDecl *CountFD = FAMDecl->findCountedByField())
3882 Result = GetCountedByFieldExprGEP(Arg, FAMDecl, CountFD);
3883 else
3884 llvm::report_fatal_error("Cannot find the counted_by 'count' field");
3885 }
3886 }
3887
3888 return RValue::get(Result);
3889 }
3890 case Builtin::BI__builtin_prefetch: {
3891 Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0));
3892 // FIXME: Technically these constants should be of type 'int', yes?
3893 RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) :
3894 llvm::ConstantInt::get(Int32Ty, 0);
3895 Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) :
3896 llvm::ConstantInt::get(Int32Ty, 3);
3897 Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
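// Thus '__builtin_prefetch(p)' defaults to a read prefetch (rw = 0) with
// maximal temporal locality (locality = 3); the constant 1 marks a data
// rather than an instruction cache prefetch.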
3898 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
3899 Builder.CreateCall(F, {Address, RW, Locality, Data});
3900 return RValue::get(nullptr);
3901 }
3902 case Builtin::BI__builtin_readcyclecounter: {
3903 Function *F = CGM.getIntrinsic(Intrinsic::readcyclecounter);
3904 return RValue::get(Builder.CreateCall(F));
3905 }
3906 case Builtin::BI__builtin_readsteadycounter: {
3907 Function *F = CGM.getIntrinsic(Intrinsic::readsteadycounter);
3908 return RValue::get(Builder.CreateCall(F));
3909 }
3910 case Builtin::BI__builtin___clear_cache: {
3911 Value *Begin = EmitScalarExpr(E->getArg(0));
3912 Value *End = EmitScalarExpr(E->getArg(1));
3913 Function *F = CGM.getIntrinsic(Intrinsic::clear_cache);
3914 return RValue::get(Builder.CreateCall(F, {Begin, End}));
3915 }
3916 case Builtin::BI__builtin_trap:
3917 EmitTrapCall(Intrinsic::trap);
3918 return RValue::get(nullptr);
3919 case Builtin::BI__builtin_verbose_trap: {
3920 llvm::DILocation *TrapLocation = Builder.getCurrentDebugLocation();
3921 if (getDebugInfo()) {
3922 TrapLocation = getDebugInfo()->CreateTrapFailureMessageFor(
3923 TrapLocation, *E->getArg(0)->tryEvaluateString(getContext()),
3924 *E->getArg(1)->tryEvaluateString(getContext()));
3925 }
3926 ApplyDebugLocation ApplyTrapDI(*this, TrapLocation);
3927 // Currently no attempt is made to prevent traps from being merged.
3928 EmitTrapCall(Intrinsic::trap);
3929 return RValue::get(nullptr);
3930 }
3931 case Builtin::BI__debugbreak:
3932 EmitTrapCall(Intrinsic::debugtrap);
3933 return RValue::get(nullptr);
3934 case Builtin::BI__builtin_unreachable: {
3935 EmitUnreachable(E->getExprLoc());
3936
3937 // We do need to preserve an insertion point.
3938 EmitBlock(createBasicBlock("unreachable.cont"));
3939
3940 return RValue::get(nullptr);
3941 }
3942
3943 case Builtin::BI__builtin_powi:
3944 case Builtin::BI__builtin_powif:
3945 case Builtin::BI__builtin_powil: {
3946 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
3947 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
3948
3949 if (Builder.getIsFPConstrained()) {
3950 // FIXME: llvm.powi has 2 mangling types,
3951 // llvm.experimental.constrained.powi has one.
3952 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3953 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_powi,
3954 Src0->getType());
3955 return RValue::get(Builder.CreateConstrainedFPCall(F, { Src0, Src1 }));
3956 }
3957
3958 Function *F = CGM.getIntrinsic(Intrinsic::powi,
3959 { Src0->getType(), Src1->getType() });
3960 return RValue::get(Builder.CreateCall(F, { Src0, Src1 }));
3961 }
3962 case Builtin::BI__builtin_frexpl: {
3963 // Linux PPC will not be adding additional PPCDoubleDouble support.
3964 // WIP to switch default to IEEE long double. Will emit libcall for
3965 // frexpl instead of legalizing this type in the BE.
3966 if (&getTarget().getLongDoubleFormat() == &llvm::APFloat::PPCDoubleDouble())
3967 break;
3968 [[fallthrough]];
3969 }
3970 case Builtin::BI__builtin_frexp:
3971 case Builtin::BI__builtin_frexpf:
3972 case Builtin::BI__builtin_frexpf128:
3973 case Builtin::BI__builtin_frexpf16:
3974 return RValue::get(emitFrexpBuiltin(*this, E, Intrinsic::frexp));
3975 case Builtin::BI__builtin_isgreater:
3976 case Builtin::BI__builtin_isgreaterequal:
3977 case Builtin::BI__builtin_isless:
3978 case Builtin::BI__builtin_islessequal:
3979 case Builtin::BI__builtin_islessgreater:
3980 case Builtin::BI__builtin_isunordered: {
3981 // Ordered comparisons: we know the arguments to these are matching scalar
3982 // floating point values.
3983 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3984 Value *LHS = EmitScalarExpr(E->getArg(0));
3985 Value *RHS = EmitScalarExpr(E->getArg(1));
3986
3987 switch (BuiltinID) {
3988 default: llvm_unreachable("Unknown ordered comparison");
3989 case Builtin::BI__builtin_isgreater:
3990 LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp");
3991 break;
3992 case Builtin::BI__builtin_isgreaterequal:
3993 LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp");
3994 break;
3995 case Builtin::BI__builtin_isless:
3996 LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp");
3997 break;
3998 case Builtin::BI__builtin_islessequal:
3999 LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp");
4000 break;
4001 case Builtin::BI__builtin_islessgreater:
4002 LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp");
4003 break;
4004 case Builtin::BI__builtin_isunordered:
4005 LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp");
4006 break;
4007 }
4008 // ZExt bool to int type.
4009 return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType())));
4010 }
4011
4012 case Builtin::BI__builtin_isnan: {
4013 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
4014 Value *V = EmitScalarExpr(E->getArg(0));
4015 if (Value *Result = tryUseTestFPKind(*this, BuiltinID, V))
4016 return RValue::get(Result);
4017 return RValue::get(
4018 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcNan),
4019 ConvertType(E->getType())));
4020 }
4021
4022 case Builtin::BI__builtin_issignaling: {
4023 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
4024 Value *V = EmitScalarExpr(E->getArg(0));
4025 return RValue::get(
4026 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcSNan),
4027 ConvertType(E->getType())));
4028 }
4029
4030 case Builtin::BI__builtin_isinf: {
4031 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
4032 Value *V = EmitScalarExpr(E->getArg(0));
4033 if (Value *Result = tryUseTestFPKind(*this, BuiltinID, V))
4034 return RValue::get(Result);
4035 return RValue::get(
4036 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcInf),
4037 ConvertType(E->getType())));
4038 }
4039
4040 case Builtin::BIfinite:
4041 case Builtin::BI__finite:
4042 case Builtin::BIfinitef:
4043 case Builtin::BI__finitef:
4044 case Builtin::BIfinitel:
4045 case Builtin::BI__finitel:
4046 case Builtin::BI__builtin_isfinite: {
4047 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
4048 Value *V = EmitScalarExpr(E->getArg(0));
4049 if (Value *Result = tryUseTestFPKind(*this, BuiltinID, V))
4050 return RValue::get(Result);
4051 return RValue::get(
4052 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcFinite),
4053 ConvertType(E->getType())));
4054 }
4055
4056 case Builtin::BI__builtin_isnormal: {
4057 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
4058 Value *V = EmitScalarExpr(E->getArg(0));
4059 return RValue::get(
4060 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcNormal),
4061 ConvertType(E->getType())));
4062 }
4063
4064 case Builtin::BI__builtin_issubnormal: {
4065 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
4066 Value *V = EmitScalarExpr(E->getArg(0));
4067 return RValue::get(
4068 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcSubnormal),
4069 ConvertType(E->getType())));
4070 }
4071
4072 case Builtin::BI__builtin_iszero: {
4073 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
4074 Value *V = EmitScalarExpr(E->getArg(0));
4075 return RValue::get(
4076 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcZero),
4077 ConvertType(E->getType())));
4078 }
4079
4080 case Builtin::BI__builtin_isfpclass: {
4081 Expr::EvalResult Result;
4082 if (!E->getArg(1)->EvaluateAsInt(Result, CGM.getContext()))
4083 break;
4084 uint64_t Test = Result.Val.getInt().getLimitedValue();
4085 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
4086 Value *V = EmitScalarExpr(E->getArg(0));
4087 return RValue::get(Builder.CreateZExt(Builder.createIsFPClass(V, Test),
4088 ConvertType(E->getType())));
4089 }
4090
4091 case Builtin::BI__builtin_nondeterministic_value: {
4092 llvm::Type *Ty = ConvertType(E->getArg(0)->getType());
4093
4094 Value *Result = PoisonValue::get(Ty);
4095 Result = Builder.CreateFreeze(Result);
4096
4097 return RValue::get(Result);
4098 }
4099
4100 case Builtin::BI__builtin_elementwise_abs: {
4101 Value *Result;
4102 QualType QT = E->getArg(0)->getType();
4103
4104 if (auto *VecTy = QT->getAs<VectorType>())
4105 QT = VecTy->getElementType();
4106 if (QT->isIntegerType())
4107 Result = Builder.CreateBinaryIntrinsic(
4108 llvm::Intrinsic::abs, EmitScalarExpr(E->getArg(0)),
4109 Builder.getFalse(), nullptr, "elt.abs");
4110 else
4111 Result = emitBuiltinWithOneOverloadedType<1>(
4112 *this, E, llvm::Intrinsic::fabs, "elt.abs");
4113
4114 return RValue::get(Result);
4115 }
4116 case Builtin::BI__builtin_elementwise_acos:
4117 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4118 *this, E, llvm::Intrinsic::acos, "elt.acos"));
4119 case Builtin::BI__builtin_elementwise_asin:
4120 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4121 *this, E, llvm::Intrinsic::asin, "elt.asin"));
4122 case Builtin::BI__builtin_elementwise_atan:
4123 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4124 *this, E, llvm::Intrinsic::atan, "elt.atan"));
4125 case Builtin::BI__builtin_elementwise_atan2:
4126 return RValue::get(emitBuiltinWithOneOverloadedType<2>(
4127 *this, E, llvm::Intrinsic::atan2, "elt.atan2"));
4128 case Builtin::BI__builtin_elementwise_ceil:
4129 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4130 *this, E, llvm::Intrinsic::ceil, "elt.ceil"));
4131 case Builtin::BI__builtin_elementwise_exp:
4132 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4133 *this, E, llvm::Intrinsic::exp, "elt.exp"));
4134 case Builtin::BI__builtin_elementwise_exp2:
4135 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4136 *this, E, llvm::Intrinsic::exp2, "elt.exp2"));
4137 case Builtin::BI__builtin_elementwise_log:
4138 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4139 *this, E, llvm::Intrinsic::log, "elt.log"));
4140 case Builtin::BI__builtin_elementwise_log2:
4141 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4142 *this, E, llvm::Intrinsic::log2, "elt.log2"));
4143 case Builtin::BI__builtin_elementwise_log10:
4144 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4145 *this, E, llvm::Intrinsic::log10, "elt.log10"));
4146 case Builtin::BI__builtin_elementwise_pow: {
4147 return RValue::get(
4148 emitBuiltinWithOneOverloadedType<2>(*this, E, llvm::Intrinsic::pow));
4149 }
4150 case Builtin::BI__builtin_elementwise_bitreverse:
4151 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4152 *this, E, llvm::Intrinsic::bitreverse, "elt.bitreverse"));
4153 case Builtin::BI__builtin_elementwise_cos:
4154 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4155 *this, E, llvm::Intrinsic::cos, "elt.cos"));
4156 case Builtin::BI__builtin_elementwise_cosh:
4157 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4158 *this, E, llvm::Intrinsic::cosh, "elt.cosh"));
4159 case Builtin::BI__builtin_elementwise_floor:
4160 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4161 *this, E, llvm::Intrinsic::floor, "elt.floor"));
4162 case Builtin::BI__builtin_elementwise_popcount:
4163 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4164 *this, E, llvm::Intrinsic::ctpop, "elt.ctpop"));
4165 case Builtin::BI__builtin_elementwise_roundeven:
4166 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4167 *this, E, llvm::Intrinsic::roundeven, "elt.roundeven"));
4168 case Builtin::BI__builtin_elementwise_round:
4169 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4170 *this, E, llvm::Intrinsic::round, "elt.round"));
4171 case Builtin::BI__builtin_elementwise_rint:
4172 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4173 *this, E, llvm::Intrinsic::rint, "elt.rint"));
4174 case Builtin::BI__builtin_elementwise_nearbyint:
4175 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4176 *this, E, llvm::Intrinsic::nearbyint, "elt.nearbyint"));
4177 case Builtin::BI__builtin_elementwise_sin:
4178 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4179 *this, E, llvm::Intrinsic::sin, "elt.sin"));
4180 case Builtin::BI__builtin_elementwise_sinh:
4181 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4182 *this, E, llvm::Intrinsic::sinh, "elt.sinh"));
4183 case Builtin::BI__builtin_elementwise_tan:
4184 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4185 *this, E, llvm::Intrinsic::tan, "elt.tan"));
4186 case Builtin::BI__builtin_elementwise_tanh:
4187 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4188 *this, E, llvm::Intrinsic::tanh, "elt.tanh"));
4189 case Builtin::BI__builtin_elementwise_trunc:
4190 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4191 *this, E, llvm::Intrinsic::trunc, "elt.trunc"));
4192 case Builtin::BI__builtin_elementwise_canonicalize:
4193 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4194 *this, E, llvm::Intrinsic::canonicalize, "elt.canonicalize"));
4195 case Builtin::BI__builtin_elementwise_copysign:
4196 return RValue::get(emitBuiltinWithOneOverloadedType<2>(
4197 *this, E, llvm::Intrinsic::copysign));
4198 case Builtin::BI__builtin_elementwise_fma:
4199 return RValue::get(
4200 emitBuiltinWithOneOverloadedType<3>(*this, E, llvm::Intrinsic::fma));
4201 case Builtin::BI__builtin_elementwise_add_sat:
4202 case Builtin::BI__builtin_elementwise_sub_sat: {
4203 Value *Op0 = EmitScalarExpr(E->getArg(0));
4204 Value *Op1 = EmitScalarExpr(E->getArg(1));
4205 Value *Result;
4206 assert(Op0->getType()->isIntOrIntVectorTy() && "integer type expected");
4207 QualType Ty = E->getArg(0)->getType();
4208 if (auto *VecTy = Ty->getAs<VectorType>())
4209 Ty = VecTy->getElementType();
4210 bool IsSigned = Ty->isSignedIntegerType();
4211 unsigned Opc;
4212 if (BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_elementwise_add_sat)
4213 Opc = IsSigned ? llvm::Intrinsic::sadd_sat : llvm::Intrinsic::uadd_sat;
4214 else
4215 Opc = IsSigned ? llvm::Intrinsic::ssub_sat : llvm::Intrinsic::usub_sat;
4216 Result = Builder.CreateBinaryIntrinsic(Opc, Op0, Op1, nullptr, "elt.sat");
4217 return RValue::get(Result);
4218 }
4219
4220 case Builtin::BI__builtin_elementwise_max: {
4221 Value *Op0 = EmitScalarExpr(E->getArg(0));
4222 Value *Op1 = EmitScalarExpr(E->getArg(1));
4223 Value *Result;
4224 if (Op0->getType()->isIntOrIntVectorTy()) {
4225 QualType Ty = E->getArg(0)->getType();
4226 if (auto *VecTy = Ty->getAs<VectorType>())
4227 Ty = VecTy->getElementType();
4228 Result = Builder.CreateBinaryIntrinsic(Ty->isSignedIntegerType()
4229 ? llvm::Intrinsic::smax
4230 : llvm::Intrinsic::umax,
4231 Op0, Op1, nullptr, "elt.max");
4232 } else
4233 Result = Builder.CreateMaxNum(Op0, Op1, "elt.max");
4234 return RValue::get(Result);
4235 }
4236 case Builtin::BI__builtin_elementwise_min: {
4237 Value *Op0 = EmitScalarExpr(E->getArg(0));
4238 Value *Op1 = EmitScalarExpr(E->getArg(1));
4239 Value *Result;
4240 if (Op0->getType()->isIntOrIntVectorTy()) {
4241 QualType Ty = E->getArg(0)->getType();
4242 if (auto *VecTy = Ty->getAs<VectorType>())
4243 Ty = VecTy->getElementType();
4244 Result = Builder.CreateBinaryIntrinsic(Ty->isSignedIntegerType()
4245 ? llvm::Intrinsic::smin
4246 : llvm::Intrinsic::umin,
4247 Op0, Op1, nullptr, "elt.min");
4248 } else
4249 Result = Builder.CreateMinNum(Op0, Op1, "elt.min");
4250 return RValue::get(Result);
4251 }
4252
4253 case Builtin::BI__builtin_elementwise_maximum: {
4254 Value *Op0 = EmitScalarExpr(E->getArg(0));
4255 Value *Op1 = EmitScalarExpr(E->getArg(1));
4256 Value *Result = Builder.CreateBinaryIntrinsic(llvm::Intrinsic::maximum, Op0,
4257 Op1, nullptr, "elt.maximum");
4258 return RValue::get(Result);
4259 }
4260
4261 case Builtin::BI__builtin_elementwise_minimum: {
4262 Value *Op0 = EmitScalarExpr(E->getArg(0));
4263 Value *Op1 = EmitScalarExpr(E->getArg(1));
4264 Value *Result = Builder.CreateBinaryIntrinsic(llvm::Intrinsic::minimum, Op0,
4265 Op1, nullptr, "elt.minimum");
4266 return RValue::get(Result);
4267 }
4268
4269 case Builtin::BI__builtin_reduce_max: {
4270 auto GetIntrinsicID = [this](QualType QT) {
4271 if (auto *VecTy = QT->getAs<VectorType>())
4272 QT = VecTy->getElementType();
4273 else if (QT->isSizelessVectorType())
4274 QT = QT->getSizelessVectorEltType(CGM.getContext());
4275
4276 if (QT->isSignedIntegerType())
4277 return llvm::Intrinsic::vector_reduce_smax;
4278 if (QT->isUnsignedIntegerType())
4279 return llvm::Intrinsic::vector_reduce_umax;
4280 assert(QT->isFloatingType() && "must have a float here");
4281 return llvm::Intrinsic::vector_reduce_fmax;
4282 };
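// For example, a vector of 'unsigned' reduces via llvm.vector.reduce.umax,
// while a 'float' vector reduces via llvm.vector.reduce.fmax.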
4283 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4284 *this, E, GetIntrinsicID(E->getArg(0)->getType()), "rdx.max"));
4285 }
4286
4287 case Builtin::BI__builtin_reduce_min: {
4288 auto GetIntrinsicID = [this](QualType QT) {
4289 if (auto *VecTy = QT->getAs<VectorType>())
4290 QT = VecTy->getElementType();
4291 else if (QT->isSizelessVectorType())
4292 QT = QT->getSizelessVectorEltType(CGM.getContext());
4293
4294 if (QT->isSignedIntegerType())
4295 return llvm::Intrinsic::vector_reduce_smin;
4296 if (QT->isUnsignedIntegerType())
4297 return llvm::Intrinsic::vector_reduce_umin;
4298 assert(QT->isFloatingType() && "must have a float here");
4299 return llvm::Intrinsic::vector_reduce_fmin;
4300 };
4301
4302 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4303 *this, E, GetIntrinsicID(E->getArg(0)->getType()), "rdx.min"));
4304 }
4305
4306 case Builtin::BI__builtin_reduce_add:
4307 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4308 *this, E, llvm::Intrinsic::vector_reduce_add, "rdx.add"));
4309 case Builtin::BI__builtin_reduce_mul:
4310 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4311 *this, E, llvm::Intrinsic::vector_reduce_mul, "rdx.mul"));
4312 case Builtin::BI__builtin_reduce_xor:
4313 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4314 *this, E, llvm::Intrinsic::vector_reduce_xor, "rdx.xor"));
4315 case Builtin::BI__builtin_reduce_or:
4316 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4317 *this, E, llvm::Intrinsic::vector_reduce_or, "rdx.or"));
4318 case Builtin::BI__builtin_reduce_and:
4319 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4320 *this, E, llvm::Intrinsic::vector_reduce_and, "rdx.and"));
4321 case Builtin::BI__builtin_reduce_maximum:
4322 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4323 *this, E, llvm::Intrinsic::vector_reduce_fmaximum, "rdx.maximum"));
4324 case Builtin::BI__builtin_reduce_minimum:
4325 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4326 *this, E, llvm::Intrinsic::vector_reduce_fminimum, "rdx.minimum"));
4327
4328 case Builtin::BI__builtin_matrix_transpose: {
4329 auto *MatrixTy = E->getArg(0)->getType()->castAs<ConstantMatrixType>();
4330 Value *MatValue = EmitScalarExpr(E->getArg(0));
4331 MatrixBuilder MB(Builder);
4332 Value *Result = MB.CreateMatrixTranspose(MatValue, MatrixTy->getNumRows(),
4333 MatrixTy->getNumColumns());
4334 return RValue::get(Result);
4335 }
4336
4337 case Builtin::BI__builtin_matrix_column_major_load: {
4338 MatrixBuilder MB(Builder);
4339 // Emit everything that isn't dependent on the first parameter type
4340 Value *Stride = EmitScalarExpr(E->getArg(3));
4341 const auto *ResultTy = E->getType()->getAs<ConstantMatrixType>();
4342 auto *PtrTy = E->getArg(0)->getType()->getAs<PointerType>();
4343 assert(PtrTy && "arg0 must be of pointer type");
4344 bool IsVolatile = PtrTy->getPointeeType().isVolatileQualified();
4345
4346 Address Src = EmitPointerWithAlignment(E->getArg(0));
4347 EmitNonNullArgCheck(RValue::get(Src.emitRawPointer(*this)),
4348 E->getArg(0)->getType(), E->getArg(0)->getExprLoc(), FD,
4349 0);
4350 Value *Result = MB.CreateColumnMajorLoad(
4351 Src.getElementType(), Src.emitRawPointer(*this),
4352 Align(Src.getAlignment().getQuantity()), Stride, IsVolatile,
4353 ResultTy->getNumRows(), ResultTy->getNumColumns(), "matrix");
4354 return RValue::get(Result);
4355 }
4356
4357 case Builtin::BI__builtin_matrix_column_major_store: {
4358 MatrixBuilder MB(Builder);
4359 Value *Matrix = EmitScalarExpr(E->getArg(0));
4360 Address Dst = EmitPointerWithAlignment(E->getArg(1));
4361 Value *Stride = EmitScalarExpr(E->getArg(2));
4362
4363 const auto *MatrixTy = E->getArg(0)->getType()->getAs<ConstantMatrixType>();
4364 auto *PtrTy = E->getArg(1)->getType()->getAs<PointerType>();
4365 assert(PtrTy && "arg1 must be of pointer type");
4366 bool IsVolatile = PtrTy->getPointeeType().isVolatileQualified();
4367
4368 EmitNonNullArgCheck(RValue::get(Dst.emitRawPointer(*this)),
4369 E->getArg(1)->getType(), E->getArg(1)->getExprLoc(), FD,
4370 0);
4371 Value *Result = MB.CreateColumnMajorStore(
4372 Matrix, Dst.emitRawPointer(*this),
4373 Align(Dst.getAlignment().getQuantity()), Stride, IsVolatile,
4374 MatrixTy->getNumRows(), MatrixTy->getNumColumns());
4375 return RValue::get(Result);
4376 }
4377
4378 case Builtin::BI__builtin_isinf_sign: {
4379 // isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? -1 : 1) : 0
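// For example, __builtin_isinf_sign(-INFINITY) == -1, (+INFINITY) == 1, and
// any finite value or NaN gives 0.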
4380 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
4381 // FIXME: for strictfp/IEEE-754 we need to not trap on SNaN here.
4382 Value *Arg = EmitScalarExpr(E->getArg(0));
4383 Value *AbsArg = EmitFAbs(*this, Arg);
4384 Value *IsInf = Builder.CreateFCmpOEQ(
4385 AbsArg, ConstantFP::getInfinity(Arg->getType()), "isinf");
4386 Value *IsNeg = EmitSignBit(*this, Arg);
4387
4388 llvm::Type *IntTy = ConvertType(E->getType());
4389 Value *Zero = Constant::getNullValue(IntTy);
4390 Value *One = ConstantInt::get(IntTy, 1);
4391 Value *NegativeOne = ConstantInt::get(IntTy, -1);
4392 Value *SignResult = Builder.CreateSelect(IsNeg, NegativeOne, One);
4393 Value *Result = Builder.CreateSelect(IsInf, SignResult, Zero);
4394 return RValue::get(Result);
4395 }
4396
4397 case Builtin::BI__builtin_flt_rounds: {
4398 Function *F = CGM.getIntrinsic(Intrinsic::get_rounding);
4399
4400 llvm::Type *ResultType = ConvertType(E->getType());
4401 Value *Result = Builder.CreateCall(F);
4402 if (Result->getType() != ResultType)
4403 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
4404 "cast");
4405 return RValue::get(Result);
4406 }
4407
4408 case Builtin::BI__builtin_set_flt_rounds: {
4409 Function *F = CGM.getIntrinsic(Intrinsic::set_rounding);
4410
4411 Value *V = EmitScalarExpr(E->getArg(0));
4412 Builder.CreateCall(F, V);
4413 return RValue::get(nullptr);
4414 }
4415
4416 case Builtin::BI__builtin_fpclassify: {
4417 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
4418 // FIXME: for strictfp/IEEE-754 we need to not trap on SNaN here.
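// The first five arguments are the caller-supplied results for FP_NAN,
// FP_INFINITE, FP_NORMAL, FP_SUBNORMAL and FP_ZERO (in that order); the value
// being classified is argument 5.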
4419 Value *V = EmitScalarExpr(E->getArg(5));
4420 llvm::Type *Ty = ConvertType(E->getArg(5)->getType());
4421
4422 // Create Result
4423 BasicBlock *Begin = Builder.GetInsertBlock();
4424 BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn);
4425 Builder.SetInsertPoint(End);
4426 PHINode *Result =
4427 Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4,
4428 "fpclassify_result");
4429
4430 // if (V==0) return FP_ZERO
4431 Builder.SetInsertPoint(Begin);
4432 Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty),
4433 "iszero");
4434 Value *ZeroLiteral = EmitScalarExpr(E->getArg(4));
4435 BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn);
4436 Builder.CreateCondBr(IsZero, End, NotZero);
4437 Result->addIncoming(ZeroLiteral, Begin);
4438
4439 // if (V != V) return FP_NAN
4440 Builder.SetInsertPoint(NotZero);
4441 Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp");
4442 Value *NanLiteral = EmitScalarExpr(E->getArg(0));
4443 BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn);
4444 Builder.CreateCondBr(IsNan, End, NotNan);
4445 Result->addIncoming(NanLiteral, NotZero);
4446
4447 // if (fabs(V) == infinity) return FP_INFINITY
4448 Builder.SetInsertPoint(NotNan);
4449 Value *VAbs = EmitFAbs(*this, V);
4450 Value *IsInf =
4451 Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()),
4452 "isinf");
4453 Value *InfLiteral = EmitScalarExpr(E->getArg(1));
4454 BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn);
4455 Builder.CreateCondBr(IsInf, End, NotInf);
4456 Result->addIncoming(InfLiteral, NotNan);
4457
4458 // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL
4459 Builder.SetInsertPoint(NotInf);
4460 APFloat Smallest = APFloat::getSmallestNormalized(
4461 getContext().getFloatTypeSemantics(E->getArg(5)->getType()));
4462 Value *IsNormal =
4463 Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest),
4464 "isnormal");
4465 Value *NormalResult =
4466 Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)),
4467 EmitScalarExpr(E->getArg(3)));
4468 Builder.CreateBr(End);
4469 Result->addIncoming(NormalResult, NotInf);
4470
4471 // return Result
4472 Builder.SetInsertPoint(End);
4473 return RValue::get(Result);
4474 }
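// Editorial note, not part of the original source: a call such as
//   __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL, FP_ZERO, x)
// flows through the blocks built above: a zero test, then a NaN test (x != x),
// then an infinity test on fabs(x), and finally a normal-vs-subnormal select
// against the smallest normalized value; the five classification arguments are
// the incoming values of the phi in fpclassify_end.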
4475
4476 // An alloca will always return a pointer to the alloca (stack) address
4477 // space. This address space need not be the same as the AST / Language
4478 // default (e.g. in C / C++ auto vars are in the generic address space). At
4479 // the AST level this is handled within CreateTempAlloca et al., but for the
4480 // builtin / dynamic alloca we have to handle it here. We use an explicit cast
4481 // instead of passing an AS to CreateAlloca so as to not inhibit optimisation.
4482 case Builtin::BIalloca:
4483 case Builtin::BI_alloca:
4484 case Builtin::BI__builtin_alloca_uninitialized:
4485 case Builtin::BI__builtin_alloca: {
4486 Value *Size = EmitScalarExpr(E->getArg(0));
4487 const TargetInfo &TI = getContext().getTargetInfo();
4488 // The alignment of the alloca should correspond to __BIGGEST_ALIGNMENT__.
4489 const Align SuitableAlignmentInBytes =
4490 CGM.getContext()
4491 .toCharUnitsFromBits(TI.getSuitableAlign())
4492 .getAsAlign();
4493 AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
4494 AI->setAlignment(SuitableAlignmentInBytes);
4495 if (BuiltinID != Builtin::BI__builtin_alloca_uninitialized)
4496 initializeAlloca(*this, AI, Size, SuitableAlignmentInBytes);
4499 if (AAS != EAS) {
4500 llvm::Type *Ty = CGM.getTypes().ConvertType(E->getType());
4501 return RValue::get(getTargetHooks().performAddrSpaceCast(*this, AI, AAS,
4502 EAS, Ty));
4503 }
4504 return RValue::get(AI);
4505 }
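// Editorial note, not part of the original source: on a target whose alloca
// address space differs from the expression's address space (e.g. AMDGPU,
// where allocas are addrspace(5) but generic pointers are addrspace(0)), the
// case above produces roughly:
//   %a = alloca i8, i64 %size, align <__BIGGEST_ALIGNMENT__>, addrspace(5)
//   %p = addrspacecast ptr addrspace(5) %a to ptr
// On most CPU targets the two address spaces match and no cast is emitted.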
4506
4507 case Builtin::BI__builtin_alloca_with_align_uninitialized:
4508 case Builtin::BI__builtin_alloca_with_align: {
4509 Value *Size = EmitScalarExpr(E->getArg(0));
4510 Value *AlignmentInBitsValue = EmitScalarExpr(E->getArg(1));
4511 auto *AlignmentInBitsCI = cast<ConstantInt>(AlignmentInBitsValue);
4512 unsigned AlignmentInBits = AlignmentInBitsCI->getZExtValue();
4513 const Align AlignmentInBytes =
4514 CGM.getContext().toCharUnitsFromBits(AlignmentInBits).getAsAlign();
4515 AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
4516 AI->setAlignment(AlignmentInBytes);
4517 if (BuiltinID != Builtin::BI__builtin_alloca_with_align_uninitialized)
4518 initializeAlloca(*this, AI, Size, AlignmentInBytes);
4521 if (AAS != EAS) {
4522 llvm::Type *Ty = CGM.getTypes().ConvertType(E->getType());
4523 return RValue::get(getTargetHooks().performAddrSpaceCast(*this, AI, AAS,
4524 EAS, Ty));
4525 }
4526 return RValue::get(AI);
4527 }
4528
4529 case Builtin::BIbzero:
4530 case Builtin::BI__builtin_bzero: {
4531 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4532 Value *SizeVal = EmitScalarExpr(E->getArg(1));
4533 EmitNonNullArgCheck(Dest, E->getArg(0)->getType(),
4534 E->getArg(0)->getExprLoc(), FD, 0);
4535 Builder.CreateMemSet(Dest, Builder.getInt8(0), SizeVal, false);
4536 return RValue::get(nullptr);
4537 }
4538
4539 case Builtin::BIbcopy:
4540 case Builtin::BI__builtin_bcopy: {
4541 Address Src = EmitPointerWithAlignment(E->getArg(0));
4542 Address Dest = EmitPointerWithAlignment(E->getArg(1));
4543 Value *SizeVal = EmitScalarExpr(E->getArg(2));
4544 EmitNonNullArgCheck(RValue::get(Src, *this),
4545 E->getArg(0)->getType(), E->getArg(0)->getExprLoc(), FD,
4546 0);
4547 EmitNonNullArgCheck(RValue::get(Dest, *this),
4548 E->getArg(1)->getType(), E->getArg(1)->getExprLoc(), FD,
4549 0);
4550 Builder.CreateMemMove(Dest, Src, SizeVal, false);
4551 return RValue::get(nullptr);
4552 }
4553
4554 case Builtin::BImemcpy:
4555 case Builtin::BI__builtin_memcpy:
4556 case Builtin::BImempcpy:
4557 case Builtin::BI__builtin_mempcpy: {
4558 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4559 Address Src = EmitPointerWithAlignment(E->getArg(1));
4560 Value *SizeVal = EmitScalarExpr(E->getArg(2));
4561 EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0);
4562 EmitArgCheck(TCK_Load, Src, E->getArg(1), 1);
4563 Builder.CreateMemCpy(Dest, Src, SizeVal, false);
4564 if (BuiltinID == Builtin::BImempcpy ||
4565 BuiltinID == Builtin::BI__builtin_mempcpy)
4566 return RValue::get(Builder.CreateInBoundsGEP(
4567 Dest.getElementType(), Dest.emitRawPointer(*this), SizeVal));
4568 else
4569 return RValue::get(Dest, *this);
4570 }
4571
4572 case Builtin::BI__builtin_memcpy_inline: {
4573 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4574 Address Src = EmitPointerWithAlignment(E->getArg(1));
4575 uint64_t Size =
4576 E->getArg(2)->EvaluateKnownConstInt(getContext()).getZExtValue();
4577 EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0);
4578 EmitArgCheck(TCK_Load, Src, E->getArg(1), 1);
4579 Builder.CreateMemCpyInline(Dest, Src, Size);
4580 return RValue::get(nullptr);
4581 }
4582
4583 case Builtin::BI__builtin_char_memchr:
4584 BuiltinID = Builtin::BI__builtin_memchr;
4585 break;
4586
4587 case Builtin::BI__builtin___memcpy_chk: {
4588 // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2.
4589 Expr::EvalResult SizeResult, DstSizeResult;
4590 if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
4591 !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
4592 break;
4593 llvm::APSInt Size = SizeResult.Val.getInt();
4594 llvm::APSInt DstSize = DstSizeResult.Val.getInt();
4595 if (Size.ugt(DstSize))
4596 break;
4597 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4598 Address Src = EmitPointerWithAlignment(E->getArg(1));
4599 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
4600 Builder.CreateMemCpy(Dest, Src, SizeVal, false);
4601 return RValue::get(Dest, *this);
4602 }
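// Editorial note, not part of the original source: the fold above only fires
// when both size arguments are integer constants, e.g.
//   __builtin___memcpy_chk(dst, src, 16, 32)   // 16 <= 32: plain memcpy
// If the copy size may exceed the known object size, or either constant is
// unavailable, we break out of the switch and the call is emitted as a normal
// library call so the runtime check is preserved.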
4603
4604 case Builtin::BI__builtin_objc_memmove_collectable: {
4605 Address DestAddr = EmitPointerWithAlignment(E->getArg(0));
4606 Address SrcAddr = EmitPointerWithAlignment(E->getArg(1));
4607 Value *SizeVal = EmitScalarExpr(E->getArg(2));
4608 CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this,
4609 DestAddr, SrcAddr, SizeVal);
4610 return RValue::get(DestAddr, *this);
4611 }
4612
4613 case Builtin::BI__builtin___memmove_chk: {
4614 // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2.
4615 Expr::EvalResult SizeResult, DstSizeResult;
4616 if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
4617 !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
4618 break;
4619 llvm::APSInt Size = SizeResult.Val.getInt();
4620 llvm::APSInt DstSize = DstSizeResult.Val.getInt();
4621 if (Size.ugt(DstSize))
4622 break;
4623 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4624 Address Src = EmitPointerWithAlignment(E->getArg(1));
4625 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
4626 Builder.CreateMemMove(Dest, Src, SizeVal, false);
4627 return RValue::get(Dest, *this);
4628 }
4629
4630 case Builtin::BImemmove:
4631 case Builtin::BI__builtin_memmove: {
4632 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4633 Address Src = EmitPointerWithAlignment(E->getArg(1));
4634 Value *SizeVal = EmitScalarExpr(E->getArg(2));
4635 EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0);
4636 EmitArgCheck(TCK_Load, Src, E->getArg(1), 1);
4637 Builder.CreateMemMove(Dest, Src, SizeVal, false);
4638 return RValue::get(Dest, *this);
4639 }
4640 case Builtin::BImemset:
4641 case Builtin::BI__builtin_memset: {
4642 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4643 Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
4644 Builder.getInt8Ty());
4645 Value *SizeVal = EmitScalarExpr(E->getArg(2));
4646 EmitNonNullArgCheck(Dest, E->getArg(0)->getType(),
4647 E->getArg(0)->getExprLoc(), FD, 0);
4648 Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
4649 return RValue::get(Dest, *this);
4650 }
4651 case Builtin::BI__builtin_memset_inline: {
4652 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4653 Value *ByteVal =
4654 Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)), Builder.getInt8Ty());
4655 uint64_t Size =
4656 E->getArg(2)->EvaluateKnownConstInt(getContext()).getZExtValue();
4657 EmitNonNullArgCheck(RValue::get(Dest, *this),
4658 E->getArg(0)->getType(), E->getArg(0)->getExprLoc(), FD,
4659 0);
4660 Builder.CreateMemSetInline(Dest, ByteVal, Size);
4661 return RValue::get(nullptr);
4662 }
4663 case Builtin::BI__builtin___memset_chk: {
4664 // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2.
4665 Expr::EvalResult SizeResult, DstSizeResult;
4666 if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
4667 !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
4668 break;
4669 llvm::APSInt Size = SizeResult.Val.getInt();
4670 llvm::APSInt DstSize = DstSizeResult.Val.getInt();
4671 if (Size.ugt(DstSize))
4672 break;
4673 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4674 Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
4675 Builder.getInt8Ty());
4676 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
4677 Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
4678 return RValue::get(Dest, *this);
4679 }
4680 case Builtin::BI__builtin_wmemchr: {
4681 // The MSVC runtime library does not provide a definition of wmemchr, so we
4682 // need an inline implementation.
4683 if (!getTarget().getTriple().isOSMSVCRT())
4684 break;
4685
4686 llvm::Type *WCharTy = ConvertType(getContext().WCharTy);
4687 Value *Str = EmitScalarExpr(E->getArg(0));
4688 Value *Chr = EmitScalarExpr(E->getArg(1));
4689 Value *Size = EmitScalarExpr(E->getArg(2));
4690
4691 BasicBlock *Entry = Builder.GetInsertBlock();
4692 BasicBlock *CmpEq = createBasicBlock("wmemchr.eq");
4693 BasicBlock *Next = createBasicBlock("wmemchr.next");
4694 BasicBlock *Exit = createBasicBlock("wmemchr.exit");
4695 Value *SizeEq0 = Builder.CreateICmpEQ(Size, ConstantInt::get(SizeTy, 0));
4696 Builder.CreateCondBr(SizeEq0, Exit, CmpEq);
4697
4698 EmitBlock(CmpEq);
4699 PHINode *StrPhi = Builder.CreatePHI(Str->getType(), 2);
4700 StrPhi->addIncoming(Str, Entry);
4701 PHINode *SizePhi = Builder.CreatePHI(SizeTy, 2);
4702 SizePhi->addIncoming(Size, Entry);
4703 CharUnits WCharAlign =
4704 getContext().getTypeAlignInChars(getContext().WCharTy);
4705 Value *StrCh = Builder.CreateAlignedLoad(WCharTy, StrPhi, WCharAlign);
4706 Value *FoundChr = Builder.CreateConstInBoundsGEP1_32(WCharTy, StrPhi, 0);
4707 Value *StrEqChr = Builder.CreateICmpEQ(StrCh, Chr);
4708 Builder.CreateCondBr(StrEqChr, Exit, Next);
4709
4710 EmitBlock(Next);
4711 Value *NextStr = Builder.CreateConstInBoundsGEP1_32(WCharTy, StrPhi, 1);
4712 Value *NextSize = Builder.CreateSub(SizePhi, ConstantInt::get(SizeTy, 1));
4713 Value *NextSizeEq0 =
4714 Builder.CreateICmpEQ(NextSize, ConstantInt::get(SizeTy, 0));
4715 Builder.CreateCondBr(NextSizeEq0, Exit, CmpEq);
4716 StrPhi->addIncoming(NextStr, Next);
4717 SizePhi->addIncoming(NextSize, Next);
4718
4719 EmitBlock(Exit);
4720 PHINode *Ret = Builder.CreatePHI(Str->getType(), 3);
4721 Ret->addIncoming(llvm::Constant::getNullValue(Str->getType()), Entry);
4722 Ret->addIncoming(llvm::Constant::getNullValue(Str->getType()), Next);
4723 Ret->addIncoming(FoundChr, CmpEq);
4724 return RValue::get(Ret);
4725 }
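// Editorial note, not part of the original source: the blocks above implement
// the obvious scan, roughly equivalent to
//   while (size--) {
//     if (*str == chr) return (wchar_t *)str;
//     ++str;
//   }
//   return 0;
// using 2-byte loads, since wchar_t is 16 bits on MSVC targets.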
4726 case Builtin::BI__builtin_wmemcmp: {
4727 // The MSVC runtime library does not provide a definition of wmemcmp, so we
4728 // need an inline implementation.
4729 if (!getTarget().getTriple().isOSMSVCRT())
4730 break;
4731
4732 llvm::Type *WCharTy = ConvertType(getContext().WCharTy);
4733
4734 Value *Dst = EmitScalarExpr(E->getArg(0));
4735 Value *Src = EmitScalarExpr(E->getArg(1));
4736 Value *Size = EmitScalarExpr(E->getArg(2));
4737
4738 BasicBlock *Entry = Builder.GetInsertBlock();
4739 BasicBlock *CmpGT = createBasicBlock("wmemcmp.gt");
4740 BasicBlock *CmpLT = createBasicBlock("wmemcmp.lt");
4741 BasicBlock *Next = createBasicBlock("wmemcmp.next");
4742 BasicBlock *Exit = createBasicBlock("wmemcmp.exit");
4743 Value *SizeEq0 = Builder.CreateICmpEQ(Size, ConstantInt::get(SizeTy, 0));
4744 Builder.CreateCondBr(SizeEq0, Exit, CmpGT);
4745
4746 EmitBlock(CmpGT);
4747 PHINode *DstPhi = Builder.CreatePHI(Dst->getType(), 2);
4748 DstPhi->addIncoming(Dst, Entry);
4749 PHINode *SrcPhi = Builder.CreatePHI(Src->getType(), 2);
4750 SrcPhi->addIncoming(Src, Entry);
4751 PHINode *SizePhi = Builder.CreatePHI(SizeTy, 2);
4752 SizePhi->addIncoming(Size, Entry);
4753 CharUnits WCharAlign =
4754 getContext().getTypeAlignInChars(getContext().WCharTy);
4755 Value *DstCh = Builder.CreateAlignedLoad(WCharTy, DstPhi, WCharAlign);
4756 Value *SrcCh = Builder.CreateAlignedLoad(WCharTy, SrcPhi, WCharAlign);
4757 Value *DstGtSrc = Builder.CreateICmpUGT(DstCh, SrcCh);
4758 Builder.CreateCondBr(DstGtSrc, Exit, CmpLT);
4759
4760 EmitBlock(CmpLT);
4761 Value *DstLtSrc = Builder.CreateICmpULT(DstCh, SrcCh);
4762 Builder.CreateCondBr(DstLtSrc, Exit, Next);
4763
4764 EmitBlock(Next);
4765 Value *NextDst = Builder.CreateConstInBoundsGEP1_32(WCharTy, DstPhi, 1);
4766 Value *NextSrc = Builder.CreateConstInBoundsGEP1_32(WCharTy, SrcPhi, 1);
4767 Value *NextSize = Builder.CreateSub(SizePhi, ConstantInt::get(SizeTy, 1));
4768 Value *NextSizeEq0 =
4769 Builder.CreateICmpEQ(NextSize, ConstantInt::get(SizeTy, 0));
4770 Builder.CreateCondBr(NextSizeEq0, Exit, CmpGT);
4771 DstPhi->addIncoming(NextDst, Next);
4772 SrcPhi->addIncoming(NextSrc, Next);
4773 SizePhi->addIncoming(NextSize, Next);
4774
4775 EmitBlock(Exit);
4776 PHINode *Ret = Builder.CreatePHI(IntTy, 4);
4777 Ret->addIncoming(ConstantInt::get(IntTy, 0), Entry);
4778 Ret->addIncoming(ConstantInt::get(IntTy, 1), CmpGT);
4779 Ret->addIncoming(ConstantInt::get(IntTy, -1), CmpLT);
4780 Ret->addIncoming(ConstantInt::get(IntTy, 0), Next);
4781 return RValue::get(Ret);
4782 }
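// Editorial note, not part of the original source: the expansion above is
// roughly equivalent to
//   for (; size != 0; --size, ++dst, ++src) {
//     if (*dst > *src) return 1;
//     if (*dst < *src) return -1;
//   }
//   return 0;
// with the elements compared as unsigned 16-bit values.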
4783 case Builtin::BI__builtin_dwarf_cfa: {
4784 // The offset in bytes from the first argument to the CFA.
4785 //
4786 // Why on earth is this in the frontend? Is there any reason at
4787 // all that the backend can't reasonably determine this while
4788 // lowering llvm.eh.dwarf.cfa()?
4789 //
4790 // TODO: If there's a satisfactory reason, add a target hook for
4791 // this instead of hard-coding 0, which is correct for most targets.
4792 int32_t Offset = 0;
4793
4794 Function *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa);
4795 return RValue::get(Builder.CreateCall(F,
4796 llvm::ConstantInt::get(Int32Ty, Offset)));
4797 }
4798 case Builtin::BI__builtin_return_address: {
4799 Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0),
4800 getContext().UnsignedIntTy);
4801 Function *F = CGM.getIntrinsic(Intrinsic::returnaddress);
4802 return RValue::get(Builder.CreateCall(F, Depth));
4803 }
4804 case Builtin::BI_ReturnAddress: {
4805 Function *F = CGM.getIntrinsic(Intrinsic::returnaddress);
4806 return RValue::get(Builder.CreateCall(F, Builder.getInt32(0)));
4807 }
4808 case Builtin::BI__builtin_frame_address: {
4809 Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0),
4810 getContext().UnsignedIntTy);
4811 Function *F = CGM.getIntrinsic(Intrinsic::frameaddress, AllocaInt8PtrTy);
4812 return RValue::get(Builder.CreateCall(F, Depth));
4813 }
4814 case Builtin::BI__builtin_extract_return_addr: {
4815 Value *Address = EmitScalarExpr(E->getArg(0));
4816 Value *Result = getTargetHooks().decodeReturnAddress(*this, Address);
4817 return RValue::get(Result);
4818 }
4819 case Builtin::BI__builtin_frob_return_addr: {
4820 Value *Address = EmitScalarExpr(E->getArg(0));
4821 Value *Result = getTargetHooks().encodeReturnAddress(*this, Address);
4822 return RValue::get(Result);
4823 }
4824 case Builtin::BI__builtin_dwarf_sp_column: {
4825 llvm::IntegerType *Ty
4826 = cast<llvm::IntegerType>(ConvertType(E->getType()));
4827 int Column = getTargetHooks().getDwarfEHStackPointer(CGM);
4828 if (Column == -1) {
4829 CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column");
4830 return RValue::get(llvm::UndefValue::get(Ty));
4831 }
4832 return RValue::get(llvm::ConstantInt::get(Ty, Column, true));
4833 }
4834 case Builtin::BI__builtin_init_dwarf_reg_size_table: {
4835 Value *Address = EmitScalarExpr(E->getArg(0));
4836 if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address))
4837 CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table");
4838 return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
4839 }
4840 case Builtin::BI__builtin_eh_return: {
4841 Value *Int = EmitScalarExpr(E->getArg(0));
4842 Value *Ptr = EmitScalarExpr(E->getArg(1));
4843
4844 llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType());
4845 assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) &&
4846 "LLVM's __builtin_eh_return only supports 32- and 64-bit variants");
4847 Function *F =
4848 CGM.getIntrinsic(IntTy->getBitWidth() == 32 ? Intrinsic::eh_return_i32
4849 : Intrinsic::eh_return_i64);
4850 Builder.CreateCall(F, {Int, Ptr});
4851 Builder.CreateUnreachable();
4852
4853 // We do need to preserve an insertion point.
4854 EmitBlock(createBasicBlock("builtin_eh_return.cont"));
4855
4856 return RValue::get(nullptr);
4857 }
4858 case Builtin::BI__builtin_unwind_init: {
4859 Function *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init);
4860 Builder.CreateCall(F);
4861 return RValue::get(nullptr);
4862 }
4863 case Builtin::BI__builtin_extend_pointer: {
4864 // Extends a pointer to the size of an _Unwind_Word, which is
4865 // uint64_t on all platforms. Generally this gets poked into a
4866 // register and eventually used as an address, so if the
4867 // addressing registers are wider than pointers and the platform
4868 // doesn't implicitly ignore high-order bits when doing
4869 // addressing, we need to make sure we zext / sext based on
4870 // the platform's expectations.
4871 //
4872 // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html
4873
4874 // Cast the pointer to intptr_t.
4875 Value *Ptr = EmitScalarExpr(E->getArg(0));
4876 Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast");
4877
4878 // If that's 64 bits, we're done.
4879 if (IntPtrTy->getBitWidth() == 64)
4880 return RValue::get(Result);
4881
4882 // Otherwise, ask the codegen data what to do.
4883 if (getTargetHooks().extendPointerWithSExt())
4884 return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext"));
4885 else
4886 return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext"));
4887 }
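// Editorial note, not part of the original source: on a 32-bit target this
// emits a ptrtoint to i32 followed by a zext (or sext, if the target hook
// requests sign extension) to i64, e.g. roughly
//   %i = ptrtoint ptr %p to i32
//   %w = zext i32 %i to i64
// whereas on 64-bit targets the ptrtoint result is returned as-is.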
4888 case Builtin::BI__builtin_setjmp: {
4889 // Buffer is a void**.
4890 Address Buf = EmitPointerWithAlignment(E->getArg(0));
4891
4892 if (getTarget().getTriple().getArch() == llvm::Triple::systemz) {
4893 // On this target, the back end fills in the context buffer completely.
4894 // It doesn't really matter if the frontend stores to the buffer before
4895 // calling setjmp; the back end is going to overwrite it anyway.
4896 Function *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
4897 return RValue::get(Builder.CreateCall(F, Buf.emitRawPointer(*this)));
4898 }
4899
4900 // Store the frame pointer to the setjmp buffer.
4901 Value *FrameAddr = Builder.CreateCall(
4902 CGM.getIntrinsic(Intrinsic::frameaddress, AllocaInt8PtrTy),
4903 ConstantInt::get(Int32Ty, 0));
4904 Builder.CreateStore(FrameAddr, Buf);
4905
4906 // Store the stack pointer to the setjmp buffer.
4907 Value *StackAddr = Builder.CreateStackSave();
4908 assert(Buf.emitRawPointer(*this)->getType() == StackAddr->getType());
4909
4910 Address StackSaveSlot = Builder.CreateConstInBoundsGEP(Buf, 2);
4911 Builder.CreateStore(StackAddr, StackSaveSlot);
4912
4913 // Call LLVM's EH setjmp, which is lightweight.
4914 Function *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
4915 return RValue::get(Builder.CreateCall(F, Buf.emitRawPointer(*this)));
4916 }
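// Editorial note, not part of the original source: in the generic path above
// the jump buffer follows the layout documented for llvm.eh.sjlj.setjmp:
// word 0 holds the frame pointer stored here, word 2 holds the saved stack
// pointer, and the remaining words (including the resume address in word 1)
// are filled in when the intrinsic is lowered.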
4917 case Builtin::BI__builtin_longjmp: {
4918 Value *Buf = EmitScalarExpr(E->getArg(0));
4919
4920 // Call LLVM's EH longjmp, which is lightweight.
4921 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf);
4922
4923 // longjmp doesn't return; mark this as unreachable.
4924 Builder.CreateUnreachable();
4925
4926 // We do need to preserve an insertion point.
4927 EmitBlock(createBasicBlock("longjmp.cont"));
4928
4929 return RValue::get(nullptr);
4930 }
4931 case Builtin::BI__builtin_launder: {
4932 const Expr *Arg = E->getArg(0);
4933 QualType ArgTy = Arg->getType()->getPointeeType();
4934 Value *Ptr = EmitScalarExpr(Arg);
4935 if (TypeRequiresBuiltinLaunder(CGM, ArgTy))
4936 Ptr = Builder.CreateLaunderInvariantGroup(Ptr);
4937
4938 return RValue::get(Ptr);
4939 }
4940 case Builtin::BI__sync_fetch_and_add:
4941 case Builtin::BI__sync_fetch_and_sub:
4942 case Builtin::BI__sync_fetch_and_or:
4943 case Builtin::BI__sync_fetch_and_and:
4944 case Builtin::BI__sync_fetch_and_xor:
4945 case Builtin::BI__sync_fetch_and_nand:
4946 case Builtin::BI__sync_add_and_fetch:
4947 case Builtin::BI__sync_sub_and_fetch:
4948 case Builtin::BI__sync_and_and_fetch:
4949 case Builtin::BI__sync_or_and_fetch:
4950 case Builtin::BI__sync_xor_and_fetch:
4951 case Builtin::BI__sync_nand_and_fetch:
4952 case Builtin::BI__sync_val_compare_and_swap:
4953 case Builtin::BI__sync_bool_compare_and_swap:
4954 case Builtin::BI__sync_lock_test_and_set:
4955 case Builtin::BI__sync_lock_release:
4956 case Builtin::BI__sync_swap:
4957 llvm_unreachable("Shouldn't make it through sema");
4958 case Builtin::BI__sync_fetch_and_add_1:
4959 case Builtin::BI__sync_fetch_and_add_2:
4960 case Builtin::BI__sync_fetch_and_add_4:
4961 case Builtin::BI__sync_fetch_and_add_8:
4962 case Builtin::BI__sync_fetch_and_add_16:
4963 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E);
4964 case Builtin::BI__sync_fetch_and_sub_1:
4965 case Builtin::BI__sync_fetch_and_sub_2:
4966 case Builtin::BI__sync_fetch_and_sub_4:
4967 case Builtin::BI__sync_fetch_and_sub_8:
4968 case Builtin::BI__sync_fetch_and_sub_16:
4969 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E);
4970 case Builtin::BI__sync_fetch_and_or_1:
4971 case Builtin::BI__sync_fetch_and_or_2:
4972 case Builtin::BI__sync_fetch_and_or_4:
4973 case Builtin::BI__sync_fetch_and_or_8:
4974 case Builtin::BI__sync_fetch_and_or_16:
4975 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E);
4976 case Builtin::BI__sync_fetch_and_and_1:
4977 case Builtin::BI__sync_fetch_and_and_2:
4978 case Builtin::BI__sync_fetch_and_and_4:
4979 case Builtin::BI__sync_fetch_and_and_8:
4980 case Builtin::BI__sync_fetch_and_and_16:
4981 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E);
4982 case Builtin::BI__sync_fetch_and_xor_1:
4983 case Builtin::BI__sync_fetch_and_xor_2:
4984 case Builtin::BI__sync_fetch_and_xor_4:
4985 case Builtin::BI__sync_fetch_and_xor_8:
4986 case Builtin::BI__sync_fetch_and_xor_16:
4987 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E);
4988 case Builtin::BI__sync_fetch_and_nand_1:
4989 case Builtin::BI__sync_fetch_and_nand_2:
4990 case Builtin::BI__sync_fetch_and_nand_4:
4991 case Builtin::BI__sync_fetch_and_nand_8:
4992 case Builtin::BI__sync_fetch_and_nand_16:
4993 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Nand, E);
4994
4995 // Clang extensions: not overloaded yet.
4996 case Builtin::BI__sync_fetch_and_min:
4997 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E);
4998 case Builtin::BI__sync_fetch_and_max:
4999 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E);
5000 case Builtin::BI__sync_fetch_and_umin:
5001 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E);
5002 case Builtin::BI__sync_fetch_and_umax:
5003 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E);
5004
5005 case Builtin::BI__sync_add_and_fetch_1:
5006 case Builtin::BI__sync_add_and_fetch_2:
5007 case Builtin::BI__sync_add_and_fetch_4:
5008 case Builtin::BI__sync_add_and_fetch_8:
5009 case Builtin::BI__sync_add_and_fetch_16:
5010 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E,
5011 llvm::Instruction::Add);
5012 case Builtin::BI__sync_sub_and_fetch_1:
5013 case Builtin::BI__sync_sub_and_fetch_2:
5014 case Builtin::BI__sync_sub_and_fetch_4:
5015 case Builtin::BI__sync_sub_and_fetch_8:
5016 case Builtin::BI__sync_sub_and_fetch_16:
5017 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E,
5018 llvm::Instruction::Sub);
5019 case Builtin::BI__sync_and_and_fetch_1:
5020 case Builtin::BI__sync_and_and_fetch_2:
5021 case Builtin::BI__sync_and_and_fetch_4:
5022 case Builtin::BI__sync_and_and_fetch_8:
5023 case Builtin::BI__sync_and_and_fetch_16:
5024 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::And, E,
5025 llvm::Instruction::And);
5026 case Builtin::BI__sync_or_and_fetch_1:
5027 case Builtin::BI__sync_or_and_fetch_2:
5028 case Builtin::BI__sync_or_and_fetch_4:
5029 case Builtin::BI__sync_or_and_fetch_8:
5030 case Builtin::BI__sync_or_and_fetch_16:
5031 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E,
5032 llvm::Instruction::Or);
5033 case Builtin::BI__sync_xor_and_fetch_1:
5034 case Builtin::BI__sync_xor_and_fetch_2:
5035 case Builtin::BI__sync_xor_and_fetch_4:
5036 case Builtin::BI__sync_xor_and_fetch_8:
5037 case Builtin::BI__sync_xor_and_fetch_16:
5038 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E,
5039 llvm::Instruction::Xor);
5040 case Builtin::BI__sync_nand_and_fetch_1:
5041 case Builtin::BI__sync_nand_and_fetch_2:
5042 case Builtin::BI__sync_nand_and_fetch_4:
5043 case Builtin::BI__sync_nand_and_fetch_8:
5044 case Builtin::BI__sync_nand_and_fetch_16:
5045 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Nand, E,
5046 llvm::Instruction::And, true);
5047
5048 case Builtin::BI__sync_val_compare_and_swap_1:
5049 case Builtin::BI__sync_val_compare_and_swap_2:
5050 case Builtin::BI__sync_val_compare_and_swap_4:
5051 case Builtin::BI__sync_val_compare_and_swap_8:
5052 case Builtin::BI__sync_val_compare_and_swap_16:
5053 return RValue::get(MakeAtomicCmpXchgValue(*this, E, false));
5054
5055 case Builtin::BI__sync_bool_compare_and_swap_1:
5056 case Builtin::BI__sync_bool_compare_and_swap_2:
5057 case Builtin::BI__sync_bool_compare_and_swap_4:
5058 case Builtin::BI__sync_bool_compare_and_swap_8:
5059 case Builtin::BI__sync_bool_compare_and_swap_16:
5060 return RValue::get(MakeAtomicCmpXchgValue(*this, E, true));
5061
5062 case Builtin::BI__sync_swap_1:
5063 case Builtin::BI__sync_swap_2:
5064 case Builtin::BI__sync_swap_4:
5065 case Builtin::BI__sync_swap_8:
5066 case Builtin::BI__sync_swap_16:
5067 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
5068
5069 case Builtin::BI__sync_lock_test_and_set_1:
5070 case Builtin::BI__sync_lock_test_and_set_2:
5071 case Builtin::BI__sync_lock_test_and_set_4:
5072 case Builtin::BI__sync_lock_test_and_set_8:
5073 case Builtin::BI__sync_lock_test_and_set_16:
5074 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
5075
5076 case Builtin::BI__sync_lock_release_1:
5077 case Builtin::BI__sync_lock_release_2:
5078 case Builtin::BI__sync_lock_release_4:
5079 case Builtin::BI__sync_lock_release_8:
5080 case Builtin::BI__sync_lock_release_16: {
5081 Address Ptr = CheckAtomicAlignment(*this, E);
5082 QualType ElTy = E->getArg(0)->getType()->getPointeeType();
5083
5084 llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
5085 getContext().getTypeSize(ElTy));
5086 llvm::StoreInst *Store =
5087 Builder.CreateStore(llvm::Constant::getNullValue(ITy), Ptr);
5088 Store->setAtomic(llvm::AtomicOrdering::Release);
5089 return RValue::get(nullptr);
5090 }
5091
5092 case Builtin::BI__sync_synchronize: {
5093 // We assume this is supposed to correspond to a C++0x-style
5094 // sequentially-consistent fence (i.e. this is only usable for
5095 // synchronization, not device I/O or anything like that). This intrinsic
5096 // is really badly designed in the sense that in theory, there isn't
5097 // any way to safely use it... but in practice, it mostly works
5098 // to use it with non-atomic loads and stores to get acquire/release
5099 // semantics.
5100 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent);
5101 return RValue::get(nullptr);
5102 }
5103
5104 case Builtin::BI__builtin_nontemporal_load:
5105 return RValue::get(EmitNontemporalLoad(*this, E));
5106 case Builtin::BI__builtin_nontemporal_store:
5107 return RValue::get(EmitNontemporalStore(*this, E));
5108 case Builtin::BI__c11_atomic_is_lock_free:
5109 case Builtin::BI__atomic_is_lock_free: {
5110 // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the
5111 // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since
5112 // _Atomic(T) is always properly-aligned.
5113 const char *LibCallName = "__atomic_is_lock_free";
5114 CallArgList Args;
5115 Args.add(RValue::get(EmitScalarExpr(E->getArg(0))),
5116 getContext().getSizeType());
5117 if (BuiltinID == Builtin::BI__atomic_is_lock_free)
5118 Args.add(RValue::get(EmitScalarExpr(E->getArg(1))),
5119 getContext().VoidPtrTy);
5120 else
5121 Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)),
5122 getContext().VoidPtrTy);
5123 const CGFunctionInfo &FuncInfo =
5124 CGM.getTypes().arrangeBuiltinFunctionCall(E->getType(), Args);
5125 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo);
5126 llvm::FunctionCallee Func = CGM.CreateRuntimeFunction(FTy, LibCallName);
5127 return EmitCall(FuncInfo, CGCallee::forDirect(Func),
5128 ReturnValueSlot(), Args);
5129 }
5130
5131 case Builtin::BI__atomic_thread_fence:
5132 case Builtin::BI__atomic_signal_fence:
5133 case Builtin::BI__c11_atomic_thread_fence:
5134 case Builtin::BI__c11_atomic_signal_fence: {
5135 llvm::SyncScope::ID SSID;
5136 if (BuiltinID == Builtin::BI__atomic_signal_fence ||
5137 BuiltinID == Builtin::BI__c11_atomic_signal_fence)
5138 SSID = llvm::SyncScope::SingleThread;
5139 else
5140 SSID = llvm::SyncScope::System;
5141 Value *Order = EmitScalarExpr(E->getArg(0));
5142 if (isa<llvm::ConstantInt>(Order)) {
5143 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
5144 switch (ord) {
5145 case 0: // memory_order_relaxed
5146 default: // invalid order
5147 break;
5148 case 1: // memory_order_consume
5149 case 2: // memory_order_acquire
5150 Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
5151 break;
5152 case 3: // memory_order_release
5153 Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
5154 break;
5155 case 4: // memory_order_acq_rel
5156 Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
5157 break;
5158 case 5: // memory_order_seq_cst
5159 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
5160 break;
5161 }
5162 return RValue::get(nullptr);
5163 }
5164
5165 llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB;
5166 AcquireBB = createBasicBlock("acquire", CurFn);
5167 ReleaseBB = createBasicBlock("release", CurFn);
5168 AcqRelBB = createBasicBlock("acqrel", CurFn);
5169 SeqCstBB = createBasicBlock("seqcst", CurFn);
5170 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
5171
5172 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
5173 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);
5174
5175 Builder.SetInsertPoint(AcquireBB);
5176 Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
5177 Builder.CreateBr(ContBB);
5178 SI->addCase(Builder.getInt32(1), AcquireBB);
5179 SI->addCase(Builder.getInt32(2), AcquireBB);
5180
5181 Builder.SetInsertPoint(ReleaseBB);
5182 Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
5183 Builder.CreateBr(ContBB);
5184 SI->addCase(Builder.getInt32(3), ReleaseBB);
5185
5186 Builder.SetInsertPoint(AcqRelBB);
5187 Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
5188 Builder.CreateBr(ContBB);
5189 SI->addCase(Builder.getInt32(4), AcqRelBB);
5190
5191 Builder.SetInsertPoint(SeqCstBB);
5192 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
5193 Builder.CreateBr(ContBB);
5194 SI->addCase(Builder.getInt32(5), SeqCstBB);
5195
5196 Builder.SetInsertPoint(ContBB);
5197 return RValue::get(nullptr);
5198 }
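// Editorial note, not part of the original source: with a constant ordering
// argument the code above collapses to a single fence, e.g.
//   __atomic_thread_fence(__ATOMIC_ACQUIRE);  // -> fence acquire
//   __atomic_signal_fence(__ATOMIC_SEQ_CST);  // -> fence syncscope("singlethread") seq_cst
// The switch-based expansion is only needed when the ordering is a runtime
// value.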
5199 case Builtin::BI__scoped_atomic_thread_fence: {
5200 auto ScopeModel = AtomicScopeModel::create(AtomicScopeModelKind::Generic);
5201
5202 Value *Order = EmitScalarExpr(E->getArg(0));
5203 Value *Scope = EmitScalarExpr(E->getArg(1));
5204 auto Ord = dyn_cast<llvm::ConstantInt>(Order);
5205 auto Scp = dyn_cast<llvm::ConstantInt>(Scope);
5206 if (Ord && Scp) {
5207 SyncScope SS = ScopeModel->isValid(Scp->getZExtValue())
5208 ? ScopeModel->map(Scp->getZExtValue())
5209 : ScopeModel->map(ScopeModel->getFallBackValue());
5210 switch (Ord->getZExtValue()) {
5211 case 0: // memory_order_relaxed
5212 default: // invalid order
5213 break;
5214 case 1: // memory_order_consume
5215 case 2: // memory_order_acquire
5216 Builder.CreateFence(
5217 llvm::AtomicOrdering::Acquire,
5218 getTargetHooks().getLLVMSyncScopeID(getLangOpts(), SS,
5219 llvm::AtomicOrdering::Acquire,
5220 getLLVMContext()));
5221 break;
5222 case 3: // memory_order_release
5223 Builder.CreateFence(
5224 llvm::AtomicOrdering::Release,
5225 getTargetHooks().getLLVMSyncScopeID(getLangOpts(), SS,
5226 llvm::AtomicOrdering::Release,
5227 getLLVMContext()));
5228 break;
5229 case 4: // memory_order_acq_rel
5230 Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease,
5231 getTargetHooks().getLLVMSyncScopeID(
5232 getLangOpts(), SS,
5233 llvm::AtomicOrdering::AcquireRelease,
5234 getLLVMContext()));
5235 break;
5236 case 5: // memory_order_seq_cst
5237 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
5238 getTargetHooks().getLLVMSyncScopeID(
5239 getLangOpts(), SS,
5240 llvm::AtomicOrdering::SequentiallyConsistent,
5241 getLLVMContext()));
5242 break;
5243 }
5244 return RValue::get(nullptr);
5245 }
5246
5247 llvm::BasicBlock *ContBB = createBasicBlock("atomic.scope.continue", CurFn);
5248
5250 OrderBBs;
5251 if (Ord) {
5252 switch (Ord->getZExtValue()) {
5253 case 0: // memory_order_relaxed
5254 default: // invalid order
5255 ContBB->eraseFromParent();
5256 return RValue::get(nullptr);
5257 case 1: // memory_order_consume
5258 case 2: // memory_order_acquire
5259 OrderBBs.emplace_back(Builder.GetInsertBlock(),
5260 llvm::AtomicOrdering::Acquire);
5261 break;
5262 case 3: // memory_order_release
5263 OrderBBs.emplace_back(Builder.GetInsertBlock(),
5264 llvm::AtomicOrdering::Release);
5265 break;
5266 case 4: // memory_order_acq_rel
5267 OrderBBs.emplace_back(Builder.GetInsertBlock(),
5268 llvm::AtomicOrdering::AcquireRelease);
5269 break;
5270 case 5: // memory_order_seq_cst
5271 OrderBBs.emplace_back(Builder.GetInsertBlock(),
5272 llvm::AtomicOrdering::SequentiallyConsistent);
5273 break;
5274 }
5275 } else {
5276 llvm::BasicBlock *AcquireBB = createBasicBlock("acquire", CurFn);
5277 llvm::BasicBlock *ReleaseBB = createBasicBlock("release", CurFn);
5278 llvm::BasicBlock *AcqRelBB = createBasicBlock("acqrel", CurFn);
5279 llvm::BasicBlock *SeqCstBB = createBasicBlock("seqcst", CurFn);
5280
5281 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
5282 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);
5283 SI->addCase(Builder.getInt32(1), AcquireBB);
5284 SI->addCase(Builder.getInt32(2), AcquireBB);
5285 SI->addCase(Builder.getInt32(3), ReleaseBB);
5286 SI->addCase(Builder.getInt32(4), AcqRelBB);
5287 SI->addCase(Builder.getInt32(5), SeqCstBB);
5288
5289 OrderBBs.emplace_back(AcquireBB, llvm::AtomicOrdering::Acquire);
5290 OrderBBs.emplace_back(ReleaseBB, llvm::AtomicOrdering::Release);
5291 OrderBBs.emplace_back(AcqRelBB, llvm::AtomicOrdering::AcquireRelease);
5292 OrderBBs.emplace_back(SeqCstBB,
5293 llvm::AtomicOrdering::SequentiallyConsistent);
5294 }
5295
5296 for (auto &[OrderBB, Ordering] : OrderBBs) {
5297 Builder.SetInsertPoint(OrderBB);
5298 if (Scp) {
5299 SyncScope SS = ScopeModel->isValid(Scp->getZExtValue())
5300 ? ScopeModel->map(Scp->getZExtValue())
5301 : ScopeModel->map(ScopeModel->getFallBackValue());
5302 Builder.CreateFence(Ordering,
5303 getTargetHooks().getLLVMSyncScopeID(
5304 getLangOpts(), SS, Ordering, getLLVMContext()));
5305 Builder.CreateBr(ContBB);
5306 } else {
5307 llvm::DenseMap<unsigned, llvm::BasicBlock *> BBs;
5308 for (unsigned Scp : ScopeModel->getRuntimeValues())
5309 BBs[Scp] = createBasicBlock(getAsString(ScopeModel->map(Scp)), CurFn);
5310
5311 auto *SC = Builder.CreateIntCast(Scope, Builder.getInt32Ty(), false);
5312 llvm::SwitchInst *SI = Builder.CreateSwitch(SC, ContBB);
5313 for (unsigned Scp : ScopeModel->getRuntimeValues()) {
5314 auto *B = BBs[Scp];
5315 SI->addCase(Builder.getInt32(Scp), B);
5316
5317 Builder.SetInsertPoint(B);
5318 Builder.CreateFence(Ordering, getTargetHooks().getLLVMSyncScopeID(
5319 getLangOpts(), ScopeModel->map(Scp),
5320 Ordering, getLLVMContext()));
5321 Builder.CreateBr(ContBB);
5322 }
5323 }
5324 }
5325
5326 Builder.SetInsertPoint(ContBB);
5327 return RValue::get(nullptr);
5328 }
5329
5330 case Builtin::BI__builtin_signbit:
5331 case Builtin::BI__builtin_signbitf:
5332 case Builtin::BI__builtin_signbitl: {
5333 return RValue::get(
5334 Builder.CreateZExt(EmitSignBit(*this, EmitScalarExpr(E->getArg(0))),
5335 ConvertType(E->getType())));
5336 }
5337 case Builtin::BI__warn_memset_zero_len:
5338 return RValue::getIgnored();
5339 case Builtin::BI__annotation: {
5340 // Re-encode each wide string to UTF8 and make an MDString.
5341 SmallVector<Metadata *, 1> Strings;
5342 for (const Expr *Arg : E->arguments()) {
5343 const auto *Str = cast<StringLiteral>(Arg->IgnoreParenCasts());
5344 assert(Str->getCharByteWidth() == 2);
5345 StringRef WideBytes = Str->getBytes();
5346 std::string StrUtf8;
5347 if (!convertUTF16ToUTF8String(
5348 ArrayRef(WideBytes.data(), WideBytes.size()), StrUtf8)) {
5349 CGM.ErrorUnsupported(E, "non-UTF16 __annotation argument");
5350 continue;
5351 }
5352 Strings.push_back(llvm::MDString::get(getLLVMContext(), StrUtf8));
5353 }
5354
5355 // Build an MDTuple of MDStrings and emit the intrinsic call.
5356 llvm::Function *F =
5357 CGM.getIntrinsic(llvm::Intrinsic::codeview_annotation, {});
5358 MDTuple *StrTuple = MDTuple::get(getLLVMContext(), Strings);
5359 Builder.CreateCall(F, MetadataAsValue::get(getLLVMContext(), StrTuple));
5360 return RValue::getIgnored();
5361 }
5362 case Builtin::BI__builtin_annotation: {
5363 llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0));
5364 llvm::Function *F =
5365 CGM.getIntrinsic(llvm::Intrinsic::annotation,
5366 {AnnVal->getType(), CGM.ConstGlobalsPtrTy});
5367
5368 // Get the annotation string, go through casts. Sema requires this to be a
5369 // non-wide string literal, potentially casted, so the cast<> is safe.
5370 const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts();
5371 StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString();
5372 return RValue::get(
5373 EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc(), nullptr));
5374 }
5375 case Builtin::BI__builtin_addcb:
5376 case Builtin::BI__builtin_addcs:
5377 case Builtin::BI__builtin_addc:
5378 case Builtin::BI__builtin_addcl:
5379 case Builtin::BI__builtin_addcll:
5380 case Builtin::BI__builtin_subcb:
5381 case Builtin::BI__builtin_subcs:
5382 case Builtin::BI__builtin_subc:
5383 case Builtin::BI__builtin_subcl:
5384 case Builtin::BI__builtin_subcll: {
5385
5386 // We translate all of these builtins from expressions of the form:
5387 // int x = ..., y = ..., carryin = ..., carryout, result;
5388 // result = __builtin_addc(x, y, carryin, &carryout);
5389 //
5390 // to LLVM IR of the form:
5391 //
5392 // %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
5393 // %tmpsum1 = extractvalue {i32, i1} %tmp1, 0
5394 // %carry1 = extractvalue {i32, i1} %tmp1, 1
5395 // %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1,
5396 // i32 %carryin)
5397 // %result = extractvalue {i32, i1} %tmp2, 0
5398 // %carry2 = extractvalue {i32, i1} %tmp2, 1
5399 // %tmp3 = or i1 %carry1, %carry2
5400 // %tmp4 = zext i1 %tmp3 to i32
5401 // store i32 %tmp4, i32* %carryout
5402
5403 // Scalarize our inputs.
5404 llvm::Value *X = EmitScalarExpr(E->getArg(0));
5405 llvm::Value *Y = EmitScalarExpr(E->getArg(1));
5406 llvm::Value *Carryin = EmitScalarExpr(E->getArg(2));
5407 Address CarryOutPtr = EmitPointerWithAlignment(E->getArg(3));
5408
5409 // Decide if we are lowering to a uadd.with.overflow or usub.with.overflow.
5410 llvm::Intrinsic::ID IntrinsicId;
5411 switch (BuiltinID) {
5412 default: llvm_unreachable("Unknown multiprecision builtin id.");
5413 case Builtin::BI__builtin_addcb:
5414 case Builtin::BI__builtin_addcs:
5415 case Builtin::BI__builtin_addc:
5416 case Builtin::BI__builtin_addcl:
5417 case Builtin::BI__builtin_addcll:
5418 IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
5419 break;
5420 case Builtin::BI__builtin_subcb:
5421 case Builtin::BI__builtin_subcs:
5422 case Builtin::BI__builtin_subc:
5423 case Builtin::BI__builtin_subcl:
5424 case Builtin::BI__builtin_subcll:
5425 IntrinsicId = llvm::Intrinsic::usub_with_overflow;
5426 break;
5427 }
5428
5429 // Construct our resulting LLVM IR expression.
5430 llvm::Value *Carry1;
5431 llvm::Value *Sum1 = EmitOverflowIntrinsic(*this, IntrinsicId,
5432 X, Y, Carry1);
5433 llvm::Value *Carry2;
5434 llvm::Value *Sum2 = EmitOverflowIntrinsic(*this, IntrinsicId,
5435 Sum1, Carryin, Carry2);
5436 llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2),
5437 X->getType());
5438 Builder.CreateStore(CarryOut, CarryOutPtr);
5439 return RValue::get(Sum2);
5440 }
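// Editorial note, not part of the original source: a typical use of these
// builtins is multi-word arithmetic, e.g. adding two 64-bit values held as
// 32-bit halves:
//   unsigned lo, hi, c;
//   lo = __builtin_addc(a_lo, b_lo, 0, &c);
//   hi = __builtin_addc(a_hi, b_hi, c, &c);
// Each call becomes the pair of uadd.with.overflow calls shown in the comment
// above, with the two carry bits OR'ed together for the carry-out.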
5441
5442 case Builtin::BI__builtin_add_overflow:
5443 case Builtin::BI__builtin_sub_overflow:
5444 case Builtin::BI__builtin_mul_overflow: {
5445 const clang::Expr *LeftArg = E->getArg(0);
5446 const clang::Expr *RightArg = E->getArg(1);
5447 const clang::Expr *ResultArg = E->getArg(2);
5448
5449 clang::QualType ResultQTy =
5450 ResultArg->getType()->castAs<PointerType>()->getPointeeType();
5451
5452 WidthAndSignedness LeftInfo =
5453 getIntegerWidthAndSignedness(CGM.getContext(), LeftArg->getType());
5454 WidthAndSignedness RightInfo =
5455 getIntegerWidthAndSignedness(CGM.getContext(), RightArg->getType());
5456 WidthAndSignedness ResultInfo =
5457 getIntegerWidthAndSignedness(CGM.getContext(), ResultQTy);
5458
5459 // Handle mixed-sign multiplication as a special case, because adding
5460 // runtime or backend support for our generic irgen would be too expensive.
5461 if (isSpecialMixedSignMultiply(BuiltinID, LeftInfo, RightInfo, ResultInfo))
5462 return EmitCheckedMixedSignMultiply(*this, LeftArg, LeftInfo, RightArg,
5463 RightInfo, ResultArg, ResultQTy,
5464 ResultInfo);
5465
5466 if (isSpecialUnsignedMultiplySignedResult(BuiltinID, LeftInfo, RightInfo,
5467 ResultInfo))
5468 return EmitCheckedUnsignedMultiplySignedResult(
5469 *this, LeftArg, LeftInfo, RightArg, RightInfo, ResultArg, ResultQTy,
5470 ResultInfo);
5471
5472 WidthAndSignedness EncompassingInfo =
5473 EncompassingIntegerType({LeftInfo, RightInfo, ResultInfo});
5474
5475 llvm::Type *EncompassingLLVMTy =
5476 llvm::IntegerType::get(CGM.getLLVMContext(), EncompassingInfo.Width);
5477
5478 llvm::Type *ResultLLVMTy = CGM.getTypes().ConvertType(ResultQTy);
5479
5480 llvm::Intrinsic::ID IntrinsicId;
5481 switch (BuiltinID) {
5482 default:
5483 llvm_unreachable("Unknown overflow builtin id.");
5484 case Builtin::BI__builtin_add_overflow:
5485 IntrinsicId = EncompassingInfo.Signed
5486 ? llvm::Intrinsic::sadd_with_overflow
5487 : llvm::Intrinsic::uadd_with_overflow;
5488 break;
5489 case Builtin::BI__builtin_sub_overflow:
5490 IntrinsicId = EncompassingInfo.Signed
5491 ? llvm::Intrinsic::ssub_with_overflow
5492 : llvm::Intrinsic::usub_with_overflow;
5493 break;
5494 case Builtin::BI__builtin_mul_overflow:
5495 IntrinsicId = EncompassingInfo.Signed
5496 ? llvm::Intrinsic::smul_with_overflow
5497 : llvm::Intrinsic::umul_with_overflow;
5498 break;
5499 }
5500
5501 llvm::Value *Left = EmitScalarExpr(LeftArg);
5502 llvm::Value *Right = EmitScalarExpr(RightArg);
5503 Address ResultPtr = EmitPointerWithAlignment(ResultArg);
5504
5505 // Extend each operand to the encompassing type.
5506 Left = Builder.CreateIntCast(Left, EncompassingLLVMTy, LeftInfo.Signed);
5507 Right = Builder.CreateIntCast(Right, EncompassingLLVMTy, RightInfo.Signed);
5508
5509 // Perform the operation on the extended values.
5510 llvm::Value *Overflow, *Result;
5511 Result = EmitOverflowIntrinsic(*this, IntrinsicId, Left, Right, Overflow);
5512
5513 if (EncompassingInfo.Width > ResultInfo.Width) {
5514 // The encompassing type is wider than the result type, so we need to
5515 // truncate it.
5516 llvm::Value *ResultTrunc = Builder.CreateTrunc(Result, ResultLLVMTy);
5517
5518 // To see if the truncation caused an overflow, we will extend
5519 // the result and then compare it to the original result.
5520 llvm::Value *ResultTruncExt = Builder.CreateIntCast(
5521 ResultTrunc, EncompassingLLVMTy, ResultInfo.Signed);
5522 llvm::Value *TruncationOverflow =
5523 Builder.CreateICmpNE(Result, ResultTruncExt);
5524
5525 Overflow = Builder.CreateOr(Overflow, TruncationOverflow);
5526 Result = ResultTrunc;
5527 }
5528
5529 // Finally, store the result using the pointer.
5530 bool isVolatile =
5531 ResultArg->getType()->getPointeeType().isVolatileQualified();
5532 Builder.CreateStore(EmitToMemory(Result, ResultQTy), ResultPtr, isVolatile);
5533
5534 return RValue::get(Overflow);
5535 }
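// Editorial note, not part of the original source: for mixed operand/result
// types, e.g.
//   int res; bool ovf = __builtin_add_overflow(4000000000u, 10L, &res);
// the encompassing type computed above is wide (and signed) enough to hold
// all three types; the *.with.overflow intrinsic runs at that width, and an
// extra "did truncating to the result type change the value" check is OR'ed
// into the returned overflow flag before the result is stored.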
5536
5537 case Builtin::BI__builtin_uadd_overflow:
5538 case Builtin::BI__builtin_uaddl_overflow:
5539 case Builtin::BI__builtin_uaddll_overflow:
5540 case Builtin::BI__builtin_usub_overflow:
5541 case Builtin::BI__builtin_usubl_overflow:
5542 case Builtin::BI__builtin_usubll_overflow:
5543 case Builtin::BI__builtin_umul_overflow:
5544 case Builtin::BI__builtin_umull_overflow:
5545 case Builtin::BI__builtin_umulll_overflow:
5546 case Builtin::BI__builtin_sadd_overflow:
5547 case Builtin::BI__builtin_saddl_overflow:
5548 case Builtin::BI__builtin_saddll_overflow:
5549 case Builtin::BI__builtin_ssub_overflow:
5550 case Builtin::BI__builtin_ssubl_overflow:
5551 case Builtin::BI__builtin_ssubll_overflow:
5552 case Builtin::BI__builtin_smul_overflow:
5553 case Builtin::BI__builtin_smull_overflow:
5554 case Builtin::BI__builtin_smulll_overflow: {
5555
5556 // We translate all of these builtins directly to the relevant llvm IR node.
5557
5558 // Scalarize our inputs.
5559 llvm::Value *X = EmitScalarExpr(E->getArg(0));
5560 llvm::Value *Y = EmitScalarExpr(E->getArg(1));
5561 Address SumOutPtr = EmitPointerWithAlignment(E->getArg(2));
5562
5563 // Decide which of the overflow intrinsics we are lowering to:
5564 llvm::Intrinsic::ID IntrinsicId;
5565 switch (BuiltinID) {
5566 default: llvm_unreachable("Unknown overflow builtin id.");
5567 case Builtin::BI__builtin_uadd_overflow:
5568 case Builtin::BI__builtin_uaddl_overflow:
5569 case Builtin::BI__builtin_uaddll_overflow:
5570 IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
5571 break;
5572 case Builtin::BI__builtin_usub_overflow:
5573 case Builtin::BI__builtin_usubl_overflow:
5574 case Builtin::BI__builtin_usubll_overflow:
5575 IntrinsicId = llvm::Intrinsic::usub_with_overflow;
5576 break;
5577 case Builtin::BI__builtin_umul_overflow:
5578 case Builtin::BI__builtin_umull_overflow:
5579 case Builtin::BI__builtin_umulll_overflow:
5580 IntrinsicId = llvm::Intrinsic::umul_with_overflow;
5581 break;
5582 case Builtin::BI__builtin_sadd_overflow:
5583 case Builtin::BI__builtin_saddl_overflow:
5584 case Builtin::BI__builtin_saddll_overflow:
5585 IntrinsicId = llvm::Intrinsic::sadd_with_overflow;
5586 break;
5587 case Builtin::BI__builtin_ssub_overflow:
5588 case Builtin::BI__builtin_ssubl_overflow:
5589 case Builtin::BI__builtin_ssubll_overflow:
5590 IntrinsicId = llvm::Intrinsic::ssub_with_overflow;
5591 break;
5592 case Builtin::BI__builtin_smul_overflow:
5593 case Builtin::BI__builtin_smull_overflow:
5594 case Builtin::BI__builtin_smulll_overflow:
5595 IntrinsicId = llvm::Intrinsic::smul_with_overflow;
5596 break;
5597 }
5598
5599
5600 llvm::Value *Carry;
5601 llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry);
5602 Builder.CreateStore(Sum, SumOutPtr);
5603
5604 return RValue::get(Carry);
5605 }
5606 case Builtin::BIaddressof:
5607 case Builtin::BI__addressof:
5608 case Builtin::BI__builtin_addressof:
5609 return RValue::get(EmitLValue(E->getArg(0)).getPointer(*this));
5610 case Builtin::BI__builtin_function_start:
5611 return RValue::get(CGM.GetFunctionStart(
5612 E->getArg(0)->getAsBuiltinConstantDeclRef(CGM.getContext())));
5613 case Builtin::BI__builtin_operator_new:
5614 return EmitBuiltinNewDeleteCall(
5615 E->getCallee()->getType()->castAs<FunctionProtoType>(), E, false);
5616 case Builtin::BI__builtin_operator_delete:
5617 EmitBuiltinNewDeleteCall(
5618 E->getCallee()->getType()->castAs<FunctionProtoType>(), E, true);
5619 return RValue::get(nullptr);
5620
5621 case Builtin::BI__builtin_is_aligned:
5622 return EmitBuiltinIsAligned(E);
5623 case Builtin::BI__builtin_align_up:
5624 return EmitBuiltinAlignTo(E, true);
5625 case Builtin::BI__builtin_align_down:
5626 return EmitBuiltinAlignTo(E, false);
5627
5628 case Builtin::BI__noop:
5629 // __noop always evaluates to an integer literal zero.
5630 return RValue::get(ConstantInt::get(IntTy, 0));
5631 case Builtin::BI__builtin_call_with_static_chain: {
5632 const CallExpr *Call = cast<CallExpr>(E->getArg(0));
5633 const Expr *Chain = E->getArg(1);
5634 return EmitCall(Call->getCallee()->getType(),
5635 EmitCallee(Call->getCallee()), Call, ReturnValue,
5636 EmitScalarExpr(Chain));
5637 }
5638 case Builtin::BI_InterlockedExchange8:
5639 case Builtin::BI_InterlockedExchange16:
5640 case Builtin::BI_InterlockedExchange:
5641 case Builtin::BI_InterlockedExchangePointer:
5642 return RValue::get(
5643 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E));
5644 case Builtin::BI_InterlockedCompareExchangePointer:
5645 return RValue::get(
5646 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange, E));
5647 case Builtin::BI_InterlockedCompareExchangePointer_nf:
5648 return RValue::get(
5649 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_nf, E));
5650 case Builtin::BI_InterlockedCompareExchange8:
5651 case Builtin::BI_InterlockedCompareExchange16:
5652 case Builtin::BI_InterlockedCompareExchange:
5653 case Builtin::BI_InterlockedCompareExchange64:
5654 return RValue::get(EmitAtomicCmpXchgForMSIntrin(*this, E));
5655 case Builtin::BI_InterlockedIncrement16:
5656 case Builtin::BI_InterlockedIncrement:
5657 return RValue::get(
5658 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E));
5659 case Builtin::BI_InterlockedDecrement16:
5660 case Builtin::BI_InterlockedDecrement:
5661 return RValue::get(
5662 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E));
5663 case Builtin::BI_InterlockedAnd8:
5664 case Builtin::BI_InterlockedAnd16:
5665 case Builtin::BI_InterlockedAnd:
5666 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E));
5667 case Builtin::BI_InterlockedExchangeAdd8:
5668 case Builtin::BI_InterlockedExchangeAdd16:
5669 case Builtin::BI_InterlockedExchangeAdd:
5670 return RValue::get(
5671 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E));
5672 case Builtin::BI_InterlockedExchangeSub8:
5673 case Builtin::BI_InterlockedExchangeSub16:
5674 case Builtin::BI_InterlockedExchangeSub:
5675 return RValue::get(
5676 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E));
5677 case Builtin::BI_InterlockedOr8:
5678 case Builtin::BI_InterlockedOr16:
5679 case Builtin::BI_InterlockedOr:
5680 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E));
5681 case Builtin::BI_InterlockedXor8:
5682 case Builtin::BI_InterlockedXor16:
5683 case Builtin::BI_InterlockedXor:
5684 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E));
5685
5686 case Builtin::BI_bittest64:
5687 case Builtin::BI_bittest:
5688 case Builtin::BI_bittestandcomplement64:
5689 case Builtin::BI_bittestandcomplement:
5690 case Builtin::BI_bittestandreset64:
5691 case Builtin::BI_bittestandreset:
5692 case Builtin::BI_bittestandset64:
5693 case Builtin::BI_bittestandset:
5694 case Builtin::BI_interlockedbittestandreset:
5695 case Builtin::BI_interlockedbittestandreset64:
5696 case Builtin::BI_interlockedbittestandset64:
5697 case Builtin::BI_interlockedbittestandset:
5698 case Builtin::BI_interlockedbittestandset_acq:
5699 case Builtin::BI_interlockedbittestandset_rel:
5700 case Builtin::BI_interlockedbittestandset_nf:
5701 case Builtin::BI_interlockedbittestandreset_acq:
5702 case Builtin::BI_interlockedbittestandreset_rel:
5703 case Builtin::BI_interlockedbittestandreset_nf:
5704 return RValue::get(EmitBitTestIntrinsic(*this, BuiltinID, E));
5705
5706 // These builtins exist to emit regular volatile loads and stores not
5707 // affected by the -fms-volatile setting.
5708 case Builtin::BI__iso_volatile_load8:
5709 case Builtin::BI__iso_volatile_load16:
5710 case Builtin::BI__iso_volatile_load32:
5711 case Builtin::BI__iso_volatile_load64:
5712 return RValue::get(EmitISOVolatileLoad(*this, E));
5713 case Builtin::BI__iso_volatile_store8:
5714 case Builtin::BI__iso_volatile_store16:
5715 case Builtin::BI__iso_volatile_store32:
5716 case Builtin::BI__iso_volatile_store64:
5717 return RValue::get(EmitISOVolatileStore(*this, E));
5718
5719 case Builtin::BI__builtin_ptrauth_sign_constant:
5720 return RValue::get(ConstantEmitter(*this).emitAbstract(E, E->getType()));
5721
5722 case Builtin::BI__builtin_ptrauth_auth:
5723 case Builtin::BI__builtin_ptrauth_auth_and_resign:
5724 case Builtin::BI__builtin_ptrauth_blend_discriminator:
5725 case Builtin::BI__builtin_ptrauth_sign_generic_data:
5726 case Builtin::BI__builtin_ptrauth_sign_unauthenticated:
5727 case Builtin::BI__builtin_ptrauth_strip: {
5728 // Emit the arguments.
5729 SmallVector<llvm::Value *, 5> Args;
5730 for (auto argExpr : E->arguments())
5731 Args.push_back(EmitScalarExpr(argExpr));
5732
5733 // Cast the value to intptr_t, saving its original type.
5734 llvm::Type *OrigValueType = Args[0]->getType();
5735 if (OrigValueType->isPointerTy())
5736 Args[0] = Builder.CreatePtrToInt(Args[0], IntPtrTy);
5737
5738 switch (BuiltinID) {
5739 case Builtin::BI__builtin_ptrauth_auth_and_resign:
5740 if (Args[4]->getType()->isPointerTy())
5741 Args[4] = Builder.CreatePtrToInt(Args[4], IntPtrTy);
5742 [[fallthrough]];
5743
5744 case Builtin::BI__builtin_ptrauth_auth:
5745 case Builtin::BI__builtin_ptrauth_sign_unauthenticated:
5746 if (Args[2]->getType()->isPointerTy())
5747 Args[2] = Builder.CreatePtrToInt(Args[2], IntPtrTy);
5748 break;
5749
5750 case Builtin::BI__builtin_ptrauth_sign_generic_data:
5751 if (Args[1]->getType()->isPointerTy())
5752 Args[1] = Builder.CreatePtrToInt(Args[1], IntPtrTy);
5753 break;
5754
5755 case Builtin::BI__builtin_ptrauth_blend_discriminator:
5756 case Builtin::BI__builtin_ptrauth_strip:
5757 break;
5758 }
5759
5760 // Call the intrinsic.
5761 auto IntrinsicID = [&]() -> unsigned {
5762 switch (BuiltinID) {
5763 case Builtin::BI__builtin_ptrauth_auth:
5764 return llvm::Intrinsic::ptrauth_auth;
5765 case Builtin::BI__builtin_ptrauth_auth_and_resign:
5766 return llvm::Intrinsic::ptrauth_resign;
5767 case Builtin::BI__builtin_ptrauth_blend_discriminator:
5768 return llvm::Intrinsic::ptrauth_blend;
5769 case Builtin::BI__builtin_ptrauth_sign_generic_data:
5770 return llvm::Intrinsic::ptrauth_sign_generic;
5771 case Builtin::BI__builtin_ptrauth_sign_unauthenticated:
5772 return llvm::Intrinsic::ptrauth_sign;
5773 case Builtin::BI__builtin_ptrauth_strip:
5774 return llvm::Intrinsic::ptrauth_strip;
5775 }
5776 llvm_unreachable("bad ptrauth intrinsic");
5777 }();
5778 auto Intrinsic = CGM.getIntrinsic(IntrinsicID);
5779 llvm::Value *Result = EmitRuntimeCall(Intrinsic, Args);
5780
5781 if (BuiltinID != Builtin::BI__builtin_ptrauth_sign_generic_data &&
5782 BuiltinID != Builtin::BI__builtin_ptrauth_blend_discriminator &&
5783 OrigValueType->isPointerTy()) {
5784 Result = Builder.CreateIntToPtr(Result, OrigValueType);
5785 }
5786 return RValue::get(Result);
5787 }
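// Editorial note, not part of the original source: for example,
//   void *stripped = __builtin_ptrauth_strip(p, key);
// becomes a ptrtoint of p, a call to the llvm.ptrauth.strip intrinsic with
// the key operand, and an inttoptr back to the original pointer type, which
// is exactly the cast-in/cast-out handling performed above.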
5788
5789 case Builtin::BI__exception_code:
5790 case Builtin::BI_exception_code:
5791 return RValue::get(EmitSEHExceptionCode());
5792 case Builtin::BI__exception_info:
5793 case Builtin::BI_exception_info:
5794 return RValue::get(EmitSEHExceptionInfo());
5795 case Builtin::BI__abnormal_termination:
5796 case Builtin::BI_abnormal_termination:
5797 return RValue::get(EmitSEHAbnormalTermination());
5798 case Builtin::BI_setjmpex:
5799 if (getTarget().getTriple().isOSMSVCRT() && E->getNumArgs() == 1 &&
5800 E->getArg(0)->getType()->isPointerType())
5801 return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmpex, E);
5802 break;
5803 case Builtin::BI_setjmp:
5804 if (getTarget().getTriple().isOSMSVCRT() && E->getNumArgs() == 1 &&
5805 E->getArg(0)->getType()->isPointerType()) {
5806 if (getTarget().getTriple().getArch() == llvm::Triple::x86)
5807 return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmp3, E);
5808 else if (getTarget().getTriple().getArch() == llvm::Triple::aarch64)
5809 return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmpex, E);
5810 return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmp, E);
5811 }
5812 break;
5813
5814 // C++ std:: builtins.
5815 case Builtin::BImove:
5816 case Builtin::BImove_if_noexcept:
5817 case Builtin::BIforward:
5818 case Builtin::BIforward_like:
5819 case Builtin::BIas_const:
5820 return RValue::get(EmitLValue(E->getArg(0)).getPointer(*this));
5821 case Builtin::BI__GetExceptionInfo: {
5822 if (llvm::GlobalVariable *GV =
5823 CGM.getCXXABI().getThrowInfo(FD->getParamDecl(0)->getType()))
5824 return RValue::get(GV);
5825 break;
5826 }
5827
5828 case Builtin::BI__fastfail:
5829 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::__fastfail, E));
5830
5831 case Builtin::BI__builtin_coro_id:
5832 return EmitCoroutineIntrinsic(E, Intrinsic::coro_id);
5833 case Builtin::BI__builtin_coro_promise:
5834 return EmitCoroutineIntrinsic(E, Intrinsic::coro_promise);
5835 case Builtin::BI__builtin_coro_resume:
5836 EmitCoroutineIntrinsic(E, Intrinsic::coro_resume);
5837 return RValue::get(nullptr);
5838 case Builtin::BI__builtin_coro_frame:
5839 return EmitCoroutineIntrinsic(E, Intrinsic::coro_frame);
5840 case Builtin::BI__builtin_coro_noop:
5841 return EmitCoroutineIntrinsic(E, Intrinsic::coro_noop);
5842 case Builtin::BI__builtin_coro_free:
5843 return EmitCoroutineIntrinsic(E, Intrinsic::coro_free);
5844 case Builtin::BI__builtin_coro_destroy:
5845 EmitCoroutineIntrinsic(E, Intrinsic::coro_destroy);
5846 return RValue::get(nullptr);
5847 case Builtin::BI__builtin_coro_done:
5848 return EmitCoroutineIntrinsic(E, Intrinsic::coro_done);
5849 case Builtin::BI__builtin_coro_alloc:
5850 return EmitCoroutineIntrinsic(E, Intrinsic::coro_alloc);
5851 case Builtin::BI__builtin_coro_begin:
5852 return EmitCoroutineIntrinsic(E, Intrinsic::coro_begin);
5853 case Builtin::BI__builtin_coro_end:
5854 return EmitCoroutineIntrinsic(E, Intrinsic::coro_end);
5855 case Builtin::BI__builtin_coro_suspend:
5856 return EmitCoroutineIntrinsic(E, Intrinsic::coro_suspend);
5857 case Builtin::BI__builtin_coro_size:
5858 return EmitCoroutineIntrinsic(E, Intrinsic::coro_size);
5859 case Builtin::BI__builtin_coro_align:
5860 return EmitCoroutineIntrinsic(E, Intrinsic::coro_align);
5861
5862 // OpenCL v2.0 s6.13.16.2, Built-in pipe read and write functions
5863 case Builtin::BIread_pipe:
5864 case Builtin::BIwrite_pipe: {
5865 Value *Arg0 = EmitScalarExpr(E->getArg(0)),
5866 *Arg1 = EmitScalarExpr(E->getArg(1));
5867 CGOpenCLRuntime OpenCLRT(CGM);
5868 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
5869 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
5870
5871 // Type of the generic packet parameter.
5872 unsigned GenericAS =
5873 getContext().getTargetAddressSpace(LangAS::opencl_generic);
5874 llvm::Type *I8PTy = llvm::PointerType::get(getLLVMContext(), GenericAS);
5875
5876 // Testing which overloaded version we should generate the call for.
5877 if (2U == E->getNumArgs()) {
5878 const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_2"
5879 : "__write_pipe_2";
5880 // Creating a generic function type to be able to call with any builtin or
5881 // user defined type.
5882 llvm::Type *ArgTys[] = {Arg0->getType(), I8PTy, Int32Ty, Int32Ty};
5883 llvm::FunctionType *FTy = llvm::FunctionType::get(
5884 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5885 Value *ACast = Builder.CreateAddrSpaceCast(Arg1, I8PTy);
5886 return RValue::get(
5887 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
5888 {Arg0, ACast, PacketSize, PacketAlign}));
5889 } else {
5890 assert(4 == E->getNumArgs() &&
5891 "Illegal number of parameters to pipe function");
5892 const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_4"
5893 : "__write_pipe_4";
5894
5895 llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, I8PTy,
5896 Int32Ty, Int32Ty};
5897 Value *Arg2 = EmitScalarExpr(E->getArg(2)),
5898 *Arg3 = EmitScalarExpr(E->getArg(3));
5899 llvm::FunctionType *FTy = llvm::FunctionType::get(
5900 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5901 Value *ACast = Builder.CreateAddrSpaceCast(Arg3, I8PTy);
5902 // We know the third argument is an integer type, but we may need to cast
5903 // it to i32.
5904 if (Arg2->getType() != Int32Ty)
5905 Arg2 = Builder.CreateZExtOrTrunc(Arg2, Int32Ty);
5906 return RValue::get(
5907 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
5908 {Arg0, Arg1, Arg2, ACast, PacketSize, PacketAlign}));
5909 }
5910 }
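// Rough expansion sketch (illustrative only): for a pipe of packet type T, a
// two-argument call like read_pipe(p, &val) becomes a call to the runtime
// function
//   int __read_pipe_2(pipe p, generic void *ptr, int size, int align);
// where ptr is &val cast to the generic address space and size/align describe
// the packet; the four-argument reserve/commit form calls __read_pipe_4 (or
// __write_pipe_2/__write_pipe_4 for writes) with the reservation ID and index.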
5911 // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe reserve read and write
5912 // functions
5913 case Builtin::BIreserve_read_pipe:
5914 case Builtin::BIreserve_write_pipe:
5915 case Builtin::BIwork_group_reserve_read_pipe:
5916 case Builtin::BIwork_group_reserve_write_pipe:
5917 case Builtin::BIsub_group_reserve_read_pipe:
5918 case Builtin::BIsub_group_reserve_write_pipe: {
5919 // Composing the mangled name for the function.
5920 const char *Name;
5921 if (BuiltinID == Builtin::BIreserve_read_pipe)
5922 Name = "__reserve_read_pipe";
5923 else if (BuiltinID == Builtin::BIreserve_write_pipe)
5924 Name = "__reserve_write_pipe";
5925 else if (BuiltinID == Builtin::BIwork_group_reserve_read_pipe)
5926 Name = "__work_group_reserve_read_pipe";
5927 else if (BuiltinID == Builtin::BIwork_group_reserve_write_pipe)
5928 Name = "__work_group_reserve_write_pipe";
5929 else if (BuiltinID == Builtin::BIsub_group_reserve_read_pipe)
5930 Name = "__sub_group_reserve_read_pipe";
5931 else
5932 Name = "__sub_group_reserve_write_pipe";
5933
5934 Value *Arg0 = EmitScalarExpr(E->getArg(0)),
5935 *Arg1 = EmitScalarExpr(E->getArg(1));
5936 llvm::Type *ReservedIDTy = ConvertType(getContext().OCLReserveIDTy);
5937 CGOpenCLRuntime OpenCLRT(CGM);
5938 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
5939 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
5940
5941 // Building the generic function prototype.
5942 llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty, Int32Ty};
5943 llvm::FunctionType *FTy = llvm::FunctionType::get(
5944 ReservedIDTy, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5945 // We know the second argument is an integer type, but we may need to cast
5946 // it to i32.
5947 if (Arg1->getType() != Int32Ty)
5948 Arg1 = Builder.CreateZExtOrTrunc(Arg1, Int32Ty);
5949 return RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
5950 {Arg0, Arg1, PacketSize, PacketAlign}));
5951 }
5952 // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe commit read and write
5953 // functions
5954 case Builtin::BIcommit_read_pipe:
5955 case Builtin::BIcommit_write_pipe:
5956 case Builtin::BIwork_group_commit_read_pipe:
5957 case Builtin::BIwork_group_commit_write_pipe:
5958 case Builtin::BIsub_group_commit_read_pipe:
5959 case Builtin::BIsub_group_commit_write_pipe: {
5960 const char *Name;
5961 if (BuiltinID == Builtin::BIcommit_read_pipe)
5962 Name = "__commit_read_pipe";
5963 else if (BuiltinID == Builtin::BIcommit_write_pipe)
5964 Name = "__commit_write_pipe";
5965 else if (BuiltinID == Builtin::BIwork_group_commit_read_pipe)
5966 Name = "__work_group_commit_read_pipe";
5967 else if (BuiltinID == Builtin::BIwork_group_commit_write_pipe)
5968 Name = "__work_group_commit_write_pipe";
5969 else if (BuiltinID == Builtin::BIsub_group_commit_read_pipe)
5970 Name = "__sub_group_commit_read_pipe";
5971 else
5972 Name = "__sub_group_commit_write_pipe";
5973
5974 Value *Arg0 = EmitScalarExpr(E->getArg(0)),
5975 *Arg1 = EmitScalarExpr(E->getArg(1));
5976 CGOpenCLRuntime OpenCLRT(CGM);
5977 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
5978 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
5979
5980 // Building the generic function prototype.
5981 llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, Int32Ty};
5982 llvm::FunctionType *FTy =
5983 llvm::FunctionType::get(llvm::Type::getVoidTy(getLLVMContext()),
5984 llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5985
5986 return RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
5987 {Arg0, Arg1, PacketSize, PacketAlign}));
5988 }
5989 // OpenCL v2.0 s6.13.16.4 Built-in pipe query functions
5990 case Builtin::BIget_pipe_num_packets:
5991 case Builtin::BIget_pipe_max_packets: {
5992 const char *BaseName;
5993 const auto *PipeTy = E->getArg(0)->getType()->castAs<PipeType>();
5994 if (BuiltinID == Builtin::BIget_pipe_num_packets)
5995 BaseName = "__get_pipe_num_packets";
5996 else
5997 BaseName = "__get_pipe_max_packets";
5998 std::string Name = std::string(BaseName) +
5999 std::string(PipeTy->isReadOnly() ? "_ro" : "_wo");
6000
6001 // Building the generic function prototype.
6002 Value *Arg0 = EmitScalarExpr(E->getArg(0));
6003 CGOpenCLRuntime OpenCLRT(CGM);
6004 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
6005 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
6006 llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty};
6007 llvm::FunctionType *FTy = llvm::FunctionType::get(
6008 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
6009
6010 return RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
6011 {Arg0, PacketSize, PacketAlign}));
6012 }
6013
6014 // OpenCL v2.0 s6.13.9 - Address space qualifier functions.
6015 case Builtin::BIto_global:
6016 case Builtin::BIto_local:
6017 case Builtin::BIto_private: {
6018 auto Arg0 = EmitScalarExpr(E->getArg(0));
6019 auto NewArgT = llvm::PointerType::get(
6020 getLLVMContext(),
6021 CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
6022 auto NewRetT = llvm::PointerType::get(
6023 getLLVMContext(),
6024 CGM.getContext().getTargetAddressSpace(
6025 E->getType()->getPointeeType().getAddressSpace()));
6026 auto FTy = llvm::FunctionType::get(NewRetT, {NewArgT}, false);
6027 llvm::Value *NewArg;
6028 if (Arg0->getType()->getPointerAddressSpace() !=
6029 NewArgT->getPointerAddressSpace())
6030 NewArg = Builder.CreateAddrSpaceCast(Arg0, NewArgT);
6031 else
6032 NewArg = Builder.CreateBitOrPointerCast(Arg0, NewArgT);
6033 auto NewName = std::string("__") + E->getDirectCallee()->getName().str();
6034 auto NewCall =
6035 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, NewName), {NewArg});
6036 return RValue::get(Builder.CreateBitOrPointerCast(NewCall,
6037 ConvertType(E->getType())));
6038 }
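// Sketch of the resulting call (illustrative): to_global(p) becomes
//   __to_global(<p cast to the generic address space when needed>)
// and the call result is then cast back to the expression's result pointer
// type; to_local and to_private follow the same pattern via __to_local and
// __to_private.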
6039
6040 // OpenCL v2.0, s6.13.17 - Enqueue kernel function.
6041 // Table 6.13.17.1 specifies four overload forms of enqueue_kernel.
6042 // The code below expands the builtin call to a call to one of the following
6043 // functions that an OpenCL runtime library will have to provide:
6044 // __enqueue_kernel_basic
6045 // __enqueue_kernel_varargs
6046 // __enqueue_kernel_basic_events
6047 // __enqueue_kernel_events_varargs
6048 case Builtin::BIenqueue_kernel: {
6049 StringRef Name; // Generated function call name
6050 unsigned NumArgs = E->getNumArgs();
6051
6052 llvm::Type *QueueTy = ConvertType(getContext().OCLQueueTy);
6053 llvm::Type *GenericVoidPtrTy = Builder.getPtrTy(
6054 getContext().getTargetAddressSpace(LangAS::opencl_generic));
6055
6056 llvm::Value *Queue = EmitScalarExpr(E->getArg(0));
6057 llvm::Value *Flags = EmitScalarExpr(E->getArg(1));
6058 LValue NDRangeL = EmitAggExprToLValue(E->getArg(2));
6059 llvm::Value *Range = NDRangeL.getAddress().emitRawPointer(*this);
6060 llvm::Type *RangeTy = NDRangeL.getAddress().getType();
6061
6062 if (NumArgs == 4) {
6063 // The most basic form of the call with parameters:
6064 // queue_t, kernel_enqueue_flags_t, ndrange_t, block(void)
6065 Name = "__enqueue_kernel_basic";
6066 llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, GenericVoidPtrTy,
6067 GenericVoidPtrTy};
6068 llvm::FunctionType *FTy = llvm::FunctionType::get(
6069 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
6070
6071 auto Info =
6072 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3));
6073 llvm::Value *Kernel =
6074 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
6075 llvm::Value *Block =
6076 Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
6077
6078 auto RTCall = EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
6079 {Queue, Flags, Range, Kernel, Block});
6080 return RValue::get(RTCall);
6081 }
6082 assert(NumArgs >= 5 && "Invalid enqueue_kernel signature");
6083
6084 // Create a temporary array to hold the sizes of local pointer arguments
6085 // for the block. \p First is the position of the first size argument.
6086 auto CreateArrayForSizeVar = [=](unsigned First)
6087 -> std::tuple<llvm::Value *, llvm::Value *, llvm::Value *> {
6088 llvm::APInt ArraySize(32, NumArgs - First);
6089 QualType SizeArrayTy = getContext().getConstantArrayType(
6090 getContext().getSizeType(), ArraySize, nullptr,
6091 ArraySizeModifier::Normal,
6092 /*IndexTypeQuals=*/0);
6093 auto Tmp = CreateMemTemp(SizeArrayTy, "block_sizes");
6094 llvm::Value *TmpPtr = Tmp.getPointer();
6095 // The EmitLifetime* pair expects a naked Alloca as its last argument;
6096 // however, for cases where the default AS is not the Alloca AS, Tmp is
6097 // actually the Alloca ascasted to the default AS, hence the
6098 // stripPointerCasts().
6099 llvm::Value *Alloca = TmpPtr->stripPointerCasts();
6100 llvm::Value *TmpSize = EmitLifetimeStart(
6101 CGM.getDataLayout().getTypeAllocSize(Tmp.getElementType()), Alloca);
6102 llvm::Value *ElemPtr;
6103 // Each of the following arguments specifies the size of the corresponding
6104 // argument passed to the enqueued block.
6105 auto *Zero = llvm::ConstantInt::get(IntTy, 0);
6106 for (unsigned I = First; I < NumArgs; ++I) {
6107 auto *Index = llvm::ConstantInt::get(IntTy, I - First);
6108 auto *GEP = Builder.CreateGEP(Tmp.getElementType(), TmpPtr,
6109 {Zero, Index});
6110 if (I == First)
6111 ElemPtr = GEP;
6112 auto *V =
6113 Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy);
6114 Builder.CreateAlignedStore(
6115 V, GEP, CGM.getDataLayout().getPrefTypeAlign(SizeTy));
6116 }
6117 // Return the Alloca itself rather than a potential ascast as this is only
6118 // used by the paired EmitLifetimeEnd.
6119 return std::tie(ElemPtr, TmpSize, Alloca);
6120 };
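// Note on the helper above: it returns (1) a pointer to the first element of
// the temporary "block_sizes" array, which is what the *_varargs runtime calls
// consume, (2) the size value returned by EmitLifetimeStart (which may be
// null), and (3) the raw alloca, so callers can emit the matching
// EmitLifetimeEnd once the runtime call has been made.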
6121
6122 // Could have events and/or varargs.
6123 if (E->getArg(3)->getType()->isBlockPointerType()) {
6124 // No events passed, but has variadic arguments.
6125 Name = "__enqueue_kernel_varargs";
6126 auto Info =
6127 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3));
6128 llvm::Value *Kernel =
6129 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
6130 auto *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
6131 llvm::Value *ElemPtr, *TmpSize, *TmpPtr;
6132 std::tie(ElemPtr, TmpSize, TmpPtr) = CreateArrayForSizeVar(4);
6133
6134 // Create a vector of the arguments, as well as a constant value to
6135 // express to the runtime the number of variadic arguments.
6136 llvm::Value *const Args[] = {Queue, Flags,
6137 Range, Kernel,
6138 Block, ConstantInt::get(IntTy, NumArgs - 4),
6139 ElemPtr};
6140 llvm::Type *const ArgTys[] = {
6141 QueueTy, IntTy, RangeTy, GenericVoidPtrTy,
6142 GenericVoidPtrTy, IntTy, ElemPtr->getType()};
6143
6144 llvm::FunctionType *FTy = llvm::FunctionType::get(Int32Ty, ArgTys, false);
6145 auto Call = RValue::get(
6146 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Args));
6147 if (TmpSize)
6148 EmitLifetimeEnd(TmpSize, TmpPtr);
6149 return Call;
6150 }
6151 // Any remaining overloads take event arguments.
6152 if (NumArgs >= 7) {
6153 llvm::PointerType *PtrTy = llvm::PointerType::get(
6154 getLLVMContext(),
6155 CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
6156
6157 llvm::Value *NumEvents =
6158 Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(3)), Int32Ty);
6159
6160 // Since SemaOpenCLBuiltinEnqueueKernel allows fifth and sixth arguments
6161 // to be a null pointer constant (including `0` literal), we can take it
6162 // into account and emit null pointer directly.
6163 llvm::Value *EventWaitList = nullptr;
6164 if (E->getArg(4)->isNullPointerConstant(
6165 getContext(), Expr::NPC_ValueDependentIsNotNull)) {
6166 EventWaitList = llvm::ConstantPointerNull::get(PtrTy);
6167 } else {
6168 EventWaitList =
6169 E->getArg(4)->getType()->isArrayType()
6170 ? EmitArrayToPointerDecay(E->getArg(4)).emitRawPointer(*this)
6171 : EmitScalarExpr(E->getArg(4));
6172 // Convert to generic address space.
6173 EventWaitList = Builder.CreatePointerCast(EventWaitList, PtrTy);
6174 }
6175 llvm::Value *EventRet = nullptr;
6176 if (E->getArg(5)->isNullPointerConstant(
6177 getContext(), Expr::NPC_ValueDependentIsNotNull)) {
6178 EventRet = llvm::ConstantPointerNull::get(PtrTy);
6179 } else {
6180 EventRet =
6181 Builder.CreatePointerCast(EmitScalarExpr(E->getArg(5)), PtrTy);
6182 }
6183
6184 auto Info =
6185 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(6));
6186 llvm::Value *Kernel =
6187 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
6188 llvm::Value *Block =
6189 Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
6190
6191 std::vector<llvm::Type *> ArgTys = {
6192 QueueTy, Int32Ty, RangeTy, Int32Ty,
6193 PtrTy, PtrTy, GenericVoidPtrTy, GenericVoidPtrTy};
6194
6195 std::vector<llvm::Value *> Args = {Queue, Flags, Range,
6196 NumEvents, EventWaitList, EventRet,
6197 Kernel, Block};
6198
6199 if (NumArgs == 7) {
6200 // Has events but no variadics.
6201 Name = "__enqueue_kernel_basic_events";
6202 llvm::FunctionType *FTy = llvm::FunctionType::get(
6203 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
6204 return RValue::get(
6205 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
6206 llvm::ArrayRef<llvm::Value *>(Args)));
6207 }
6208 // Has event info and variadics
6209 // Pass the number of variadics to the runtime function too.
6210 Args.push_back(ConstantInt::get(Int32Ty, NumArgs - 7));
6211 ArgTys.push_back(Int32Ty);
6212 Name = "__enqueue_kernel_events_varargs";
6213
6214 llvm::Value *ElemPtr, *TmpSize, *TmpPtr;
6215 std::tie(ElemPtr, TmpSize, TmpPtr) = CreateArrayForSizeVar(7);
6216 Args.push_back(ElemPtr);
6217 ArgTys.push_back(ElemPtr->getType());
6218
6219 llvm::FunctionType *FTy = llvm::FunctionType::get(
6220 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
6221 auto Call =
6222 RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
6223 llvm::ArrayRef<llvm::Value *>(Args)));
6224 if (TmpSize)
6225 EmitLifetimeEnd(TmpSize, TmpPtr);
6226 return Call;
6227 }
6228 llvm_unreachable("Unexpected enqueue_kernel signature");
6229 }
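// Overall mapping sketch (illustrative; "invoke_fn" and "block_ctx" stand for
// the kernel handle and block argument produced by emitOpenCLEnqueuedBlock):
//   enqueue_kernel(q, flags, ndr, block)
//     -> __enqueue_kernel_basic(q, flags, &ndr, invoke_fn, block_ctx)
// adding local-size varargs selects __enqueue_kernel_varargs, adding an event
// wait list plus return event selects __enqueue_kernel_basic_events, and
// having both selects __enqueue_kernel_events_varargs.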
6230 // OpenCL v2.0 s6.13.17.6 - Kernel query functions need bitcast of block
6231 // parameter.
6232 case Builtin::BIget_kernel_work_group_size: {
6233 llvm::Type *GenericVoidPtrTy = Builder.getPtrTy(
6234 getContext().getTargetAddressSpace(LangAS::opencl_generic));
6235 auto Info =
6236 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0));
6237 Value *Kernel =
6238 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
6239 Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
6240 return RValue::get(EmitRuntimeCall(
6241 CGM.CreateRuntimeFunction(
6242 llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy},
6243 false),
6244 "__get_kernel_work_group_size_impl"),
6245 {Kernel, Arg}));
6246 }
6247 case Builtin::BIget_kernel_preferred_work_group_size_multiple: {
6248 llvm::Type *GenericVoidPtrTy = Builder.getPtrTy(
6249 getContext().getTargetAddressSpace(LangAS::opencl_generic));
6250 auto Info =
6251 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0));
6252 Value *Kernel =
6253 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
6254 Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
6255 return RValue::get(EmitRuntimeCall(
6256 CGM.CreateRuntimeFunction(
6257 llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy},
6258 false),
6259 "__get_kernel_preferred_work_group_size_multiple_impl"),
6260 {Kernel, Arg}));
6261 }
6262 case Builtin::BIget_kernel_max_sub_group_size_for_ndrange:
6263 case Builtin::BIget_kernel_sub_group_count_for_ndrange: {
6264 llvm::Type *GenericVoidPtrTy = Builder.getPtrTy(
6265 getContext().getTargetAddressSpace(LangAS::opencl_generic));
6266 LValue NDRangeL = EmitAggExprToLValue(E->getArg(0));
6267 llvm::Value *NDRange = NDRangeL.getAddress().emitRawPointer(*this);
6268 auto Info =
6269 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(1));
6270 Value *Kernel =
6271 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
6272 Value *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
6273 const char *Name =
6274 BuiltinID == Builtin::BIget_kernel_max_sub_group_size_for_ndrange
6275 ? "__get_kernel_max_sub_group_size_for_ndrange_impl"
6276 : "__get_kernel_sub_group_count_for_ndrange_impl";
6279 llvm::FunctionType::get(
6280 IntTy, {NDRange->getType(), GenericVoidPtrTy, GenericVoidPtrTy},
6281 false),
6282 Name),
6283 {NDRange, Kernel, Block}));
6284 }
6285 case Builtin::BI__builtin_store_half:
6286 case Builtin::BI__builtin_store_halff: {
6287 Value *Val = EmitScalarExpr(E->getArg(0));
6288 Address Address = EmitPointerWithAlignment(E->getArg(1));
6289 Value *HalfVal = Builder.CreateFPTrunc(Val, Builder.getHalfTy());
6290 Builder.CreateStore(HalfVal, Address);
6291 return RValue::get(nullptr);
6292 }
6293 case Builtin::BI__builtin_load_half: {
6294 Address Address = EmitPointerWithAlignment(E->getArg(0));
6295 Value *HalfVal = Builder.CreateLoad(Address);
6296 return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getDoubleTy()));
6297 }
6298 case Builtin::BI__builtin_load_halff: {
6299 Address Address = EmitPointerWithAlignment(E->getArg(0));
6300 Value *HalfVal = Builder.CreateLoad(Address);
6301 return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getFloatTy()));
6302 }
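// These half-precision helpers are symmetric: the store forms truncate the
// incoming float/double to a 16-bit half before storing, while
// __builtin_load_half extends the loaded half to double and
// __builtin_load_halff extends it to float.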
6303 case Builtin::BI__builtin_printf:
6304 case Builtin::BIprintf:
6305 if (getTarget().getTriple().isNVPTX() ||
6306 getTarget().getTriple().isAMDGCN() ||
6307 (getTarget().getTriple().isSPIRV() &&
6308 getTarget().getTriple().getVendor() == Triple::VendorType::AMD)) {
6309 if (getTarget().getTriple().isNVPTX())
6310 return EmitNVPTXDevicePrintfCallExpr(E);
6311 if ((getTarget().getTriple().isAMDGCN() ||
6312 getTarget().getTriple().isSPIRV()) &&
6313 getLangOpts().HIP)
6314 return EmitAMDGPUDevicePrintfCallExpr(E);
6315 }
6316
6317 break;
6318 case Builtin::BI__builtin_canonicalize:
6319 case Builtin::BI__builtin_canonicalizef:
6320 case Builtin::BI__builtin_canonicalizef16:
6321 case Builtin::BI__builtin_canonicalizel:
6322 return RValue::get(
6323 emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::canonicalize));
6324
6325 case Builtin::BI__builtin_thread_pointer: {
6326 if (!getContext().getTargetInfo().isTLSSupported())
6327 CGM.ErrorUnsupported(E, "__builtin_thread_pointer");
6328 // Fall through - it's already mapped to the intrinsic by ClangBuiltin.
6329 break;
6330 }
6331 case Builtin::BI__builtin_os_log_format:
6332 return emitBuiltinOSLogFormat(*E);
6333
6334 case Builtin::BI__xray_customevent: {
6335 if (!ShouldXRayInstrumentFunction())
6336 return RValue::getIgnored();
6337
6338 if (!CGM.getCodeGenOpts().XRayInstrumentationBundle.has(
6339 XRayInstrKind::Custom))
6340 return RValue::getIgnored();
6341
6342 if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>())
6343 if (XRayAttr->neverXRayInstrument() && !AlwaysEmitXRayCustomEvents())
6344 return RValue::getIgnored();
6345
6346 Function *F = CGM.getIntrinsic(Intrinsic::xray_customevent);
6347 auto FTy = F->getFunctionType();
6348 auto Arg0 = E->getArg(0);
6349 auto Arg0Val = EmitScalarExpr(Arg0);
6350 auto Arg0Ty = Arg0->getType();
6351 auto PTy0 = FTy->getParamType(0);
6352 if (PTy0 != Arg0Val->getType()) {
6353 if (Arg0Ty->isArrayType())
6354 Arg0Val = EmitArrayToPointerDecay(Arg0).emitRawPointer(*this);
6355 else
6356 Arg0Val = Builder.CreatePointerCast(Arg0Val, PTy0);
6357 }
6358 auto Arg1 = EmitScalarExpr(E->getArg(1));
6359 auto PTy1 = FTy->getParamType(1);
6360 if (PTy1 != Arg1->getType())
6361 Arg1 = Builder.CreateTruncOrBitCast(Arg1, PTy1);
6362 return RValue::get(Builder.CreateCall(F, {Arg0Val, Arg1}));
6363 }
6364
6365 case Builtin::BI__xray_typedevent: {
6366 // TODO: There should be a way to always emit events even if the current
6367 // function is not instrumented. Losing events in a stream can cripple
6368 // a trace.
6369 if (!ShouldXRayInstrumentFunction())
6370 return RValue::getIgnored();
6371
6372 if (!CGM.getCodeGenOpts().XRayInstrumentationBundle.has(
6373 XRayInstrKind::Typed))
6374 return RValue::getIgnored();
6375
6376 if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>())
6377 if (XRayAttr->neverXRayInstrument() && !AlwaysEmitXRayTypedEvents())
6378 return RValue::getIgnored();
6379
6380 Function *F = CGM.getIntrinsic(Intrinsic::xray_typedevent);
6381 auto FTy = F->getFunctionType();
6382 auto Arg0 = EmitScalarExpr(E->getArg(0));
6383 auto PTy0 = FTy->getParamType(0);
6384 if (PTy0 != Arg0->getType())
6385 Arg0 = Builder.CreateTruncOrBitCast(Arg0, PTy0);
6386 auto Arg1 = E->getArg(1);
6387 auto Arg1Val = EmitScalarExpr(Arg1);
6388 auto Arg1Ty = Arg1->getType();
6389 auto PTy1 = FTy->getParamType(1);
6390 if (PTy1 != Arg1Val->getType()) {
6391 if (Arg1Ty->isArrayType())
6392 Arg1Val = EmitArrayToPointerDecay(Arg1).emitRawPointer(*this);
6393 else
6394 Arg1Val = Builder.CreatePointerCast(Arg1Val, PTy1);
6395 }
6396 auto Arg2 = EmitScalarExpr(E->getArg(2));
6397 auto PTy2 = FTy->getParamType(2);
6398 if (PTy2 != Arg2->getType())
6399 Arg2 = Builder.CreateTruncOrBitCast(Arg2, PTy2);
6400 return RValue::get(Builder.CreateCall(F, {Arg0, Arg1Val, Arg2}));
6401 }
6402
6403 case Builtin::BI__builtin_ms_va_start:
6404 case Builtin::BI__builtin_ms_va_end:
6405 return RValue::get(
6406 EmitVAStartEnd(EmitMSVAListRef(E->getArg(0)).emitRawPointer(*this),
6407 BuiltinID == Builtin::BI__builtin_ms_va_start));
6408
6409 case Builtin::BI__builtin_ms_va_copy: {
6410 // Lower this manually. We can't reliably determine whether or not any
6411 // given va_copy() is for a Win64 va_list from the calling convention
6412 // alone, because it's legal to do this from a System V ABI function.
6413 // With opaque pointer types, we won't have enough information in LLVM
6414 // IR to determine this from the argument types, either. Best to do it
6415 // now, while we have enough information.
6416 Address DestAddr = EmitMSVAListRef(E->getArg(0));
6417 Address SrcAddr = EmitMSVAListRef(E->getArg(1));
6418
6419 DestAddr = DestAddr.withElementType(Int8PtrTy);
6420 SrcAddr = SrcAddr.withElementType(Int8PtrTy);
6421
6422 Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val");
6423 return RValue::get(Builder.CreateStore(ArgPtr, DestAddr));
6424 }
6425
6426 case Builtin::BI__builtin_get_device_side_mangled_name: {
6427 auto Name = CGM.getCUDARuntime().getDeviceSideName(
6428 cast<DeclRefExpr>(E->getArg(0)->IgnoreImpCasts())->getDecl());
6429 auto Str = CGM.GetAddrOfConstantCString(Name, "");
6430 return RValue::get(Str.getPointer());
6431 }
6432 }
6433
6434 // If this is an alias for a lib function (e.g. __builtin_sin), emit
6435 // the call using the normal call path, but using the unmangled
6436 // version of the function name.
6437 if (getContext().BuiltinInfo.isLibFunction(BuiltinID))
6438 return emitLibraryCall(*this, FD, E,
6439 CGM.getBuiltinLibFunction(FD, BuiltinID));
6440
6441 // If this is a predefined lib function (e.g. malloc), emit the call
6442 // using exactly the normal call path.
6443 if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID))
6444 return emitLibraryCall(*this, FD, E, CGM.getRawFunctionPointer(FD));
6445
6446 // Check that a call to a target specific builtin has the correct target
6447 // features.
6448 // This check is done down here so that it is skipped for non-target-specific
6449 // builtins; however, if generic builtins start to require generic target
6450 // features, then we can move this up to the beginning of the function.
6451 checkTargetFeatures(E, FD);
6452
6453 if (unsigned VectorWidth = getContext().BuiltinInfo.getRequiredVectorWidth(BuiltinID))
6454 LargestVectorWidth = std::max(LargestVectorWidth, VectorWidth);
6455
6456 // See if we have a target specific intrinsic.
6457 StringRef Name = getContext().BuiltinInfo.getName(BuiltinID);
6458 Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic;
6459 StringRef Prefix =
6460 llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch());
6461 if (!Prefix.empty()) {
6462 IntrinsicID = Intrinsic::getIntrinsicForClangBuiltin(Prefix.data(), Name);
6463 if (IntrinsicID == Intrinsic::not_intrinsic && Prefix == "spv" &&
6464 getTarget().getTriple().getOS() == llvm::Triple::OSType::AMDHSA)
6465 IntrinsicID = Intrinsic::getIntrinsicForClangBuiltin("amdgcn", Name);
6466 // NOTE: we don't need to perform a compatibility flag check here, since the
6467 // intrinsics are declared in Builtins*.def via LANGBUILTIN, which filters the
6468 // MS builtins via ALL_MS_LANGUAGES so they are filtered out earlier.
6469 if (IntrinsicID == Intrinsic::not_intrinsic)
6470 IntrinsicID = Intrinsic::getIntrinsicForMSBuiltin(Prefix.data(), Name);
6471 }
6472
6473 if (IntrinsicID != Intrinsic::not_intrinsic) {
6474 SmallVector<Value *, 16> Args;
6475
6476 // Find out if any arguments are required to be integer constant
6477 // expressions.
6478 unsigned ICEArguments = 0;
6479 ASTContext::GetBuiltinTypeError Error;
6480 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
6481 assert(Error == ASTContext::GE_None && "Should not codegen an error");
6482
6483 Function *F = CGM.getIntrinsic(IntrinsicID);
6484 llvm::FunctionType *FTy = F->getFunctionType();
6485
6486 for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
6487 Value *ArgValue = EmitScalarOrConstFoldImmArg(ICEArguments, i, E);
6488 // If the intrinsic arg type is different from the builtin arg type
6489 // we need to do a bit cast.
6490 llvm::Type *PTy = FTy->getParamType(i);
6491 if (PTy != ArgValue->getType()) {
6492 // XXX - vector of pointers?
6493 if (auto *PtrTy = dyn_cast<llvm::PointerType>(PTy)) {
6494 if (PtrTy->getAddressSpace() !=
6495 ArgValue->getType()->getPointerAddressSpace()) {
6496 ArgValue = Builder.CreateAddrSpaceCast(
6497 ArgValue, llvm::PointerType::get(getLLVMContext(),
6498 PtrTy->getAddressSpace()));
6499 }
6500 }
6501
6502 // Cast vector type (e.g., v256i32) to x86_amx; this only happens
6503 // in AMX intrinsics.
6504 if (PTy->isX86_AMXTy())
6505 ArgValue = Builder.CreateIntrinsic(Intrinsic::x86_cast_vector_to_tile,
6506 {ArgValue->getType()}, {ArgValue});
6507 else
6508 ArgValue = Builder.CreateBitCast(ArgValue, PTy);
6509 }
6510
6511 Args.push_back(ArgValue);
6512 }
6513
6514 Value *V = Builder.CreateCall(F, Args);
6515 QualType BuiltinRetType = E->getType();
6516
6517 llvm::Type *RetTy = VoidTy;
6518 if (!BuiltinRetType->isVoidType())
6519 RetTy = ConvertType(BuiltinRetType);
6520
6521 if (RetTy != V->getType()) {
6522 // XXX - vector of pointers?
6523 if (auto *PtrTy = dyn_cast<llvm::PointerType>(RetTy)) {
6524 if (PtrTy->getAddressSpace() != V->getType()->getPointerAddressSpace()) {
6525 V = Builder.CreateAddrSpaceCast(
6526 V, llvm::PointerType::get(getLLVMContext(),
6527 PtrTy->getAddressSpace()));
6528 }
6529 }
6530
6531 // Cast x86_amx to vector type (e.g., v256i32); this only happens
6532 // in AMX intrinsics.
6533 if (V->getType()->isX86_AMXTy())
6534 V = Builder.CreateIntrinsic(Intrinsic::x86_cast_tile_to_vector, {RetTy},
6535 {V});
6536 else
6537 V = Builder.CreateBitCast(V, RetTy);
6538 }
6539
6540 if (RetTy->isVoidTy())
6541 return RValue::get(nullptr);
6542
6543 return RValue::get(V);
6544 }
6545
6546 // Some target-specific builtins can have aggregate return values, e.g.
6547 // __builtin_arm_mve_vld2q_u32. So if the result is an aggregate, force
6548 // ReturnValue to be non-null, so that the target-specific emission code can
6549 // always just emit into it.
6550 TypeEvaluationKind EvalKind = getEvaluationKind(E->getType());
6551 if (EvalKind == TEK_Aggregate && ReturnValue.isNull()) {
6552 Address DestPtr = CreateMemTemp(E->getType(), "agg.tmp");
6553 ReturnValue = ReturnValueSlot(DestPtr, false);
6554 }
6555
6556 // Now see if we can emit a target-specific builtin.
6557 if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E, ReturnValue)) {
6558 switch (EvalKind) {
6559 case TEK_Scalar:
6560 if (V->getType()->isVoidTy())
6561 return RValue::get(nullptr);
6562 return RValue::get(V);
6563 case TEK_Aggregate:
6564 return RValue::getAggregate(ReturnValue.getAddress(),
6565 ReturnValue.isVolatile());
6566 case TEK_Complex:
6567 llvm_unreachable("No current target builtin returns complex");
6568 }
6569 llvm_unreachable("Bad evaluation kind in EmitBuiltinExpr");
6570 }
6571
6572 // EmitHLSLBuiltinExpr will check getLangOpts().HLSL
6573 if (Value *V = EmitHLSLBuiltinExpr(BuiltinID, E, ReturnValue)) {
6574 switch (EvalKind) {
6575 case TEK_Scalar:
6576 if (V->getType()->isVoidTy())
6577 return RValue::get(nullptr);
6578 return RValue::get(V);
6579 case TEK_Aggregate:
6580 return RValue::getAggregate(ReturnValue.getAddress(),
6581 ReturnValue.isVolatile());
6582 case TEK_Complex:
6583 llvm_unreachable("No current hlsl builtin returns complex");
6584 }
6585 llvm_unreachable("Bad evaluation kind in EmitBuiltinExpr");
6586 }
6587
6588 if (getLangOpts().HIPStdPar && getLangOpts().CUDAIsDevice)
6589 return EmitHipStdParUnsupportedBuiltin(this, FD);
6590
6591 ErrorUnsupported(E, "builtin function");
6592
6593 // Unknown builtin, for now just dump it out and return undef.
6594 return GetUndefRValue(E->getType());
6595}
6596
6597 static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF,
6598 unsigned BuiltinID, const CallExpr *E,
6599 ReturnValueSlot ReturnValue,
6600 llvm::Triple::ArchType Arch) {
6601 // When compiling in HipStdPar mode we have to be conservative in rejecting
6602 // target specific features in the FE, and defer the possible error to the
6603 // AcceleratorCodeSelection pass, wherein iff an unsupported target builtin is
6604 // referenced by an accelerator executable function, we emit an error.
6605 // Returning nullptr here leads to the builtin being handled in
6606 // EmitHipStdParUnsupportedBuiltin.
6607 if (CGF->getLangOpts().HIPStdPar && CGF->getLangOpts().CUDAIsDevice &&
6608 Arch != CGF->getTarget().getTriple().getArch())
6609 return nullptr;
6610
6611 switch (Arch) {
6612 case llvm::Triple::arm:
6613 case llvm::Triple::armeb:
6614 case llvm::Triple::thumb:
6615 case llvm::Triple::thumbeb:
6616 return CGF->EmitARMBuiltinExpr(BuiltinID, E, ReturnValue, Arch);
6617 case llvm::Triple::aarch64:
6618 case llvm::Triple::aarch64_32:
6619 case llvm::Triple::aarch64_be:
6620 return CGF->EmitAArch64BuiltinExpr(BuiltinID, E, Arch);
6621 case llvm::Triple::bpfeb:
6622 case llvm::Triple::bpfel:
6623 return CGF->EmitBPFBuiltinExpr(BuiltinID, E);
6624 case llvm::Triple::x86:
6625 case llvm::Triple::x86_64:
6626 return CGF->EmitX86BuiltinExpr(BuiltinID, E);
6627 case llvm::Triple::ppc:
6628 case llvm::Triple::ppcle:
6629 case llvm::Triple::ppc64:
6630 case llvm::Triple::ppc64le:
6631 return CGF->EmitPPCBuiltinExpr(BuiltinID, E);
6632 case llvm::Triple::r600:
6633 case llvm::Triple::amdgcn:
6634 return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E);
6635 case llvm::Triple::systemz:
6636 return CGF->EmitSystemZBuiltinExpr(BuiltinID, E);
6637 case llvm::Triple::nvptx:
6638 case llvm::Triple::nvptx64:
6639 return CGF->EmitNVPTXBuiltinExpr(BuiltinID, E);
6640 case llvm::Triple::wasm32:
6641 case llvm::Triple::wasm64:
6642 return CGF->EmitWebAssemblyBuiltinExpr(BuiltinID, E);
6643 case llvm::Triple::hexagon:
6644 return CGF->EmitHexagonBuiltinExpr(BuiltinID, E);
6645 case llvm::Triple::riscv32:
6646 case llvm::Triple::riscv64:
6647 return CGF->EmitRISCVBuiltinExpr(BuiltinID, E, ReturnValue);
6648 case llvm::Triple::spirv:
6649 return CGF->EmitSPIRVBuiltinExpr(BuiltinID, E);
6650 case llvm::Triple::spirv64:
6651 if (CGF->getTarget().getTriple().getOS() != llvm::Triple::OSType::AMDHSA)
6652 return nullptr;
6653 return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E);
6654 default:
6655 return nullptr;
6656 }
6657}
6658
6659 Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
6660 const CallExpr *E,
6661 ReturnValueSlot ReturnValue) {
6662 if (getContext().BuiltinInfo.isAuxBuiltinID(BuiltinID)) {
6663 assert(getContext().getAuxTargetInfo() && "Missing aux target info");
6664 return EmitTargetArchBuiltinExpr(
6665 this, getContext().BuiltinInfo.getAuxBuiltinID(BuiltinID), E,
6666 ReturnValue, getContext().getAuxTargetInfo()->getTriple().getArch());
6667 }
6668
6669 return EmitTargetArchBuiltinExpr(this, BuiltinID, E, ReturnValue,
6670 getTarget().getTriple().getArch());
6671}
6672
6673static llvm::FixedVectorType *GetNeonType(CodeGenFunction *CGF,
6674 NeonTypeFlags TypeFlags,
6675 bool HasLegalHalfType = true,
6676 bool V1Ty = false,
6677 bool AllowBFloatArgsAndRet = true) {
6678 int IsQuad = TypeFlags.isQuad();
6679 switch (TypeFlags.getEltType()) {
6680 case NeonTypeFlags::Int8:
6681 case NeonTypeFlags::Poly8:
6682 return llvm::FixedVectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
6683 case NeonTypeFlags::Int16:
6684 case NeonTypeFlags::Poly16:
6685 return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
6686 case NeonTypeFlags::BFloat16:
6687 if (AllowBFloatArgsAndRet)
6688 return llvm::FixedVectorType::get(CGF->BFloatTy, V1Ty ? 1 : (4 << IsQuad));
6689 else
6690 return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
6691 case NeonTypeFlags::Float16:
6692 if (HasLegalHalfType)
6693 return llvm::FixedVectorType::get(CGF->HalfTy, V1Ty ? 1 : (4 << IsQuad));
6694 else
6695 return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
6696 case NeonTypeFlags::Int32:
6697 return llvm::FixedVectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
6698 case NeonTypeFlags::Int64:
6699 case NeonTypeFlags::Poly64:
6700 return llvm::FixedVectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
6701 case NeonTypeFlags::Poly128:
6702 // FIXME: i128 and f128 are not fully supported in Clang and LLVM;
6703 // a lot of the i128 and f128 API is missing,
6704 // so we use v16i8 to represent poly128 and rely on pattern matching.
6705 return llvm::FixedVectorType::get(CGF->Int8Ty, 16);
6706 case NeonTypeFlags::Float32:
6707 return llvm::FixedVectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
6708 case NeonTypeFlags::Float64:
6709 return llvm::FixedVectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad));
6710 }
6711 llvm_unreachable("Unknown vector element type!");
6712}
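// Example (illustrative): element type Int32 with the quad bit set yields
// (2 << 1) == 4 lanes, i.e. a <4 x i32> vector, while the non-quad form yields
// <2 x i32>; passing V1Ty = true forces a single-element vector instead.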
6713
6714static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF,
6715 NeonTypeFlags IntTypeFlags) {
6716 int IsQuad = IntTypeFlags.isQuad();
6717 switch (IntTypeFlags.getEltType()) {
6718 case NeonTypeFlags::Int16:
6719 return llvm::FixedVectorType::get(CGF->HalfTy, (4 << IsQuad));
6720 case NeonTypeFlags::Int32:
6721 return llvm::FixedVectorType::get(CGF->FloatTy, (2 << IsQuad));
6722 case NeonTypeFlags::Int64:
6723 return llvm::FixedVectorType::get(CGF->DoubleTy, (1 << IsQuad));
6724 default:
6725 llvm_unreachable("Type can't be converted to floating-point!");
6726 }
6727}
6728
6729 Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C,
6730 const ElementCount &Count) {
6731 Value *SV = llvm::ConstantVector::getSplat(Count, C);
6732 return Builder.CreateShuffleVector(V, V, SV, "lane");
6733}
6734
6735 Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
6736 ElementCount EC = cast<llvm::VectorType>(V->getType())->getElementCount();
6737 return EmitNeonSplat(V, C, EC);
6738}
6739
6740 Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value *> &Ops,
6741 const char *name,
6742 unsigned shift, bool rightshift) {
6743 unsigned j = 0;
6744 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
6745 ai != ae; ++ai, ++j) {
6746 if (F->isConstrainedFPIntrinsic())
6747 if (ai->getType()->isMetadataTy())
6748 continue;
6749 if (shift > 0 && shift == j)
6750 Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
6751 else
6752 Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
6753 }
6754
6755 if (F->isConstrainedFPIntrinsic())
6756 return Builder.CreateConstrainedFPCall(F, Ops, name);
6757 else
6758 return Builder.CreateCall(F, Ops, name);
6759}
6760
6761 Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
6762 bool neg) {
6763 int SV = cast<ConstantInt>(V)->getSExtValue();
6764 return ConstantInt::get(Ty, neg ? -SV : SV);
6765}
6766
6767 // Right-shift a vector by a constant.
6768 Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,
6769 llvm::Type *Ty, bool usgn,
6770 const char *name) {
6771 llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
6772
6773 int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue();
6774 int EltSize = VTy->getScalarSizeInBits();
6775
6776 Vec = Builder.CreateBitCast(Vec, Ty);
6777
6778 // lshr/ashr are undefined when the shift amount is equal to the vector
6779 // element size.
6780 if (ShiftAmt == EltSize) {
6781 if (usgn) {
6782 // Right-shifting an unsigned value by its size yields 0.
6783 return llvm::ConstantAggregateZero::get(VTy);
6784 } else {
6785 // Right-shifting a signed value by its size is equivalent
6786 // to a shift of size-1.
6787 --ShiftAmt;
6788 Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt);
6789 }
6790 }
6791
6792 Shift = EmitNeonShiftVector(Shift, Ty, false);
6793 if (usgn)
6794 return Builder.CreateLShr(Vec, Shift, name);
6795 else
6796 return Builder.CreateAShr(Vec, Shift, name);
6797}
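// Example (illustrative): right-shifting a <4 x i32> vector by the constant 32
// cannot be emitted directly (lshr/ashr by the element width is undefined), so
// the code above folds the unsigned case to an all-zero vector and rewrites
// the signed case as an arithmetic shift by 31 before emitting the shift.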
6798
6799enum {
6800 AddRetType = (1 << 0),
6801 Add1ArgType = (1 << 1),
6802 Add2ArgTypes = (1 << 2),
6803
6804 VectorizeRetType = (1 << 3),
6805 VectorizeArgTypes = (1 << 4),
6806
6807 InventFloatType = (1 << 5),
6808 UnsignedAlts = (1 << 6),
6809
6810 Use64BitVectors = (1 << 7),
6811 Use128BitVectors = (1 << 8),
6812
6813 Vectorize1ArgType = Add1ArgType | VectorizeArgTypes,
6814 VectorRet = AddRetType | VectorizeRetType,
6815 VectorRetGetArgs01 =
6816 AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes,
6817 FpCmpzModifiers =
6818 AddRetType | VectorizeRetType | Add1ArgType | InventFloatType
6819};
6820
6821namespace {
6822struct ARMVectorIntrinsicInfo {
6823 const char *NameHint;
6824 unsigned BuiltinID;
6825 unsigned LLVMIntrinsic;
6826 unsigned AltLLVMIntrinsic;
6827 uint64_t TypeModifier;
6828
6829 bool operator<(unsigned RHSBuiltinID) const {
6830 return BuiltinID < RHSBuiltinID;
6831 }
6832 bool operator<(const ARMVectorIntrinsicInfo &TE) const {
6833 return BuiltinID < TE.BuiltinID;
6834 }
6835};
6836} // end anonymous namespace
6837
6838#define NEONMAP0(NameBase) \
6839 { #NameBase, NEON::BI__builtin_neon_ ## NameBase, 0, 0, 0 }
6840
6841#define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
6842 { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
6843 Intrinsic::LLVMIntrinsic, 0, TypeModifier }
6844
6845#define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \
6846 { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
6847 Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \
6848 TypeModifier }
6849
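// For reference, an entry such as
//   NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts)
// expands (per the macros above) to an ARMVectorIntrinsicInfo of the form
//   { "vabd_v", NEON::BI__builtin_neon_vabd_v,
//     Intrinsic::arm_neon_vabdu, Intrinsic::arm_neon_vabds,
//     Add1ArgType | UnsignedAlts }
// i.e. a name hint, the builtin ID, a primary LLVM intrinsic, an alternative
// (typically the signed/unsigned counterpart) intrinsic, and a type-modifier
// bitmask.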
6850static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap [] = {
6851 NEONMAP1(__a32_vcvt_bf16_f32, arm_neon_vcvtfp2bf, 0),
6852 NEONMAP0(splat_lane_v),
6853 NEONMAP0(splat_laneq_v),
6854 NEONMAP0(splatq_lane_v),
6855 NEONMAP0(splatq_laneq_v),
6856 NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
6857 NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
6858 NEONMAP1(vabs_v, arm_neon_vabs, 0),
6859 NEONMAP1(vabsq_v, arm_neon_vabs, 0),
6860 NEONMAP0(vadd_v),
6861 NEONMAP0(vaddhn_v),
6862 NEONMAP0(vaddq_v),
6863 NEONMAP1(vaesdq_u8, arm_neon_aesd, 0),
6864 NEONMAP1(vaeseq_u8, arm_neon_aese, 0),
6865 NEONMAP1(vaesimcq_u8, arm_neon_aesimc, 0),
6866 NEONMAP1(vaesmcq_u8, arm_neon_aesmc, 0),
6867 NEONMAP1(vbfdot_f32, arm_neon_bfdot, 0),
6868 NEONMAP1(vbfdotq_f32, arm_neon_bfdot, 0),
6869 NEONMAP1(vbfmlalbq_f32, arm_neon_bfmlalb, 0),
6870 NEONMAP1(vbfmlaltq_f32, arm_neon_bfmlalt, 0),
6871 NEONMAP1(vbfmmlaq_f32, arm_neon_bfmmla, 0),
6872 NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
6873 NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
6874 NEONMAP1(vcadd_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),
6875 NEONMAP1(vcadd_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),
6876 NEONMAP1(vcadd_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),
6877 NEONMAP1(vcadd_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),
6878 NEONMAP1(vcaddq_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),
6879 NEONMAP1(vcaddq_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),
6880 NEONMAP1(vcaddq_rot270_f64, arm_neon_vcadd_rot270, Add1ArgType),
6881 NEONMAP1(vcaddq_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),
6882 NEONMAP1(vcaddq_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),
6883 NEONMAP1(vcaddq_rot90_f64, arm_neon_vcadd_rot90, Add1ArgType),
6884 NEONMAP1(vcage_v, arm_neon_vacge, 0),
6885 NEONMAP1(vcageq_v, arm_neon_vacge, 0),
6886 NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
6887 NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
6888 NEONMAP1(vcale_v, arm_neon_vacge, 0),
6889 NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
6890 NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
6891 NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
6892 NEONMAP0(vceqz_v),
6893 NEONMAP0(vceqzq_v),
6894 NEONMAP0(vcgez_v),
6895 NEONMAP0(vcgezq_v),
6896 NEONMAP0(vcgtz_v),
6897 NEONMAP0(vcgtzq_v),
6898 NEONMAP0(vclez_v),
6899 NEONMAP0(vclezq_v),
6900 NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType),
6901 NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType),
6902 NEONMAP0(vcltz_v),
6903 NEONMAP0(vcltzq_v),
6904 NEONMAP1(vclz_v, ctlz, Add1ArgType),
6905 NEONMAP1(vclzq_v, ctlz, Add1ArgType),
6906 NEONMAP1(vcnt_v, ctpop, Add1ArgType),
6907 NEONMAP1(vcntq_v, ctpop, Add1ArgType),
6908 NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
6909 NEONMAP0(vcvt_f16_s16),
6910 NEONMAP0(vcvt_f16_u16),
6911 NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
6912 NEONMAP0(vcvt_f32_v),
6913 NEONMAP1(vcvt_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
6914 NEONMAP1(vcvt_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
6915 NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
6916 NEONMAP1(vcvt_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
6917 NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
6918 NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
6919 NEONMAP1(vcvt_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
6920 NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
6921 NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
6922 NEONMAP0(vcvt_s16_f16),
6923 NEONMAP0(vcvt_s32_v),
6924 NEONMAP0(vcvt_s64_v),
6925 NEONMAP0(vcvt_u16_f16),
6926 NEONMAP0(vcvt_u32_v),
6927 NEONMAP0(vcvt_u64_v),
6928 NEONMAP1(vcvta_s16_f16, arm_neon_vcvtas, 0),
6929 NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
6930 NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
6931 NEONMAP1(vcvta_u16_f16, arm_neon_vcvtau, 0),
6932 NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
6933 NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
6934 NEONMAP1(vcvtaq_s16_f16, arm_neon_vcvtas, 0),
6935 NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
6936 NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
6937 NEONMAP1(vcvtaq_u16_f16, arm_neon_vcvtau, 0),
6938 NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
6939 NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
6940 NEONMAP1(vcvth_bf16_f32, arm_neon_vcvtbfp2bf, 0),
6941 NEONMAP1(vcvtm_s16_f16, arm_neon_vcvtms, 0),
6942 NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
6943 NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
6944 NEONMAP1(vcvtm_u16_f16, arm_neon_vcvtmu, 0),
6945 NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
6946 NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
6947 NEONMAP1(vcvtmq_s16_f16, arm_neon_vcvtms, 0),
6948 NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
6949 NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
6950 NEONMAP1(vcvtmq_u16_f16, arm_neon_vcvtmu, 0),
6951 NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
6952 NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
6953 NEONMAP1(vcvtn_s16_f16, arm_neon_vcvtns, 0),
6954 NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
6955 NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
6956 NEONMAP1(vcvtn_u16_f16, arm_neon_vcvtnu, 0),
6957 NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
6958 NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
6959 NEONMAP1(vcvtnq_s16_f16, arm_neon_vcvtns, 0),
6960 NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
6961 NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
6962 NEONMAP1(vcvtnq_u16_f16, arm_neon_vcvtnu, 0),
6963 NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
6964 NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
6965 NEONMAP1(vcvtp_s16_f16, arm_neon_vcvtps, 0),
6966 NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
6967 NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
6968 NEONMAP1(vcvtp_u16_f16, arm_neon_vcvtpu, 0),
6969 NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
6970 NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
6971 NEONMAP1(vcvtpq_s16_f16, arm_neon_vcvtps, 0),
6972 NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
6973 NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
6974 NEONMAP1(vcvtpq_u16_f16, arm_neon_vcvtpu, 0),
6975 NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
6976 NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
6977 NEONMAP0(vcvtq_f16_s16),
6978 NEONMAP0(vcvtq_f16_u16),
6979 NEONMAP0(vcvtq_f32_v),
6980 NEONMAP1(vcvtq_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
6981 NEONMAP1(vcvtq_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
6982 NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
6983 NEONMAP1(vcvtq_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
6984 NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
6985 NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
6986 NEONMAP1(vcvtq_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
6987 NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
6988 NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
6989 NEONMAP0(vcvtq_s16_f16),
6990 NEONMAP0(vcvtq_s32_v),
6991 NEONMAP0(vcvtq_s64_v),
6992 NEONMAP0(vcvtq_u16_f16),
6993 NEONMAP0(vcvtq_u32_v),
6994 NEONMAP0(vcvtq_u64_v),
6995 NEONMAP1(vdot_s32, arm_neon_sdot, 0),
6996 NEONMAP1(vdot_u32, arm_neon_udot, 0),
6997 NEONMAP1(vdotq_s32, arm_neon_sdot, 0),
6998 NEONMAP1(vdotq_u32, arm_neon_udot, 0),
6999 NEONMAP0(vext_v),
7000 NEONMAP0(vextq_v),
7001 NEONMAP0(vfma_v),
7002 NEONMAP0(vfmaq_v),
7003 NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
7004 NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
7005 NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
7006 NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
7007 NEONMAP0(vld1_dup_v),
7008 NEONMAP1(vld1_v, arm_neon_vld1, 0),
7009 NEONMAP1(vld1_x2_v, arm_neon_vld1x2, 0),
7010 NEONMAP1(vld1_x3_v, arm_neon_vld1x3, 0),
7011 NEONMAP1(vld1_x4_v, arm_neon_vld1x4, 0),
7012 NEONMAP0(vld1q_dup_v),
7013 NEONMAP1(vld1q_v, arm_neon_vld1, 0),
7014 NEONMAP1(vld1q_x2_v, arm_neon_vld1x2, 0),
7015 NEONMAP1(vld1q_x3_v, arm_neon_vld1x3, 0),
7016 NEONMAP1(vld1q_x4_v, arm_neon_vld1x4, 0),
7017 NEONMAP1(vld2_dup_v, arm_neon_vld2dup, 0),
7018 NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
7019 NEONMAP1(vld2_v, arm_neon_vld2, 0),
7020 NEONMAP1(vld2q_dup_v, arm_neon_vld2dup, 0),
7021 NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
7022 NEONMAP1(vld2q_v, arm_neon_vld2, 0),
7023 NEONMAP1(vld3_dup_v, arm_neon_vld3dup, 0),
7024 NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
7025 NEONMAP1(vld3_v, arm_neon_vld3, 0),
7026 NEONMAP1(vld3q_dup_v, arm_neon_vld3dup, 0),
7027 NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
7028 NEONMAP1(vld3q_v, arm_neon_vld3, 0),
7029 NEONMAP1(vld4_dup_v, arm_neon_vld4dup, 0),
7030 NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
7031 NEONMAP1(vld4_v, arm_neon_vld4, 0),
7032 NEONMAP1(vld4q_dup_v, arm_neon_vld4dup, 0),
7033 NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
7034 NEONMAP1(vld4q_v, arm_neon_vld4, 0),
7035 NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
7036 NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType),
7037 NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType),
7038 NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
7039 NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
7040 NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType),
7041 NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType),
7042 NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
7043 NEONMAP1(vmmlaq_s32, arm_neon_smmla, 0),
7044 NEONMAP1(vmmlaq_u32, arm_neon_ummla, 0),
7045 NEONMAP0(vmovl_v),
7046 NEONMAP0(vmovn_v),
7047 NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType),
7048 NEONMAP0(vmull_v),
7049 NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType),
7050 NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
7051 NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
7052 NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType),
7053 NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
7054 NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
7055 NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType),
7056 NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts),
7057 NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts),
7058 NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType),
7059 NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType),
7060 NEONMAP2(vqadd_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),
7061 NEONMAP2(vqaddq_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),
7062 NEONMAP2(vqdmlal_v, arm_neon_vqdmull, sadd_sat, 0),
7063 NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, ssub_sat, 0),
7064 NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType),
7065 NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType),
7066 NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType),
7067 NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts),
7068 NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType),
7069 NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType),
7070 NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType),
7071 NEONMAP1(vqrdmlah_s16, arm_neon_vqrdmlah, Add1ArgType),
7072 NEONMAP1(vqrdmlah_s32, arm_neon_vqrdmlah, Add1ArgType),
7073 NEONMAP1(vqrdmlahq_s16, arm_neon_vqrdmlah, Add1ArgType),
7074 NEONMAP1(vqrdmlahq_s32, arm_neon_vqrdmlah, Add1ArgType),
7075 NEONMAP1(vqrdmlsh_s16, arm_neon_vqrdmlsh, Add1ArgType),
7076 NEONMAP1(vqrdmlsh_s32, arm_neon_vqrdmlsh, Add1ArgType),
7077 NEONMAP1(vqrdmlshq_s16, arm_neon_vqrdmlsh, Add1ArgType),
7078 NEONMAP1(vqrdmlshq_s32, arm_neon_vqrdmlsh, Add1ArgType),
7079 NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType),
7080 NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType),
7081 NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
7082 NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
7083 NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
7084 NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
7085 NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
7086 NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
7087 NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
7088 NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
7089 NEONMAP2(vqsub_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),
7090 NEONMAP2(vqsubq_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),
7091 NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType),
7092 NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
7093 NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
7094 NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType),
7095 NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType),
7096 NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
7097 NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
7098 NEONMAP1(vrnd_v, arm_neon_vrintz, Add1ArgType),
7099 NEONMAP1(vrnda_v, arm_neon_vrinta, Add1ArgType),
7100 NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType),
7101 NEONMAP0(vrndi_v),
7102 NEONMAP0(vrndiq_v),
7103 NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType),
7104 NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType),
7105 NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType),
7106 NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType),
7107 NEONMAP1(vrndp_v, arm_neon_vrintp, Add1ArgType),
7108 NEONMAP1(vrndpq_v, arm_neon_vrintp, Add1ArgType),
7109 NEONMAP1(vrndq_v, arm_neon_vrintz, Add1ArgType),
7110 NEONMAP1(vrndx_v, arm_neon_vrintx, Add1ArgType),
7111 NEONMAP1(vrndxq_v, arm_neon_vrintx, Add1ArgType),
7112 NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
7113 NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
7114 NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
7115 NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
7116 NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
7117 NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
7118 NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),
7119 NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),
7120 NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),
7121 NEONMAP1(vsha1su0q_u32, arm_neon_sha1su0, 0),
7122 NEONMAP1(vsha1su1q_u32, arm_neon_sha1su1, 0),
7123 NEONMAP1(vsha256h2q_u32, arm_neon_sha256h2, 0),
7124 NEONMAP1(vsha256hq_u32, arm_neon_sha256h, 0),
7125 NEONMAP1(vsha256su0q_u32, arm_neon_sha256su0, 0),
7126 NEONMAP1(vsha256su1q_u32, arm_neon_sha256su1, 0),
7127 NEONMAP0(vshl_n_v),
7128 NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
7129 NEONMAP0(vshll_n_v),
7130 NEONMAP0(vshlq_n_v),
7131 NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
7132 NEONMAP0(vshr_n_v),
7133 NEONMAP0(vshrn_n_v),
7134 NEONMAP0(vshrq_n_v),
7135 NEONMAP1(vst1_v, arm_neon_vst1, 0),
7136 NEONMAP1(vst1_x2_v, arm_neon_vst1x2, 0),
7137 NEONMAP1(vst1_x3_v, arm_neon_vst1x3, 0),
7138 NEONMAP1(vst1_x4_v, arm_neon_vst1x4, 0),
7139 NEONMAP1(vst1q_v, arm_neon_vst1, 0),
7140 NEONMAP1(vst1q_x2_v, arm_neon_vst1x2, 0),
7141 NEONMAP1(vst1q_x3_v, arm_neon_vst1x3, 0),
7142 NEONMAP1(vst1q_x4_v, arm_neon_vst1x4, 0),
7143 NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
7144 NEONMAP1(vst2_v, arm_neon_vst2, 0),
7145 NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
7146 NEONMAP1(vst2q_v, arm_neon_vst2, 0),
7147 NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
7148 NEONMAP1(vst3_v, arm_neon_vst3, 0),
7149 NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
7150 NEONMAP1(vst3q_v, arm_neon_vst3, 0),
7151 NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
7152 NEONMAP1(vst4_v, arm_neon_vst4, 0),
7153 NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
7154 NEONMAP1(vst4q_v, arm_neon_vst4, 0),
7155 NEONMAP0(vsubhn_v),
7156 NEONMAP0(vtrn_v),
7157 NEONMAP0(vtrnq_v),
7158 NEONMAP0(vtst_v),
7159 NEONMAP0(vtstq_v),
7160 NEONMAP1(vusdot_s32, arm_neon_usdot, 0),
7161 NEONMAP1(vusdotq_s32, arm_neon_usdot, 0),
7162 NEONMAP1(vusmmlaq_s32, arm_neon_usmmla, 0),
7163 NEONMAP0(vuzp_v),
7164 NEONMAP0(vuzpq_v),
7165 NEONMAP0(vzip_v),
7166 NEONMAP0(vzipq_v)
7167};
7168
7169static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
7170 NEONMAP0(splat_lane_v),
7171 NEONMAP0(splat_laneq_v),
7172 NEONMAP0(splatq_lane_v),
7173 NEONMAP0(splatq_laneq_v),
7174 NEONMAP1(vabs_v, aarch64_neon_abs, 0),
7175 NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
7176 NEONMAP0(vadd_v),
7177 NEONMAP0(vaddhn_v),
7178 NEONMAP0(vaddq_p128),
7179 NEONMAP0(vaddq_v),
7180 NEONMAP1(vaesdq_u8, aarch64_crypto_aesd, 0),
7181 NEONMAP1(vaeseq_u8, aarch64_crypto_aese, 0),
7182 NEONMAP1(vaesimcq_u8, aarch64_crypto_aesimc, 0),
7183 NEONMAP1(vaesmcq_u8, aarch64_crypto_aesmc, 0),
7184 NEONMAP2(vbcaxq_s16, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
7185 NEONMAP2(vbcaxq_s32, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
7186 NEONMAP2(vbcaxq_s64, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
7187 NEONMAP2(vbcaxq_s8, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
7188 NEONMAP2(vbcaxq_u16, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
7189 NEONMAP2(vbcaxq_u32, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
7190 NEONMAP2(vbcaxq_u64, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
7191 NEONMAP2(vbcaxq_u8, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
7192 NEONMAP1(vbfdot_f32, aarch64_neon_bfdot, 0),
7193 NEONMAP1(vbfdotq_f32, aarch64_neon_bfdot, 0),
7194 NEONMAP1(vbfmlalbq_f32, aarch64_neon_bfmlalb, 0),
7195 NEONMAP1(vbfmlaltq_f32, aarch64_neon_bfmlalt, 0),
7196 NEONMAP1(vbfmmlaq_f32, aarch64_neon_bfmmla, 0),
7197 NEONMAP1(vcadd_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType),
7198 NEONMAP1(vcadd_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType),
7199 NEONMAP1(vcadd_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType),
7200 NEONMAP1(vcadd_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType),
7201 NEONMAP1(vcaddq_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType),
7202 NEONMAP1(vcaddq_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType),
7203 NEONMAP1(vcaddq_rot270_f64, aarch64_neon_vcadd_rot270, Add1ArgType),
7204 NEONMAP1(vcaddq_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType),
7205 NEONMAP1(vcaddq_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType),
7206 NEONMAP1(vcaddq_rot90_f64, aarch64_neon_vcadd_rot90, Add1ArgType),
7207 NEONMAP1(vcage_v, aarch64_neon_facge, 0),
7208 NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
7209 NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
7210 NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0),
7211 NEONMAP1(vcale_v, aarch64_neon_facge, 0),
7212 NEONMAP1(vcaleq_v, aarch64_neon_facge, 0),
7213 NEONMAP1(vcalt_v, aarch64_neon_facgt, 0),
7214 NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0),
7215 NEONMAP0(vceqz_v),
7216 NEONMAP0(vceqzq_v),
7217 NEONMAP0(vcgez_v),
7218 NEONMAP0(vcgezq_v),
7219 NEONMAP0(vcgtz_v),
7220 NEONMAP0(vcgtzq_v),
7221 NEONMAP0(vclez_v),
7222 NEONMAP0(vclezq_v),
7223 NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType),
7224 NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType),
7225 NEONMAP0(vcltz_v),
7226 NEONMAP0(vcltzq_v),
7227 NEONMAP1(vclz_v, ctlz, Add1ArgType),
7228 NEONMAP1(vclzq_v, ctlz, Add1ArgType),
7229 NEONMAP1(vcmla_f16, aarch64_neon_vcmla_rot0, Add1ArgType),
7230 NEONMAP1(vcmla_f32, aarch64_neon_vcmla_rot0, Add1ArgType),
7231 NEONMAP1(vcmla_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType),
7232 NEONMAP1(vcmla_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType),
7233 NEONMAP1(vcmla_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType),
7234 NEONMAP1(vcmla_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType),
7235 NEONMAP1(vcmla_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType),
7236 NEONMAP1(vcmla_rot90_f32, aarch64_neon_vcmla_rot90, Add1ArgType),
7237 NEONMAP1(vcmlaq_f16, aarch64_neon_vcmla_rot0, Add1ArgType),
7238 NEONMAP1(vcmlaq_f32, aarch64_neon_vcmla_rot0, Add1ArgType),
7239 NEONMAP1(vcmlaq_f64, aarch64_neon_vcmla_rot0, Add1ArgType),
7240 NEONMAP1(vcmlaq_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType),
7241 NEONMAP1(vcmlaq_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType),
7242 NEONMAP1(vcmlaq_rot180_f64, aarch64_neon_vcmla_rot180, Add1ArgType),
7243 NEONMAP1(vcmlaq_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType),
7244 NEONMAP1(vcmlaq_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType),
7245 NEONMAP1(vcmlaq_rot270_f64, aarch64_neon_vcmla_rot270, Add1ArgType),
7246 NEONMAP1(vcmlaq_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType),
7247 NEONMAP1(vcmlaq_rot90_f32, aarch64_neon_vcmla_rot90, Add1ArgType),
7248 NEONMAP1(vcmlaq_rot90_f64, aarch64_neon_vcmla_rot90, Add1ArgType),
7249 NEONMAP1(vcnt_v, ctpop, Add1ArgType),
7250 NEONMAP1(vcntq_v, ctpop, Add1ArgType),
7251 NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
7252 NEONMAP0(vcvt_f16_s16),
7253 NEONMAP0(vcvt_f16_u16),
7254 NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
7255 NEONMAP0(vcvt_f32_v),
7256 NEONMAP1(vcvt_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),
7257 NEONMAP1(vcvt_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0),
7258 NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
7259 NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
7260 NEONMAP1(vcvt_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0),
7261 NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
7262 NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
7263 NEONMAP1(vcvt_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0),
7264 NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
7265 NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
7266 NEONMAP0(vcvtq_f16_s16),
7267 NEONMAP0(vcvtq_f16_u16),
7268 NEONMAP0(vcvtq_f32_v),
7269 NEONMAP0(vcvtq_high_bf16_f32),
7270 NEONMAP0(vcvtq_low_bf16_f32),
7271 NEONMAP1(vcvtq_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),
7272 NEONMAP1(vcvtq_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0),
7273 NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
7274 NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
7275 NEONMAP1(vcvtq_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0),
7276 NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
7277 NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
7278 NEONMAP1(vcvtq_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0),
7279 NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
7280 NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
7281 NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
7282 NEONMAP1(vdot_s32, aarch64_neon_sdot, 0),
7283 NEONMAP1(vdot_u32, aarch64_neon_udot, 0),
7284 NEONMAP1(vdotq_s32, aarch64_neon_sdot, 0),
7285 NEONMAP1(vdotq_u32, aarch64_neon_udot, 0),
7286 NEONMAP2(veor3q_s16, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
7287 NEONMAP2(veor3q_s32, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
7288 NEONMAP2(veor3q_s64, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
7289 NEONMAP2(veor3q_s8, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
7290 NEONMAP2(veor3q_u16, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
7291 NEONMAP2(veor3q_u32, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
7292 NEONMAP2(veor3q_u64, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
7293 NEONMAP2(veor3q_u8, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
7294 NEONMAP0(vext_v),
7295 NEONMAP0(vextq_v),
7296 NEONMAP0(vfma_v),
7297 NEONMAP0(vfmaq_v),
7298 NEONMAP1(vfmlal_high_f16, aarch64_neon_fmlal2, 0),
7299 NEONMAP1(vfmlal_low_f16, aarch64_neon_fmlal, 0),
7300 NEONMAP1(vfmlalq_high_f16, aarch64_neon_fmlal2, 0),
7301 NEONMAP1(vfmlalq_low_f16, aarch64_neon_fmlal, 0),
7302 NEONMAP1(vfmlsl_high_f16, aarch64_neon_fmlsl2, 0),
7303 NEONMAP1(vfmlsl_low_f16, aarch64_neon_fmlsl, 0),
7304 NEONMAP1(vfmlslq_high_f16, aarch64_neon_fmlsl2, 0),
7305 NEONMAP1(vfmlslq_low_f16, aarch64_neon_fmlsl, 0),
7306 NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
7307 NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
7308 NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
7309 NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
7310 NEONMAP1(vld1_x2_v, aarch64_neon_ld1x2, 0),
7311 NEONMAP1(vld1_x3_v, aarch64_neon_ld1x3, 0),
7312 NEONMAP1(vld1_x4_v, aarch64_neon_ld1x4, 0),
7313 NEONMAP1(vld1q_x2_v, aarch64_neon_ld1x2, 0),
7314 NEONMAP1(vld1q_x3_v, aarch64_neon_ld1x3, 0),
7315 NEONMAP1(vld1q_x4_v, aarch64_neon_ld1x4, 0),
7316 NEONMAP1(vmmlaq_s32, aarch64_neon_smmla, 0),
7317 NEONMAP1(vmmlaq_u32, aarch64_neon_ummla, 0),
7318 NEONMAP0(vmovl_v),
7319 NEONMAP0(vmovn_v),
7320 NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType),
7321 NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType),
7322 NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType),
7323 NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
7324 NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
7325 NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType),
7326 NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType),
7327 NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType),
7328 NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
7329 NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
7330 NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0),
7331 NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0),
7332 NEONMAP1(vqdmulh_lane_v, aarch64_neon_sqdmulh_lane, 0),
7333 NEONMAP1(vqdmulh_laneq_v, aarch64_neon_sqdmulh_laneq, 0),
7334 NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType),
7335 NEONMAP1(vqdmulhq_lane_v, aarch64_neon_sqdmulh_lane, 0),
7336 NEONMAP1(vqdmulhq_laneq_v, aarch64_neon_sqdmulh_laneq, 0),
7337 NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType),
7338 NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType),
7339 NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts),
7340 NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType),
7341 NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType),
7342 NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType),
7343 NEONMAP1(vqrdmlah_s16, aarch64_neon_sqrdmlah, Add1ArgType),
7344 NEONMAP1(vqrdmlah_s32, aarch64_neon_sqrdmlah, Add1ArgType),
7345 NEONMAP1(vqrdmlahq_s16, aarch64_neon_sqrdmlah, Add1ArgType),
7346 NEONMAP1(vqrdmlahq_s32, aarch64_neon_sqrdmlah, Add1ArgType),
7347 NEONMAP1(vqrdmlsh_s16, aarch64_neon_sqrdmlsh, Add1ArgType),
7348 NEONMAP1(vqrdmlsh_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
7349 NEONMAP1(vqrdmlshq_s16, aarch64_neon_sqrdmlsh, Add1ArgType),
7350 NEONMAP1(vqrdmlshq_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
7351 NEONMAP1(vqrdmulh_lane_v, aarch64_neon_sqrdmulh_lane, 0),
7352 NEONMAP1(vqrdmulh_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
7353 NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType),
7354 NEONMAP1(vqrdmulhq_lane_v, aarch64_neon_sqrdmulh_lane, 0),
7355 NEONMAP1(vqrdmulhq_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
7356 NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType),
7357 NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
7358 NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
7359 NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
7360 NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
7361 NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
7362 NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
7363 NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0),
7364 NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0),
7365 NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
7366 NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
7367 NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType),
7368 NEONMAP1(vrax1q_u64, aarch64_crypto_rax1, 0),
7369 NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
7370 NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
7371 NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType),
7372 NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType),
7373 NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
7374 NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
7375 NEONMAP1(vrnd32x_f32, aarch64_neon_frint32x, Add1ArgType),
7376 NEONMAP1(vrnd32x_f64, aarch64_neon_frint32x, Add1ArgType),
7377 NEONMAP1(vrnd32xq_f32, aarch64_neon_frint32x, Add1ArgType),
7378 NEONMAP1(vrnd32xq_f64, aarch64_neon_frint32x, Add1ArgType),
7379 NEONMAP1(vrnd32z_f32, aarch64_neon_frint32z, Add1ArgType),
7380 NEONMAP1(vrnd32z_f64, aarch64_neon_frint32z, Add1ArgType),
7381 NEONMAP1(vrnd32zq_f32, aarch64_neon_frint32z, Add1ArgType),
7382 NEONMAP1(vrnd32zq_f64, aarch64_neon_frint32z, Add1ArgType),
7383 NEONMAP1(vrnd64x_f32, aarch64_neon_frint64x, Add1ArgType),
7384 NEONMAP1(vrnd64x_f64, aarch64_neon_frint64x, Add1ArgType),
7385 NEONMAP1(vrnd64xq_f32, aarch64_neon_frint64x, Add1ArgType),
7386 NEONMAP1(vrnd64xq_f64, aarch64_neon_frint64x, Add1ArgType),
7387 NEONMAP1(vrnd64z_f32, aarch64_neon_frint64z, Add1ArgType),
7388 NEONMAP1(vrnd64z_f64, aarch64_neon_frint64z, Add1ArgType),
7389 NEONMAP1(vrnd64zq_f32, aarch64_neon_frint64z, Add1ArgType),
7390 NEONMAP1(vrnd64zq_f64, aarch64_neon_frint64z, Add1ArgType),
7391 NEONMAP0(vrndi_v),
7392 NEONMAP0(vrndiq_v),
7393 NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
7394 NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
7395 NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
7396 NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
7397 NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
7398 NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
7399 NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType),
7400 NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType),
7401 NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType),
7402 NEONMAP1(vsha1su0q_u32, aarch64_crypto_sha1su0, 0),
7403 NEONMAP1(vsha1su1q_u32, aarch64_crypto_sha1su1, 0),
7404 NEONMAP1(vsha256h2q_u32, aarch64_crypto_sha256h2, 0),
7405 NEONMAP1(vsha256hq_u32, aarch64_crypto_sha256h, 0),
7406 NEONMAP1(vsha256su0q_u32, aarch64_crypto_sha256su0, 0),
7407 NEONMAP1(vsha256su1q_u32, aarch64_crypto_sha256su1, 0),
7408 NEONMAP1(vsha512h2q_u64, aarch64_crypto_sha512h2, 0),
7409 NEONMAP1(vsha512hq_u64, aarch64_crypto_sha512h, 0),
7410 NEONMAP1(vsha512su0q_u64, aarch64_crypto_sha512su0, 0),
7411 NEONMAP1(vsha512su1q_u64, aarch64_crypto_sha512su1, 0),
7412 NEONMAP0(vshl_n_v),
7413 NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
7414 NEONMAP0(vshll_n_v),
7415 NEONMAP0(vshlq_n_v),
7416 NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
7417 NEONMAP0(vshr_n_v),
7418 NEONMAP0(vshrn_n_v),
7419 NEONMAP0(vshrq_n_v),
7420 NEONMAP1(vsm3partw1q_u32, aarch64_crypto_sm3partw1, 0),
7421 NEONMAP1(vsm3partw2q_u32, aarch64_crypto_sm3partw2, 0),
7422 NEONMAP1(vsm3ss1q_u32, aarch64_crypto_sm3ss1, 0),
7423 NEONMAP1(vsm3tt1aq_u32, aarch64_crypto_sm3tt1a, 0),
7424 NEONMAP1(vsm3tt1bq_u32, aarch64_crypto_sm3tt1b, 0),
7425 NEONMAP1(vsm3tt2aq_u32, aarch64_crypto_sm3tt2a, 0),
7426 NEONMAP1(vsm3tt2bq_u32, aarch64_crypto_sm3tt2b, 0),
7427 NEONMAP1(vsm4ekeyq_u32, aarch64_crypto_sm4ekey, 0),
7428 NEONMAP1(vsm4eq_u32, aarch64_crypto_sm4e, 0),
7429 NEONMAP1(vst1_x2_v, aarch64_neon_st1x2, 0),
7430 NEONMAP1(vst1_x3_v, aarch64_neon_st1x3, 0),
7431 NEONMAP1(vst1_x4_v, aarch64_neon_st1x4, 0),
7432 NEONMAP1(vst1q_x2_v, aarch64_neon_st1x2, 0),
7433 NEONMAP1(vst1q_x3_v, aarch64_neon_st1x3, 0),
7434 NEONMAP1(vst1q_x4_v, aarch64_neon_st1x4, 0),
7435 NEONMAP0(vsubhn_v),
7436 NEONMAP0(vtst_v),
7437 NEONMAP0(vtstq_v),
7438 NEONMAP1(vusdot_s32, aarch64_neon_usdot, 0),
7439 NEONMAP1(vusdotq_s32, aarch64_neon_usdot, 0),
7440 NEONMAP1(vusmmlaq_s32, aarch64_neon_usmmla, 0),
7441 NEONMAP1(vxarq_u64, aarch64_crypto_xar, 0),
7442};
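// Each NEONMAP* entry above ties one NEON builtin to up to two LLVM
// intrinsics plus type-modifier flags. For example, reading the table:
// vqadd_v maps to aarch64_neon_uqadd for unsigned element types and to
// aarch64_neon_sqadd otherwise (UnsignedAlts), and the argument type is
// appended as the intrinsic's overload type (Add1ArgType); how these flags
// are consumed is shown in LookupNeonLLVMIntrinsic further down.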
7443
7444static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = {
7445 NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType),
7446 NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType),
7447 NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType),
7448 NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
7449 NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
7450 NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
7451 NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
7452 NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
7453 NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
7454 NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
7455 NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
7456 NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType),
7457 NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
7458 NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType),
7459 NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
7460 NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
7461 NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
7462 NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
7463 NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
7464 NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
7465 NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
7466 NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
7467 NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
7468 NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
7469 NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
7470 NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
7471 NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
7472 NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
7473 NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
7474 NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
7475 NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
7476 NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
7477 NEONMAP1(vcvtd_s64_f64, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
7478 NEONMAP1(vcvtd_u64_f64, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
7479 NEONMAP0(vcvth_bf16_f32),
7480 NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
7481 NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
7482 NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
7483 NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
7484 NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
7485 NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
7486 NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
7487 NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
7488 NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
7489 NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
7490 NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
7491 NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
7492 NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
7493 NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
7494 NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
7495 NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
7496 NEONMAP1(vcvts_s32_f32, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
7497 NEONMAP1(vcvts_u32_f32, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
7498 NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0),
7499 NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
7500 NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
7501 NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
7502 NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
7503 NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
7504 NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
7505 NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
7506 NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
7507 NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
7508 NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
7509 NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
7510 NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
7511 NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
7512 NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
7513 NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
7514 NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
7515 NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
7516 NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
7517 NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
7518 NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
7519 NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0),
7520 NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType),
7521 NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType),
7522 NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
7523 NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
7524 NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
7525 NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
7526 NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
7527 NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
7528 NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
7529 NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
7530 NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
7531 NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
7532 NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
7533 NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType),
7534 NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
7535 NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType),
7536 NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
7537 NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
7538 NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType),
7539 NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType),
7540 NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
7541 NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
7542 NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType),
7543 NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType),
7544 NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors),
7545 NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType),
7546 NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors),
7547 NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0),
7548 NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType),
7549 NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType),
7550 NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
7551 NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
7552 NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
7553 NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
7554 NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType),
7555 NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
7556 NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
7557 NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
7558 NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType),
7559 NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
7560 NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType),
7561 NEONMAP1(vqrdmlahh_s16, aarch64_neon_sqrdmlah, Vectorize1ArgType | Use64BitVectors),
7562 NEONMAP1(vqrdmlahs_s32, aarch64_neon_sqrdmlah, Add1ArgType),
7563 NEONMAP1(vqrdmlshh_s16, aarch64_neon_sqrdmlsh, Vectorize1ArgType | Use64BitVectors),
7564 NEONMAP1(vqrdmlshs_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
7565 NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors),
7566 NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType),
7567 NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
7568 NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
7569 NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType),
7570 NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType),
7571 NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
7572 NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
7573 NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType),
7574 NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType),
7575 NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType),
7576 NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType),
7577 NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
7578 NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
7579 NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
7580 NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
7581 NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType),
7582 NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
7583 NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
7584 NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
7585 NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
7586 NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
7587 NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
7588 NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType),
7589 NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType),
7590 NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
7591 NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
7592 NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
7593 NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
7594 NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType),
7595 NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType),
7596 NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType),
7597 NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType),
7598 NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
7599 NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
7600 NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType),
7601 NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType),
7602 NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType),
7603 NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
7604 NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
7605 NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
7606 NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
7607 NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType),
7608 NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
7609 NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
7610 NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
7611 NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
7612 NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType),
7613 NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType),
7614 NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
7615 NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
7616 NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType),
7617 NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType),
7618 NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType),
7619 NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType),
7620 NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType),
7621 NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType),
7622 NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType),
7623 NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType),
7624 NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType),
7625 NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType),
7626 NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType),
7627 NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType),
7628 NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0),
7629 NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0),
7630 NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0),
7631 NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0),
7632 NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType),
7633 NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType),
7634 NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType),
7635 NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType),
7636 NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
7637 NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType),
7638 NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
7639 NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType),
7640 NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType),
7641 NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType),
7642 NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
7643 NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType),
7644 NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
7645 NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType),
7646 // FP16 scalar intrinsics go here.
7647 NEONMAP1(vabdh_f16, aarch64_sisd_fabd, Add1ArgType),
7648 NEONMAP1(vcvtah_s32_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
7649 NEONMAP1(vcvtah_s64_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
7650 NEONMAP1(vcvtah_u32_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
7651 NEONMAP1(vcvtah_u64_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
7652 NEONMAP1(vcvth_n_f16_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
7653 NEONMAP1(vcvth_n_f16_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
7654 NEONMAP1(vcvth_n_f16_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
7655 NEONMAP1(vcvth_n_f16_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
7656 NEONMAP1(vcvth_n_s32_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
7657 NEONMAP1(vcvth_n_s64_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
7658 NEONMAP1(vcvth_n_u32_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
7659 NEONMAP1(vcvth_n_u64_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
7660 NEONMAP1(vcvth_s32_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
7661 NEONMAP1(vcvth_s64_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
7662 NEONMAP1(vcvth_u32_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
7663 NEONMAP1(vcvth_u64_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
7664 NEONMAP1(vcvtmh_s32_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
7665 NEONMAP1(vcvtmh_s64_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
7666 NEONMAP1(vcvtmh_u32_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
7667 NEONMAP1(vcvtmh_u64_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
7668 NEONMAP1(vcvtnh_s32_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
7669 NEONMAP1(vcvtnh_s64_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
7670 NEONMAP1(vcvtnh_u32_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
7671 NEONMAP1(vcvtnh_u64_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
7672 NEONMAP1(vcvtph_s32_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
7673 NEONMAP1(vcvtph_s64_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
7674 NEONMAP1(vcvtph_u32_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
7675 NEONMAP1(vcvtph_u64_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
7676 NEONMAP1(vmulxh_f16, aarch64_neon_fmulx, Add1ArgType),
7677 NEONMAP1(vrecpeh_f16, aarch64_neon_frecpe, Add1ArgType),
7678 NEONMAP1(vrecpxh_f16, aarch64_neon_frecpx, Add1ArgType),
7679 NEONMAP1(vrsqrteh_f16, aarch64_neon_frsqrte, Add1ArgType),
7680 NEONMAP1(vrsqrtsh_f16, aarch64_neon_frsqrts, Add1ArgType),
7681};
7682
7683// Some intrinsics are equivalent for codegen.
7684static const std::pair<unsigned, unsigned> NEONEquivalentIntrinsicMap[] = {
7685 { NEON::BI__builtin_neon_splat_lane_bf16, NEON::BI__builtin_neon_splat_lane_v, },
7686 { NEON::BI__builtin_neon_splat_laneq_bf16, NEON::BI__builtin_neon_splat_laneq_v, },
7687 { NEON::BI__builtin_neon_splatq_lane_bf16, NEON::BI__builtin_neon_splatq_lane_v, },
7688 { NEON::BI__builtin_neon_splatq_laneq_bf16, NEON::BI__builtin_neon_splatq_laneq_v, },
7689 { NEON::BI__builtin_neon_vabd_f16, NEON::BI__builtin_neon_vabd_v, },
7690 { NEON::BI__builtin_neon_vabdq_f16, NEON::BI__builtin_neon_vabdq_v, },
7691 { NEON::BI__builtin_neon_vabs_f16, NEON::BI__builtin_neon_vabs_v, },
7692 { NEON::BI__builtin_neon_vabsq_f16, NEON::BI__builtin_neon_vabsq_v, },
7693 { NEON::BI__builtin_neon_vcage_f16, NEON::BI__builtin_neon_vcage_v, },
7694 { NEON::BI__builtin_neon_vcageq_f16, NEON::BI__builtin_neon_vcageq_v, },
7695 { NEON::BI__builtin_neon_vcagt_f16, NEON::BI__builtin_neon_vcagt_v, },
7696 { NEON::BI__builtin_neon_vcagtq_f16, NEON::BI__builtin_neon_vcagtq_v, },
7697 { NEON::BI__builtin_neon_vcale_f16, NEON::BI__builtin_neon_vcale_v, },
7698 { NEON::BI__builtin_neon_vcaleq_f16, NEON::BI__builtin_neon_vcaleq_v, },
7699 { NEON::BI__builtin_neon_vcalt_f16, NEON::BI__builtin_neon_vcalt_v, },
7700 { NEON::BI__builtin_neon_vcaltq_f16, NEON::BI__builtin_neon_vcaltq_v, },
7701 { NEON::BI__builtin_neon_vceqz_f16, NEON::BI__builtin_neon_vceqz_v, },
7702 { NEON::BI__builtin_neon_vceqzq_f16, NEON::BI__builtin_neon_vceqzq_v, },
7703 { NEON::BI__builtin_neon_vcgez_f16, NEON::BI__builtin_neon_vcgez_v, },
7704 { NEON::BI__builtin_neon_vcgezq_f16, NEON::BI__builtin_neon_vcgezq_v, },
7705 { NEON::BI__builtin_neon_vcgtz_f16, NEON::BI__builtin_neon_vcgtz_v, },
7706 { NEON::BI__builtin_neon_vcgtzq_f16, NEON::BI__builtin_neon_vcgtzq_v, },
7707 { NEON::BI__builtin_neon_vclez_f16, NEON::BI__builtin_neon_vclez_v, },
7708 { NEON::BI__builtin_neon_vclezq_f16, NEON::BI__builtin_neon_vclezq_v, },
7709 { NEON::BI__builtin_neon_vcltz_f16, NEON::BI__builtin_neon_vcltz_v, },
7710 { NEON::BI__builtin_neon_vcltzq_f16, NEON::BI__builtin_neon_vcltzq_v, },
7711 { NEON::BI__builtin_neon_vfma_f16, NEON::BI__builtin_neon_vfma_v, },
7712 { NEON::BI__builtin_neon_vfma_lane_f16, NEON::BI__builtin_neon_vfma_lane_v, },
7713 { NEON::BI__builtin_neon_vfma_laneq_f16, NEON::BI__builtin_neon_vfma_laneq_v, },
7714 { NEON::BI__builtin_neon_vfmaq_f16, NEON::BI__builtin_neon_vfmaq_v, },
7715 { NEON::BI__builtin_neon_vfmaq_lane_f16, NEON::BI__builtin_neon_vfmaq_lane_v, },
7716 { NEON::BI__builtin_neon_vfmaq_laneq_f16, NEON::BI__builtin_neon_vfmaq_laneq_v, },
7717 { NEON::BI__builtin_neon_vld1_bf16_x2, NEON::BI__builtin_neon_vld1_x2_v },
7718 { NEON::BI__builtin_neon_vld1_bf16_x3, NEON::BI__builtin_neon_vld1_x3_v },
7719 { NEON::BI__builtin_neon_vld1_bf16_x4, NEON::BI__builtin_neon_vld1_x4_v },
7720 { NEON::BI__builtin_neon_vld1_bf16, NEON::BI__builtin_neon_vld1_v },
7721 { NEON::BI__builtin_neon_vld1_dup_bf16, NEON::BI__builtin_neon_vld1_dup_v },
7722 { NEON::BI__builtin_neon_vld1_lane_bf16, NEON::BI__builtin_neon_vld1_lane_v },
7723 { NEON::BI__builtin_neon_vld1q_bf16_x2, NEON::BI__builtin_neon_vld1q_x2_v },
7724 { NEON::BI__builtin_neon_vld1q_bf16_x3, NEON::BI__builtin_neon_vld1q_x3_v },
7725 { NEON::BI__builtin_neon_vld1q_bf16_x4, NEON::BI__builtin_neon_vld1q_x4_v },
7726 { NEON::BI__builtin_neon_vld1q_bf16, NEON::BI__builtin_neon_vld1q_v },
7727 { NEON::BI__builtin_neon_vld1q_dup_bf16, NEON::BI__builtin_neon_vld1q_dup_v },
7728 { NEON::BI__builtin_neon_vld1q_lane_bf16, NEON::BI__builtin_neon_vld1q_lane_v },
7729 { NEON::BI__builtin_neon_vld2_bf16, NEON::BI__builtin_neon_vld2_v },
7730 { NEON::BI__builtin_neon_vld2_dup_bf16, NEON::BI__builtin_neon_vld2_dup_v },
7731 { NEON::BI__builtin_neon_vld2_lane_bf16, NEON::BI__builtin_neon_vld2_lane_v },
7732 { NEON::BI__builtin_neon_vld2q_bf16, NEON::BI__builtin_neon_vld2q_v },
7733 { NEON::BI__builtin_neon_vld2q_dup_bf16, NEON::BI__builtin_neon_vld2q_dup_v },
7734 { NEON::BI__builtin_neon_vld2q_lane_bf16, NEON::BI__builtin_neon_vld2q_lane_v },
7735 { NEON::BI__builtin_neon_vld3_bf16, NEON::BI__builtin_neon_vld3_v },
7736 { NEON::BI__builtin_neon_vld3_dup_bf16, NEON::BI__builtin_neon_vld3_dup_v },
7737 { NEON::BI__builtin_neon_vld3_lane_bf16, NEON::BI__builtin_neon_vld3_lane_v },
7738 { NEON::BI__builtin_neon_vld3q_bf16, NEON::BI__builtin_neon_vld3q_v },
7739 { NEON::BI__builtin_neon_vld3q_dup_bf16, NEON::BI__builtin_neon_vld3q_dup_v },
7740 { NEON::BI__builtin_neon_vld3q_lane_bf16, NEON::BI__builtin_neon_vld3q_lane_v },
7741 { NEON::BI__builtin_neon_vld4_bf16, NEON::BI__builtin_neon_vld4_v },
7742 { NEON::BI__builtin_neon_vld4_dup_bf16, NEON::BI__builtin_neon_vld4_dup_v },
7743 { NEON::BI__builtin_neon_vld4_lane_bf16, NEON::BI__builtin_neon_vld4_lane_v },
7744 { NEON::BI__builtin_neon_vld4q_bf16, NEON::BI__builtin_neon_vld4q_v },
7745 { NEON::BI__builtin_neon_vld4q_dup_bf16, NEON::BI__builtin_neon_vld4q_dup_v },
7746 { NEON::BI__builtin_neon_vld4q_lane_bf16, NEON::BI__builtin_neon_vld4q_lane_v },
7747 { NEON::BI__builtin_neon_vmax_f16, NEON::BI__builtin_neon_vmax_v, },
7748 { NEON::BI__builtin_neon_vmaxnm_f16, NEON::BI__builtin_neon_vmaxnm_v, },
7749 { NEON::BI__builtin_neon_vmaxnmq_f16, NEON::BI__builtin_neon_vmaxnmq_v, },
7750 { NEON::BI__builtin_neon_vmaxq_f16, NEON::BI__builtin_neon_vmaxq_v, },
7751 { NEON::BI__builtin_neon_vmin_f16, NEON::BI__builtin_neon_vmin_v, },
7752 { NEON::BI__builtin_neon_vminnm_f16, NEON::BI__builtin_neon_vminnm_v, },
7753 { NEON::BI__builtin_neon_vminnmq_f16, NEON::BI__builtin_neon_vminnmq_v, },
7754 { NEON::BI__builtin_neon_vminq_f16, NEON::BI__builtin_neon_vminq_v, },
7755 { NEON::BI__builtin_neon_vmulx_f16, NEON::BI__builtin_neon_vmulx_v, },
7756 { NEON::BI__builtin_neon_vmulxq_f16, NEON::BI__builtin_neon_vmulxq_v, },
7757 { NEON::BI__builtin_neon_vpadd_f16, NEON::BI__builtin_neon_vpadd_v, },
7758 { NEON::BI__builtin_neon_vpaddq_f16, NEON::BI__builtin_neon_vpaddq_v, },
7759 { NEON::BI__builtin_neon_vpmax_f16, NEON::BI__builtin_neon_vpmax_v, },
7760 { NEON::BI__builtin_neon_vpmaxnm_f16, NEON::BI__builtin_neon_vpmaxnm_v, },
7761 { NEON::BI__builtin_neon_vpmaxnmq_f16, NEON::BI__builtin_neon_vpmaxnmq_v, },
7762 { NEON::BI__builtin_neon_vpmaxq_f16, NEON::BI__builtin_neon_vpmaxq_v, },
7763 { NEON::BI__builtin_neon_vpmin_f16, NEON::BI__builtin_neon_vpmin_v, },
7764 { NEON::BI__builtin_neon_vpminnm_f16, NEON::BI__builtin_neon_vpminnm_v, },
7765 { NEON::BI__builtin_neon_vpminnmq_f16, NEON::BI__builtin_neon_vpminnmq_v, },
7766 { NEON::BI__builtin_neon_vpminq_f16, NEON::BI__builtin_neon_vpminq_v, },
7767 { NEON::BI__builtin_neon_vrecpe_f16, NEON::BI__builtin_neon_vrecpe_v, },
7768 { NEON::BI__builtin_neon_vrecpeq_f16, NEON::BI__builtin_neon_vrecpeq_v, },
7769 { NEON::BI__builtin_neon_vrecps_f16, NEON::BI__builtin_neon_vrecps_v, },
7770 { NEON::BI__builtin_neon_vrecpsq_f16, NEON::BI__builtin_neon_vrecpsq_v, },
7771 { NEON::BI__builtin_neon_vrnd_f16, NEON::BI__builtin_neon_vrnd_v, },
7772 { NEON::BI__builtin_neon_vrnda_f16, NEON::BI__builtin_neon_vrnda_v, },
7773 { NEON::BI__builtin_neon_vrndaq_f16, NEON::BI__builtin_neon_vrndaq_v, },
7774 { NEON::BI__builtin_neon_vrndi_f16, NEON::BI__builtin_neon_vrndi_v, },
7775 { NEON::BI__builtin_neon_vrndiq_f16, NEON::BI__builtin_neon_vrndiq_v, },
7776 { NEON::BI__builtin_neon_vrndm_f16, NEON::BI__builtin_neon_vrndm_v, },
7777 { NEON::BI__builtin_neon_vrndmq_f16, NEON::BI__builtin_neon_vrndmq_v, },
7778 { NEON::BI__builtin_neon_vrndn_f16, NEON::BI__builtin_neon_vrndn_v, },
7779 { NEON::BI__builtin_neon_vrndnq_f16, NEON::BI__builtin_neon_vrndnq_v, },
7780 { NEON::BI__builtin_neon_vrndp_f16, NEON::BI__builtin_neon_vrndp_v, },
7781 { NEON::BI__builtin_neon_vrndpq_f16, NEON::BI__builtin_neon_vrndpq_v, },
7782 { NEON::BI__builtin_neon_vrndq_f16, NEON::BI__builtin_neon_vrndq_v, },
7783 { NEON::BI__builtin_neon_vrndx_f16, NEON::BI__builtin_neon_vrndx_v, },
7784 { NEON::BI__builtin_neon_vrndxq_f16, NEON::BI__builtin_neon_vrndxq_v, },
7785 { NEON::BI__builtin_neon_vrsqrte_f16, NEON::BI__builtin_neon_vrsqrte_v, },
7786 { NEON::BI__builtin_neon_vrsqrteq_f16, NEON::BI__builtin_neon_vrsqrteq_v, },
7787 { NEON::BI__builtin_neon_vrsqrts_f16, NEON::BI__builtin_neon_vrsqrts_v, },
7788 { NEON::BI__builtin_neon_vrsqrtsq_f16, NEON::BI__builtin_neon_vrsqrtsq_v, },
7789 { NEON::BI__builtin_neon_vsqrt_f16, NEON::BI__builtin_neon_vsqrt_v, },
7790 { NEON::BI__builtin_neon_vsqrtq_f16, NEON::BI__builtin_neon_vsqrtq_v, },
7791 { NEON::BI__builtin_neon_vst1_bf16_x2, NEON::BI__builtin_neon_vst1_x2_v },
7792 { NEON::BI__builtin_neon_vst1_bf16_x3, NEON::BI__builtin_neon_vst1_x3_v },
7793 { NEON::BI__builtin_neon_vst1_bf16_x4, NEON::BI__builtin_neon_vst1_x4_v },
7794 { NEON::BI__builtin_neon_vst1_bf16, NEON::BI__builtin_neon_vst1_v },
7795 { NEON::BI__builtin_neon_vst1_lane_bf16, NEON::BI__builtin_neon_vst1_lane_v },
7796 { NEON::BI__builtin_neon_vst1q_bf16_x2, NEON::BI__builtin_neon_vst1q_x2_v },
7797 { NEON::BI__builtin_neon_vst1q_bf16_x3, NEON::BI__builtin_neon_vst1q_x3_v },
7798 { NEON::BI__builtin_neon_vst1q_bf16_x4, NEON::BI__builtin_neon_vst1q_x4_v },
7799 { NEON::BI__builtin_neon_vst1q_bf16, NEON::BI__builtin_neon_vst1q_v },
7800 { NEON::BI__builtin_neon_vst1q_lane_bf16, NEON::BI__builtin_neon_vst1q_lane_v },
7801 { NEON::BI__builtin_neon_vst2_bf16, NEON::BI__builtin_neon_vst2_v },
7802 { NEON::BI__builtin_neon_vst2_lane_bf16, NEON::BI__builtin_neon_vst2_lane_v },
7803 { NEON::BI__builtin_neon_vst2q_bf16, NEON::BI__builtin_neon_vst2q_v },
7804 { NEON::BI__builtin_neon_vst2q_lane_bf16, NEON::BI__builtin_neon_vst2q_lane_v },
7805 { NEON::BI__builtin_neon_vst3_bf16, NEON::BI__builtin_neon_vst3_v },
7806 { NEON::BI__builtin_neon_vst3_lane_bf16, NEON::BI__builtin_neon_vst3_lane_v },
7807 { NEON::BI__builtin_neon_vst3q_bf16, NEON::BI__builtin_neon_vst3q_v },
7808 { NEON::BI__builtin_neon_vst3q_lane_bf16, NEON::BI__builtin_neon_vst3q_lane_v },
7809 { NEON::BI__builtin_neon_vst4_bf16, NEON::BI__builtin_neon_vst4_v },
7810 { NEON::BI__builtin_neon_vst4_lane_bf16, NEON::BI__builtin_neon_vst4_lane_v },
7811 { NEON::BI__builtin_neon_vst4q_bf16, NEON::BI__builtin_neon_vst4q_v },
7812 { NEON::BI__builtin_neon_vst4q_lane_bf16, NEON::BI__builtin_neon_vst4q_lane_v },
7813 // The mangling rules cause us to have one ID for each type for vldap1(q)_lane
7814 // and vstl1(q)_lane, but codegen is equivalent for all of them. Choose an
7815 // arbitrary one to be handled as the canonical variation.
7816 { NEON::BI__builtin_neon_vldap1_lane_u64, NEON::BI__builtin_neon_vldap1_lane_s64 },
7817 { NEON::BI__builtin_neon_vldap1_lane_f64, NEON::BI__builtin_neon_vldap1_lane_s64 },
7818 { NEON::BI__builtin_neon_vldap1_lane_p64, NEON::BI__builtin_neon_vldap1_lane_s64 },
7819 { NEON::BI__builtin_neon_vldap1q_lane_u64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
7820 { NEON::BI__builtin_neon_vldap1q_lane_f64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
7821 { NEON::BI__builtin_neon_vldap1q_lane_p64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
7822 { NEON::BI__builtin_neon_vstl1_lane_u64, NEON::BI__builtin_neon_vstl1_lane_s64 },
7823 { NEON::BI__builtin_neon_vstl1_lane_f64, NEON::BI__builtin_neon_vstl1_lane_s64 },
7824 { NEON::BI__builtin_neon_vstl1_lane_p64, NEON::BI__builtin_neon_vstl1_lane_s64 },
7825 { NEON::BI__builtin_neon_vstl1q_lane_u64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
7826 { NEON::BI__builtin_neon_vstl1q_lane_f64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
7827 { NEON::BI__builtin_neon_vstl1q_lane_p64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
7828};
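// A rough sketch of how the equivalence table is consumed (the exact lookup
// in the builtin emitters may differ slightly): the builtin ID is remapped to
// its canonical partner before any per-ID handling runs, e.g.
//
//   for (const auto &P : NEONEquivalentIntrinsicMap)
//     if (BuiltinID == P.first) {
//       BuiltinID = P.second;   // e.g. vabs_f16 -> vabs_v
//       break;
//     }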
7829
7830#undef NEONMAP0
7831#undef NEONMAP1
7832#undef NEONMAP2
7833
7834#define SVEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
7835 { \
7836 #NameBase, SVE::BI__builtin_sve_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
7837 TypeModifier \
7838 }
7839
7840#define SVEMAP2(NameBase, TypeModifier) \
7841 { #NameBase, SVE::BI__builtin_sve_##NameBase, 0, 0, TypeModifier }
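// For illustration only (the base name, intrinsic and flag below are
// hypothetical), an entry written as
//   SVEMAP1(svfoo_m, aarch64_sve_foo, SomeTypeFlags)
// expands to
//   { "svfoo_m", SVE::BI__builtin_sve_svfoo_m, Intrinsic::aarch64_sve_foo, 0,
//     SomeTypeFlags }
// while SVEMAP2 leaves the LLVM intrinsic field at 0 for builtins that are
// lowered with custom handling instead of a direct intrinsic call.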
7842static const ARMVectorIntrinsicInfo AArch64SVEIntrinsicMap[] = {
7843#define GET_SVE_LLVM_INTRINSIC_MAP
7844#include "clang/Basic/arm_sve_builtin_cg.inc"
7845#include "clang/Basic/BuiltinsAArch64NeonSVEBridge_cg.def"
7846#undef GET_SVE_LLVM_INTRINSIC_MAP
7847};
7848
7849#undef SVEMAP1
7850#undef SVEMAP2
7851
7852#define SMEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
7853 { \
7854 #NameBase, SME::BI__builtin_sme_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
7855 TypeModifier \
7856 }
7857
7858#define SMEMAP2(NameBase, TypeModifier) \
7859 { #NameBase, SME::BI__builtin_sme_##NameBase, 0, 0, TypeModifier }
7860static const ARMVectorIntrinsicInfo AArch64SMEIntrinsicMap[] = {
7861#define GET_SME_LLVM_INTRINSIC_MAP
7862#include "clang/Basic/arm_sme_builtin_cg.inc"
7863#undef GET_SME_LLVM_INTRINSIC_MAP
7864};
7865
7866#undef SMEMAP1
7867#undef SMEMAP2
7868
7870
7875
7876static const ARMVectorIntrinsicInfo *
7877findARMVectorIntrinsicInMap(ArrayRef<ARMVectorIntrinsicInfo> IntrinsicMap,
7878 unsigned BuiltinID, bool &MapProvenSorted) {
7879
7880#ifndef NDEBUG
7881 if (!MapProvenSorted) {
7882 assert(llvm::is_sorted(IntrinsicMap));
7883 MapProvenSorted = true;
7884 }
7885#endif
7886
7887 const ARMVectorIntrinsicInfo *Builtin =
7888 llvm::lower_bound(IntrinsicMap, BuiltinID);
7889
7890 if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
7891 return Builtin;
7892
7893 return nullptr;
7894}
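// A minimal usage sketch (local names here are illustrative only):
//
//   static bool MapSorted = false;
//   const ARMVectorIntrinsicInfo *Info = findARMVectorIntrinsicInMap(
//       AArch64SIMDIntrinsicMap, BuiltinID, MapSorted);
//   if (Info)
//     // emit via Info->LLVMIntrinsic, Info->TypeModifier, ...
//
// The lower_bound lookup relies on the tables being sorted by BuiltinID; the
// NDEBUG-only assert above verifies that once per map.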
7895
7896Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
7897 unsigned Modifier,
7898 llvm::Type *ArgType,
7899 const CallExpr *E) {
7900 int VectorSize = 0;
7901 if (Modifier & Use64BitVectors)
7902 VectorSize = 64;
7903 else if (Modifier & Use128BitVectors)
7904 VectorSize = 128;
7905
7906 // Return type.
7907 SmallVector<llvm::Type *, 3> Tys;
7908 if (Modifier & AddRetType) {
7909 llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
7910 if (Modifier & VectorizeRetType)
7911 Ty = llvm::FixedVectorType::get(
7912 Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);
7913
7914 Tys.push_back(Ty);
7915 }
7916
7917 // Arguments.
7918 if (Modifier & VectorizeArgTypes) {
7919 int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1;
7920 ArgType = llvm::FixedVectorType::get(ArgType, Elts);
7921 }
7922
7923 if (Modifier & (Add1ArgType | Add2ArgTypes))
7924 Tys.push_back(ArgType);
7925
7926 if (Modifier & Add2ArgTypes)
7927 Tys.push_back(ArgType);
7928
7929 if (Modifier & InventFloatType)
7930 Tys.push_back(FloatTy);
7931
7932 return CGM.getIntrinsic(IntrinsicID, Tys);
7933}
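// For example (a sketch of two common flag combinations, not an exhaustive
// list): with AddRetType | Add1ArgType the overload list ends up as
// Tys == { ReturnTy, ArgTy }, i.e. the intrinsic is fetched as
//   CGM.getIntrinsic(IntrinsicID, { ReturnTy, ArgTy });
// whereas VectorizeArgTypes together with Use64BitVectors first widens a
// scalar ArgType to a 64-bit fixed vector before it is pushed as the
// overload type.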
7934
7935static Value *EmitCommonNeonSISDBuiltinExpr(
7936 CodeGenFunction &CGF, const ARMVectorIntrinsicInfo &SISDInfo,
7937 SmallVectorImpl<Value *> &Ops, const CallExpr *E) {
7938 unsigned BuiltinID = SISDInfo.BuiltinID;
7939 unsigned int Int = SISDInfo.LLVMIntrinsic;
7940 unsigned Modifier = SISDInfo.TypeModifier;
7941 const char *s = SISDInfo.NameHint;
7942
7943 switch (BuiltinID) {
7944 case NEON::BI__builtin_neon_vcled_s64:
7945 case NEON::BI__builtin_neon_vcled_u64:
7946 case NEON::BI__builtin_neon_vcles_f32:
7947 case NEON::BI__builtin_neon_vcled_f64:
7948 case NEON::BI__builtin_neon_vcltd_s64:
7949 case NEON::BI__builtin_neon_vcltd_u64:
7950 case NEON::BI__builtin_neon_vclts_f32:
7951 case NEON::BI__builtin_neon_vcltd_f64:
7952 case NEON::BI__builtin_neon_vcales_f32:
7953 case NEON::BI__builtin_neon_vcaled_f64:
7954 case NEON::BI__builtin_neon_vcalts_f32:
7955 case NEON::BI__builtin_neon_vcaltd_f64:
7956 // Only one direction of comparisons actually exists; cmle is actually a cmge
7957 // with swapped operands. The table gives us the right intrinsic but we
7958 // still need to do the swap.
7959 std::swap(Ops[0], Ops[1]);
7960 break;
7961 }
7962
7963 assert(Int && "Generic code assumes a valid intrinsic");
7964
7965 // Determine the type(s) of this overloaded AArch64 intrinsic.
7966 const Expr *Arg = E->getArg(0);
7967 llvm::Type *ArgTy = CGF.ConvertType(Arg->getType());
7968 Function *F = CGF.LookupNeonLLVMIntrinsic(Int, Modifier, ArgTy, E);
7969
7970 int j = 0;
7971 ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0);
7972 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
7973 ai != ae; ++ai, ++j) {
7974 llvm::Type *ArgTy = ai->getType();
7975 if (Ops[j]->getType()->getPrimitiveSizeInBits() ==
7976 ArgTy->getPrimitiveSizeInBits())
7977 continue;
7978
7979 assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy());
7980 // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate
7981 // it before inserting.
7982 Ops[j] = CGF.Builder.CreateTruncOrBitCast(
7983 Ops[j], cast<llvm::VectorType>(ArgTy)->getElementType());
7984 Ops[j] =
7985 CGF.Builder.CreateInsertElement(PoisonValue::get(ArgTy), Ops[j], C0);
7986 }
7987
7988 Value *Result = CGF.EmitNeonCall(F, Ops, s);
7989 llvm::Type *ResultType = CGF.ConvertType(E->getType());
7990 if (ResultType->getPrimitiveSizeInBits().getFixedValue() <
7991 Result->getType()->getPrimitiveSizeInBits().getFixedValue())
7992 return CGF.Builder.CreateExtractElement(Result, C0);
7993
7994 return CGF.Builder.CreateBitCast(Result, ResultType, s);
7995}
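// As a concrete illustration (approximate IR; value names are made up): the
// scalar saturating add vqaddd_s64 is mapped to aarch64_neon_sqadd with
// Add1ArgType, so the EmitNeonCall above produces roughly
//   %r = call i64 @llvm.aarch64.neon.sqadd.i64(i64 %a, i64 %b)
// and the trailing extract/bitcast only fires when the intrinsic result is
// wider than the scalar return type.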
7996
7997Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
7998 unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
7999 const char *NameHint, unsigned Modifier, const CallExpr *E,
8000 SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1,
8001 llvm::Triple::ArchType Arch) {
8002 // Get the last argument, which specifies the vector type.
8003 const Expr *Arg = E->getArg(E->getNumArgs() - 1);
8004 std::optional<llvm::APSInt> NeonTypeConst =
8005 Arg->getIntegerConstantExpr(getContext());
8006 if (!NeonTypeConst)
8007 return nullptr;
8008
8009 // Determine the type of this overloaded NEON intrinsic.
8010 NeonTypeFlags Type(NeonTypeConst->getZExtValue());
8011 bool Usgn = Type.isUnsigned();
8012 bool Quad = Type.isQuad();
8013 const bool HasLegalHalfType = getTarget().hasLegalHalfType();
8014 const bool AllowBFloatArgsAndRet =
8015 getTargetHooks().getABIInfo().allowBFloatArgsAndRet();
8016
8017 llvm::FixedVectorType *VTy =
8018 GetNeonType(this, Type, HasLegalHalfType, false, AllowBFloatArgsAndRet);
8019 llvm::Type *Ty = VTy;
8020 if (!Ty)
8021 return nullptr;
8022
8023 auto getAlignmentValue32 = [&](Address addr) -> Value* {
8024 return Builder.getInt32(addr.getAlignment().getQuantity());
8025 };
8026
8027 unsigned Int = LLVMIntrinsic;
8028 if ((Modifier & UnsignedAlts) && !Usgn)
8029 Int = AltLLVMIntrinsic;
8030
8031 switch (BuiltinID) {
8032 default: break;
8033 case NEON::BI__builtin_neon_splat_lane_v:
8034 case NEON::BI__builtin_neon_splat_laneq_v:
8035 case NEON::BI__builtin_neon_splatq_lane_v:
8036 case NEON::BI__builtin_neon_splatq_laneq_v: {
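  // The splat is emitted as a shufflevector whose mask repeats the requested
  // lane; roughly, splatting lane 1 of a <4 x i16> source becomes
  //   shufflevector <4 x i16> %v, <4 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>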
8037 auto NumElements = VTy->getElementCount();
8038 if (BuiltinID == NEON::BI__builtin_neon_splatq_lane_v)
8039 NumElements = NumElements * 2;
8040 if (BuiltinID == NEON::BI__builtin_neon_splat_laneq_v)
8041 NumElements = NumElements.divideCoefficientBy(2);
8042
8043 Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
8044 return EmitNeonSplat(Ops[0], cast<ConstantInt>(Ops[1]), NumElements);
8045 }
8046 case NEON::BI__builtin_neon_vpadd_v:
8047 case NEON::BI__builtin_neon_vpaddq_v:
8048 // We don't allow fp/int overloading of intrinsics.
8049 if (VTy->getElementType()->isFloatingPointTy() &&
8050 Int == Intrinsic::aarch64_neon_addp)
8051 Int = Intrinsic::aarch64_neon_faddp;
8052 break;
8053 case NEON::BI__builtin_neon_vabs_v:
8054 case NEON::BI__builtin_neon_vabsq_v:
8055 if (VTy->getElementType()->isFloatingPointTy())
8056 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
8057 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");
8058 case NEON::BI__builtin_neon_vadd_v:
8059 case NEON::BI__builtin_neon_vaddq_v: {
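  // Polynomial (carry-less) addition over GF(2) is just a bitwise XOR, so the
  // operands are reinterpreted as byte vectors and XORed.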
8060 llvm::Type *VTy = llvm::FixedVectorType::get(Int8Ty, Quad ? 16 : 8);
8061 Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
8062 Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
8063 Ops[0] = Builder.CreateXor(Ops[0], Ops[1]);
8064 return Builder.CreateBitCast(Ops[0], Ty);
8065 }
8066 case NEON::BI__builtin_neon_vaddhn_v: {
8067 llvm::FixedVectorType *SrcTy =
8068 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
8069
8070 // %sum = add <4 x i32> %lhs, %rhs
8071 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
8072 Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
8073 Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");
8074
8075 // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
8076 Constant *ShiftAmt =
8077 ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
8078 Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");
8079
8080 // %res = trunc <4 x i32> %high to <4 x i16>
8081 return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
8082 }
8083 case NEON::BI__builtin_neon_vcale_v:
8084 case NEON::BI__builtin_neon_vcaleq_v:
8085 case NEON::BI__builtin_neon_vcalt_v:
8086 case NEON::BI__builtin_neon_vcaltq_v:
8087 std::swap(Ops[0], Ops[1]);
8088 [[fallthrough]];
8089 case NEON::BI__builtin_neon_vcage_v:
8090 case NEON::BI__builtin_neon_vcageq_v:
8091 case NEON::BI__builtin_neon_vcagt_v:
8092 case NEON::BI__builtin_neon_vcagtq_v: {
8093 llvm::Type *Ty;
8094 switch (VTy->getScalarSizeInBits()) {
8095 default: llvm_unreachable("unexpected type");
8096 case 32:
8097 Ty = FloatTy;
8098 break;
8099 case 64:
8100 Ty = DoubleTy;
8101 break;
8102 case 16:
8103 Ty = HalfTy;
8104 break;
8105 }
8106 auto *VecFlt = llvm::FixedVectorType::get(Ty, VTy->getNumElements());
8107 llvm::Type *Tys[] = { VTy, VecFlt };
8108 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
8109 return EmitNeonCall(F, Ops, NameHint);
8110 }
8111 case NEON::BI__builtin_neon_vceqz_v:
8112 case NEON::BI__builtin_neon_vceqzq_v:
8113 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ,
8114 ICmpInst::ICMP_EQ, "vceqz");
8115 case NEON::BI__builtin_neon_vcgez_v:
8116 case NEON::BI__builtin_neon_vcgezq_v:
8117 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE,
8118 ICmpInst::ICMP_SGE, "vcgez");
8119 case NEON::BI__builtin_neon_vclez_v:
8120 case NEON::BI__builtin_neon_vclezq_v:
8121 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE,
8122 ICmpInst::ICMP_SLE, "vclez");
8123 case NEON::BI__builtin_neon_vcgtz_v:
8124 case NEON::BI__builtin_neon_vcgtzq_v:
8125 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT,
8126 ICmpInst::ICMP_SGT, "vcgtz");
8127 case NEON::BI__builtin_neon_vcltz_v:
8128 case NEON::BI__builtin_neon_vcltzq_v:
8129 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT,
8130 ICmpInst::ICMP_SLT, "vcltz");
8131 case NEON::BI__builtin_neon_vclz_v:
8132 case NEON::BI__builtin_neon_vclzq_v:
8133 // We generate a target-independent intrinsic, which needs a second argument
8134 // for whether or not clz of zero is undefined; on ARM it isn't.
8135 Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
8136 break;
8137 case NEON::BI__builtin_neon_vcvt_f32_v:
8138 case NEON::BI__builtin_neon_vcvtq_f32_v:
8139 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
8140 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad),
8141 HasLegalHalfType);
8142 return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
8143 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
8144 case NEON::BI__builtin_neon_vcvt_f16_s16:
8145 case NEON::BI__builtin_neon_vcvt_f16_u16:
8146 case NEON::BI__builtin_neon_vcvtq_f16_s16:
8147 case NEON::BI__builtin_neon_vcvtq_f16_u16:
8148 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
8149 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float16, false, Quad),
8150 HasLegalHalfType);
8151 return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
8152 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
8153 case NEON::BI__builtin_neon_vcvt_n_f16_s16:
8154 case NEON::BI__builtin_neon_vcvt_n_f16_u16:
8155 case NEON::BI__builtin_neon_vcvtq_n_f16_s16:
8156 case NEON::BI__builtin_neon_vcvtq_n_f16_u16: {
8157 llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
8158 Function *F = CGM.getIntrinsic(Int, Tys);
8159 return EmitNeonCall(F, Ops, "vcvt_n");
8160 }
8161 case NEON::BI__builtin_neon_vcvt_n_f32_v:
8162 case NEON::BI__builtin_neon_vcvt_n_f64_v:
8163 case NEON::BI__builtin_neon_vcvtq_n_f32_v:
8164 case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
8165 llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
8166 Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
8167 Function *F = CGM.getIntrinsic(Int, Tys);
8168 return EmitNeonCall(F, Ops, "vcvt_n");
8169 }
8170 case NEON::BI__builtin_neon_vcvt_n_s16_f16:
8171 case NEON::BI__builtin_neon_vcvt_n_s32_v:
8172 case NEON::BI__builtin_neon_vcvt_n_u16_f16:
8173 case NEON::BI__builtin_neon_vcvt_n_u32_v:
8174 case NEON::BI__builtin_neon_vcvt_n_s64_v:
8175 case NEON::BI__builtin_neon_vcvt_n_u64_v:
8176 case NEON::BI__builtin_neon_vcvtq_n_s16_f16:
8177 case NEON::BI__builtin_neon_vcvtq_n_s32_v:
8178 case NEON::BI__builtin_neon_vcvtq_n_u16_f16:
8179 case NEON::BI__builtin_neon_vcvtq_n_u32_v:
8180 case NEON::BI__builtin_neon_vcvtq_n_s64_v:
8181 case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
8182 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
8183 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
8184 return EmitNeonCall(F, Ops, "vcvt_n");
8185 }
8186 case NEON::BI__builtin_neon_vcvt_s32_v:
8187 case NEON::BI__builtin_neon_vcvt_u32_v:
8188 case NEON::BI__builtin_neon_vcvt_s64_v:
8189 case NEON::BI__builtin_neon_vcvt_u64_v:
8190 case NEON::BI__builtin_neon_vcvt_s16_f16:
8191 case NEON::BI__builtin_neon_vcvt_u16_f16:
8192 case NEON::BI__builtin_neon_vcvtq_s32_v:
8193 case NEON::BI__builtin_neon_vcvtq_u32_v:
8194 case NEON::BI__builtin_neon_vcvtq_s64_v:
8195 case NEON::BI__builtin_neon_vcvtq_u64_v:
8196 case NEON::BI__builtin_neon_vcvtq_s16_f16:
8197 case NEON::BI__builtin_neon_vcvtq_u16_f16: {
8198 Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
8199 return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
8200 : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
8201 }
8202 case NEON::BI__builtin_neon_vcvta_s16_f16:
8203 case NEON::BI__builtin_neon_vcvta_s32_v:
8204 case NEON::BI__builtin_neon_vcvta_s64_v:
8205 case NEON::BI__builtin_neon_vcvta_u16_f16:
8206 case NEON::BI__builtin_neon_vcvta_u32_v:
8207 case NEON::BI__builtin_neon_vcvta_u64_v:
8208 case NEON::BI__builtin_neon_vcvtaq_s16_f16:
8209 case NEON::BI__builtin_neon_vcvtaq_s32_v:
8210 case NEON::BI__builtin_neon_vcvtaq_s64_v:
8211 case NEON::BI__builtin_neon_vcvtaq_u16_f16:
8212 case NEON::BI__builtin_neon_vcvtaq_u32_v:
8213 case NEON::BI__builtin_neon_vcvtaq_u64_v:
8214 case NEON::BI__builtin_neon_vcvtn_s16_f16:
8215 case NEON::BI__builtin_neon_vcvtn_s32_v:
8216 case NEON::BI__builtin_neon_vcvtn_s64_v:
8217 case NEON::BI__builtin_neon_vcvtn_u16_f16:
8218 case NEON::BI__builtin_neon_vcvtn_u32_v:
8219 case NEON::BI__builtin_neon_vcvtn_u64_v:
8220 case NEON::BI__builtin_neon_vcvtnq_s16_f16:
8221 case NEON::BI__builtin_neon_vcvtnq_s32_v:
8222 case NEON::BI__builtin_neon_vcvtnq_s64_v:
8223 case NEON::BI__builtin_neon_vcvtnq_u16_f16:
8224 case NEON::BI__builtin_neon_vcvtnq_u32_v:
8225 case NEON::BI__builtin_neon_vcvtnq_u64_v:
8226 case NEON::BI__builtin_neon_vcvtp_s16_f16:
8227 case NEON::BI__builtin_neon_vcvtp_s32_v:
8228 case NEON::BI__builtin_neon_vcvtp_s64_v:
8229 case NEON::BI__builtin_neon_vcvtp_u16_f16:
8230 case NEON::BI__builtin_neon_vcvtp_u32_v:
8231 case NEON::BI__builtin_neon_vcvtp_u64_v:
8232 case NEON::BI__builtin_neon_vcvtpq_s16_f16:
8233 case NEON::BI__builtin_neon_vcvtpq_s32_v:
8234 case NEON::BI__builtin_neon_vcvtpq_s64_v:
8235 case NEON::BI__builtin_neon_vcvtpq_u16_f16:
8236 case NEON::BI__builtin_neon_vcvtpq_u32_v:
8237 case NEON::BI__builtin_neon_vcvtpq_u64_v:
8238 case NEON::BI__builtin_neon_vcvtm_s16_f16:
8239 case NEON::BI__builtin_neon_vcvtm_s32_v:
8240 case NEON::BI__builtin_neon_vcvtm_s64_v:
8241 case NEON::BI__builtin_neon_vcvtm_u16_f16:
8242 case NEON::BI__builtin_neon_vcvtm_u32_v:
8243 case NEON::BI__builtin_neon_vcvtm_u64_v:
8244 case NEON::BI__builtin_neon_vcvtmq_s16_f16:
8245 case NEON::BI__builtin_neon_vcvtmq_s32_v:
8246 case NEON::BI__builtin_neon_vcvtmq_s64_v:
8247 case NEON::BI__builtin_neon_vcvtmq_u16_f16:
8248 case NEON::BI__builtin_neon_vcvtmq_u32_v:
8249 case NEON::BI__builtin_neon_vcvtmq_u64_v: {
8250 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
8251 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
8252 }
8253 case NEON::BI__builtin_neon_vcvtx_f32_v: {
8254 llvm::Type *Tys[2] = { VTy->getTruncatedElementVectorType(VTy), Ty};
8255 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
8256
8257 }
8258 case NEON::BI__builtin_neon_vext_v:
8259 case NEON::BI__builtin_neon_vextq_v: {
8260 int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
8261 SmallVector<int, 16> Indices;
8262 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
8263 Indices.push_back(i+CV);
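 // As a worked illustration (element count and CV value are illustrative, not
 // from the source): for a <4 x i16> vector with CV == 1 the loop above builds
 // the shuffle mask <1, 2, 3, 4>, i.e. the tail of Ops[0] followed by the head
 // of Ops[1].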
8264
8265 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
8266 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8267 return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext");
8268 }
8269 case NEON::BI__builtin_neon_vfma_v:
8270 case NEON::BI__builtin_neon_vfmaq_v: {
8271 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
8272 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8273 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
8274
8275 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
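 // A rough sketch of the resulting call (types and value names illustrative):
 // vfmaq_f32(acc, a, b) ends up as
 //   %r = call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b,
 //                                         <4 x float> %acc)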
8276 return emitCallMaybeConstrainedFPBuiltin(
8277 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
8278 {Ops[1], Ops[2], Ops[0]});
8279 }
8280 case NEON::BI__builtin_neon_vld1_v:
8281 case NEON::BI__builtin_neon_vld1q_v: {
8282 llvm::Type *Tys[] = {Ty, Int8PtrTy};
8283 Ops.push_back(getAlignmentValue32(PtrOp0));
8284 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1");
8285 }
8286 case NEON::BI__builtin_neon_vld1_x2_v:
8287 case NEON::BI__builtin_neon_vld1q_x2_v:
8288 case NEON::BI__builtin_neon_vld1_x3_v:
8289 case NEON::BI__builtin_neon_vld1q_x3_v:
8290 case NEON::BI__builtin_neon_vld1_x4_v:
8291 case NEON::BI__builtin_neon_vld1q_x4_v: {
8292 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
8293 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
8294 Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN");
8295 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
8296 }
8297 case NEON::BI__builtin_neon_vld2_v:
8298 case NEON::BI__builtin_neon_vld2q_v:
8299 case NEON::BI__builtin_neon_vld3_v:
8300 case NEON::BI__builtin_neon_vld3q_v:
8301 case NEON::BI__builtin_neon_vld4_v:
8302 case NEON::BI__builtin_neon_vld4q_v:
8303 case NEON::BI__builtin_neon_vld2_dup_v:
8304 case NEON::BI__builtin_neon_vld2q_dup_v:
8305 case NEON::BI__builtin_neon_vld3_dup_v:
8306 case NEON::BI__builtin_neon_vld3q_dup_v:
8307 case NEON::BI__builtin_neon_vld4_dup_v:
8308 case NEON::BI__builtin_neon_vld4q_dup_v: {
8309 llvm::Type *Tys[] = {Ty, Int8PtrTy};
8310 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
8311 Value *Align = getAlignmentValue32(PtrOp1);
8312 Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint);
8313 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
8314 }
8315 case NEON::BI__builtin_neon_vld1_dup_v:
8316 case NEON::BI__builtin_neon_vld1q_dup_v: {
8317 Value *V = PoisonValue::get(Ty);
8318 PtrOp0 = PtrOp0.withElementType(VTy->getElementType());
8319 LoadInst *Ld = Builder.CreateLoad(PtrOp0);
8320 llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
8321 Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
8322 return EmitNeonSplat(Ops[0], CI);
8323 }
8324 case NEON::BI__builtin_neon_vld2_lane_v:
8325 case NEON::BI__builtin_neon_vld2q_lane_v:
8326 case NEON::BI__builtin_neon_vld3_lane_v:
8327 case NEON::BI__builtin_neon_vld3q_lane_v:
8328 case NEON::BI__builtin_neon_vld4_lane_v:
8329 case NEON::BI__builtin_neon_vld4q_lane_v: {
8330 llvm::Type *Tys[] = {Ty, Int8PtrTy};
8331 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
8332 for (unsigned I = 2; I < Ops.size() - 1; ++I)
8333 Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
8334 Ops.push_back(getAlignmentValue32(PtrOp1));
8335 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), NameHint);
8336 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
8337 }
8338 case NEON::BI__builtin_neon_vmovl_v: {
8339 llvm::FixedVectorType *DTy =
8340 llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
8341 Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
8342 if (Usgn)
8343 return Builder.CreateZExt(Ops[0], Ty, "vmovl");
8344 return Builder.CreateSExt(Ops[0], Ty, "vmovl");
8345 }
8346 case NEON::BI__builtin_neon_vmovn_v: {
8347 llvm::FixedVectorType *QTy =
8348 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
8349 Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
8350 return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
8351 }
8352 case NEON::BI__builtin_neon_vmull_v:
8353 // FIXME: the integer vmull operations could be emitted in terms of pure
8354 // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of
8355 // hoisting the exts outside loops. Until global ISel comes along that can
8356 // see through such movement, this leads to bad CodeGen. So we need an
8357 // intrinsic for now.
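 // For reference, the pure-IR lowering the FIXME above describes would look
 // roughly like this for vmull_s16 (sketch only):
 //   %a.ext = sext <4 x i16> %a to <4 x i32>
 //   %b.ext = sext <4 x i16> %b to <4 x i32>
 //   %prod  = mul <4 x i32> %a.ext, %b.ext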
8358 Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
8359 Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
8360 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
8361 case NEON::BI__builtin_neon_vpadal_v:
8362 case NEON::BI__builtin_neon_vpadalq_v: {
8363 // The source operand type has twice as many elements of half the size.
8364 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
8365 llvm::Type *EltTy =
8366 llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
8367 auto *NarrowTy =
8368 llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);
8369 llvm::Type *Tys[2] = { Ty, NarrowTy };
8370 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
8371 }
8372 case NEON::BI__builtin_neon_vpaddl_v:
8373 case NEON::BI__builtin_neon_vpaddlq_v: {
8374 // The source operand type has twice as many elements of half the size.
8375 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
8376 llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
8377 auto *NarrowTy =
8378 llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);
8379 llvm::Type *Tys[2] = { Ty, NarrowTy };
8380 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
8381 }
8382 case NEON::BI__builtin_neon_vqdmlal_v:
8383 case NEON::BI__builtin_neon_vqdmlsl_v: {
8384 SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
8385 Ops[1] =
8386 EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), MulOps, "vqdmlal");
8387 Ops.resize(2);
8388 return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint);
8389 }
8390 case NEON::BI__builtin_neon_vqdmulhq_lane_v:
8391 case NEON::BI__builtin_neon_vqdmulh_lane_v:
8392 case NEON::BI__builtin_neon_vqrdmulhq_lane_v:
8393 case NEON::BI__builtin_neon_vqrdmulh_lane_v: {
8394 auto *RTy = cast<llvm::FixedVectorType>(Ty);
8395 if (BuiltinID == NEON::BI__builtin_neon_vqdmulhq_lane_v ||
8396 BuiltinID == NEON::BI__builtin_neon_vqrdmulhq_lane_v)
8397 RTy = llvm::FixedVectorType::get(RTy->getElementType(),
8398 RTy->getNumElements() * 2);
8399 llvm::Type *Tys[2] = {
8400 RTy, GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
8401 /*isQuad*/ false))};
8402 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
8403 }
8404 case NEON::BI__builtin_neon_vqdmulhq_laneq_v:
8405 case NEON::BI__builtin_neon_vqdmulh_laneq_v:
8406 case NEON::BI__builtin_neon_vqrdmulhq_laneq_v:
8407 case NEON::BI__builtin_neon_vqrdmulh_laneq_v: {
8408 llvm::Type *Tys[2] = {
8409 Ty, GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
8410 /*isQuad*/ true))};
8411 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
8412 }
8413 case NEON::BI__builtin_neon_vqshl_n_v:
8414 case NEON::BI__builtin_neon_vqshlq_n_v:
8415 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
8416 1, false);
8417 case NEON::BI__builtin_neon_vqshlu_n_v:
8418 case NEON::BI__builtin_neon_vqshluq_n_v:
8419 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n",
8420 1, false);
8421 case NEON::BI__builtin_neon_vrecpe_v:
8422 case NEON::BI__builtin_neon_vrecpeq_v:
8423 case NEON::BI__builtin_neon_vrsqrte_v:
8424 case NEON::BI__builtin_neon_vrsqrteq_v:
8425 Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
8426 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
8427 case NEON::BI__builtin_neon_vrndi_v:
8428 case NEON::BI__builtin_neon_vrndiq_v:
8429 Int = Builder.getIsFPConstrained()
8430 ? Intrinsic::experimental_constrained_nearbyint
8431 : Intrinsic::nearbyint;
8432 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
8433 case NEON::BI__builtin_neon_vrshr_n_v:
8434 case NEON::BI__builtin_neon_vrshrq_n_v:
8435 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n",
8436 1, true);
8437 case NEON::BI__builtin_neon_vsha512hq_u64:
8438 case NEON::BI__builtin_neon_vsha512h2q_u64:
8439 case NEON::BI__builtin_neon_vsha512su0q_u64:
8440 case NEON::BI__builtin_neon_vsha512su1q_u64: {
8441 Function *F = CGM.getIntrinsic(Int);
8442 return EmitNeonCall(F, Ops, "");
8443 }
8444 case NEON::BI__builtin_neon_vshl_n_v:
8445 case NEON::BI__builtin_neon_vshlq_n_v:
8446 Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
8447 return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1],
8448 "vshl_n");
8449 case NEON::BI__builtin_neon_vshll_n_v: {
8450 llvm::FixedVectorType *SrcTy =
8451 llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
8452 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
8453 if (Usgn)
8454 Ops[0] = Builder.CreateZExt(Ops[0], VTy);
8455 else
8456 Ops[0] = Builder.CreateSExt(Ops[0], VTy);
8457 Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);
8458 return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
8459 }
8460 case NEON::BI__builtin_neon_vshrn_n_v: {
8461 llvm::FixedVectorType *SrcTy =
8462 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
8463 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
8464 Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);
8465 if (Usgn)
8466 Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
8467 else
8468 Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
8469 return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
8470 }
8471 case NEON::BI__builtin_neon_vshr_n_v:
8472 case NEON::BI__builtin_neon_vshrq_n_v:
8473 return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n");
8474 case NEON::BI__builtin_neon_vst1_v:
8475 case NEON::BI__builtin_neon_vst1q_v:
8476 case NEON::BI__builtin_neon_vst2_v:
8477 case NEON::BI__builtin_neon_vst2q_v:
8478 case NEON::BI__builtin_neon_vst3_v:
8479 case NEON::BI__builtin_neon_vst3q_v:
8480 case NEON::BI__builtin_neon_vst4_v:
8481 case NEON::BI__builtin_neon_vst4q_v:
8482 case NEON::BI__builtin_neon_vst2_lane_v:
8483 case NEON::BI__builtin_neon_vst2q_lane_v:
8484 case NEON::BI__builtin_neon_vst3_lane_v:
8485 case NEON::BI__builtin_neon_vst3q_lane_v:
8486 case NEON::BI__builtin_neon_vst4_lane_v:
8487 case NEON::BI__builtin_neon_vst4q_lane_v: {
8488 llvm::Type *Tys[] = {Int8PtrTy, Ty};
8489 Ops.push_back(getAlignmentValue32(PtrOp0));
8490 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
8491 }
8492 case NEON::BI__builtin_neon_vsm3partw1q_u32:
8493 case NEON::BI__builtin_neon_vsm3partw2q_u32:
8494 case NEON::BI__builtin_neon_vsm3ss1q_u32:
8495 case NEON::BI__builtin_neon_vsm4ekeyq_u32:
8496 case NEON::BI__builtin_neon_vsm4eq_u32: {
8497 Function *F = CGM.getIntrinsic(Int);
8498 return EmitNeonCall(F, Ops, "");
8499 }
8500 case NEON::BI__builtin_neon_vsm3tt1aq_u32:
8501 case NEON::BI__builtin_neon_vsm3tt1bq_u32:
8502 case NEON::BI__builtin_neon_vsm3tt2aq_u32:
8503 case NEON::BI__builtin_neon_vsm3tt2bq_u32: {
8504 Function *F = CGM.getIntrinsic(Int);
8505 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
8506 return EmitNeonCall(F, Ops, "");
8507 }
8508 case NEON::BI__builtin_neon_vst1_x2_v:
8509 case NEON::BI__builtin_neon_vst1q_x2_v:
8510 case NEON::BI__builtin_neon_vst1_x3_v:
8511 case NEON::BI__builtin_neon_vst1q_x3_v:
8512 case NEON::BI__builtin_neon_vst1_x4_v:
8513 case NEON::BI__builtin_neon_vst1q_x4_v: {
8514 // TODO: Currently in AArch32 mode the pointer operand comes first, whereas
8515 // in AArch64 it comes last. We may want to stick to one or the other.
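 // Roughly, the two operand orders look like this (types are illustrative; the
 // mangled suffixes follow the Tys arrays below):
 //   AArch64: call void @llvm.aarch64.neon.st1x2.v8i16.p0(<8 x i16> %a,
 //                                                        <8 x i16> %b, ptr %p)
 //   AArch32: call void @llvm.arm.neon.vst1x2.p0.v8i16(ptr %p, <8 x i16> %a,
 //                                                     <8 x i16> %b)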
8516 if (Arch == llvm::Triple::aarch64 || Arch == llvm::Triple::aarch64_be ||
8517 Arch == llvm::Triple::aarch64_32) {
8518 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
8519 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
8520 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
8521 }
8522 llvm::Type *Tys[2] = {UnqualPtrTy, VTy};
8523 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
8524 }
8525 case NEON::BI__builtin_neon_vsubhn_v: {
8526 llvm::FixedVectorType *SrcTy =
8527 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
8528
8529 // %diff = sub <4 x i32> %lhs, %rhs
8530 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
8531 Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
8532 Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");
8533
8534 // %high = lshr <4 x i32> %diff, <i32 16, i32 16, i32 16, i32 16>
8535 Constant *ShiftAmt =
8536 ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
8537 Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");
8538
8539 // %res = trunc <4 x i32> %high to <4 x i16>
8540 return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
8541 }
8542 case NEON::BI__builtin_neon_vtrn_v:
8543 case NEON::BI__builtin_neon_vtrnq_v: {
8544 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8545 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
8546 Value *SV = nullptr;
8547
8548 for (unsigned vi = 0; vi != 2; ++vi) {
8549 SmallVector<int, 16> Indices;
8550 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
8551 Indices.push_back(i+vi);
8552 Indices.push_back(i+e+vi);
8553 }
8554 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
8555 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
8556 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
8557 }
8558 return SV;
8559 }
8560 case NEON::BI__builtin_neon_vtst_v:
8561 case NEON::BI__builtin_neon_vtstq_v: {
8562 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
8563 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8564 Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
8565 Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
8566 ConstantAggregateZero::get(Ty));
8567 return Builder.CreateSExt(Ops[0], Ty, "vtst");
8568 }
8569 case NEON::BI__builtin_neon_vuzp_v:
8570 case NEON::BI__builtin_neon_vuzpq_v: {
8571 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8572 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
8573 Value *SV = nullptr;
8574
8575 for (unsigned vi = 0; vi != 2; ++vi) {
8576 SmallVector<int, 16> Indices;
8577 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
8578 Indices.push_back(2*i+vi);
8579
8580 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
8581 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
8582 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
8583 }
8584 return SV;
8585 }
8586 case NEON::BI__builtin_neon_vxarq_u64: {
8587 Function *F = CGM.getIntrinsic(Int);
8588 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
8589 return EmitNeonCall(F, Ops, "");
8590 }
8591 case NEON::BI__builtin_neon_vzip_v:
8592 case NEON::BI__builtin_neon_vzipq_v: {
8593 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8594 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
8595 Value *SV = nullptr;
8596
8597 for (unsigned vi = 0; vi != 2; ++vi) {
8598 SmallVector<int, 16> Indices;
8599 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
8600 Indices.push_back((i + vi*e) >> 1);
8601 Indices.push_back(((i + vi*e) >> 1)+e);
8602 }
8603 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
8604 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
8605 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
8606 }
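 // For <4 x i32> operands, the two iterations above produce the shuffle masks
 // <0, 4, 1, 5> and <2, 6, 3, 7>, interleaving the two inputs element by
 // element (element count is illustrative).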
8607 return SV;
8608 }
8609 case NEON::BI__builtin_neon_vdot_s32:
8610 case NEON::BI__builtin_neon_vdot_u32:
8611 case NEON::BI__builtin_neon_vdotq_s32:
8612 case NEON::BI__builtin_neon_vdotq_u32: {
8613 auto *InputTy =
8614 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
8615 llvm::Type *Tys[2] = { Ty, InputTy };
8616 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vdot");
8617 }
8618 case NEON::BI__builtin_neon_vfmlal_low_f16:
8619 case NEON::BI__builtin_neon_vfmlalq_low_f16: {
8620 auto *InputTy =
8621 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
8622 llvm::Type *Tys[2] = { Ty, InputTy };
8623 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_low");
8624 }
8625 case NEON::BI__builtin_neon_vfmlsl_low_f16:
8626 case NEON::BI__builtin_neon_vfmlslq_low_f16: {
8627 auto *InputTy =
8628 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
8629 llvm::Type *Tys[2] = { Ty, InputTy };
8630 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_low");
8631 }
8632 case NEON::BI__builtin_neon_vfmlal_high_f16:
8633 case NEON::BI__builtin_neon_vfmlalq_high_f16: {
8634 auto *InputTy =
8635 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
8636 llvm::Type *Tys[2] = { Ty, InputTy };
8637 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_high");
8638 }
8639 case NEON::BI__builtin_neon_vfmlsl_high_f16:
8640 case NEON::BI__builtin_neon_vfmlslq_high_f16: {
8641 auto *InputTy =
8642 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
8643 llvm::Type *Tys[2] = { Ty, InputTy };
8644 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_high");
8645 }
8646 case NEON::BI__builtin_neon_vmmlaq_s32:
8647 case NEON::BI__builtin_neon_vmmlaq_u32: {
8648 auto *InputTy =
8649 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
8650 llvm::Type *Tys[2] = { Ty, InputTy };
8651 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vmmla");
8652 }
8653 case NEON::BI__builtin_neon_vusmmlaq_s32: {
8654 auto *InputTy =
8655 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
8656 llvm::Type *Tys[2] = { Ty, InputTy };
8657 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusmmla");
8658 }
8659 case NEON::BI__builtin_neon_vusdot_s32:
8660 case NEON::BI__builtin_neon_vusdotq_s32: {
8661 auto *InputTy =
8662 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
8663 llvm::Type *Tys[2] = { Ty, InputTy };
8664 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusdot");
8665 }
8666 case NEON::BI__builtin_neon_vbfdot_f32:
8667 case NEON::BI__builtin_neon_vbfdotq_f32: {
8668 llvm::Type *InputTy =
8669 llvm::FixedVectorType::get(BFloatTy, Ty->getPrimitiveSizeInBits() / 16);
8670 llvm::Type *Tys[2] = { Ty, InputTy };
8671 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vbfdot");
8672 }
8673 case NEON::BI__builtin_neon___a32_vcvt_bf16_f32: {
8674 llvm::Type *Tys[1] = { Ty };
8675 Function *F = CGM.getIntrinsic(Int, Tys);
8676 return EmitNeonCall(F, Ops, "vcvtfp2bf");
8677 }
8678
8679 }
8680
8681 assert(Int && "Expected valid intrinsic number");
8682
8683 // Determine the type(s) of this overloaded AArch64 intrinsic.
8684 Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E);
8685
8686 Value *Result = EmitNeonCall(F, Ops, NameHint);
8687 llvm::Type *ResultType = ConvertType(E->getType());
8688 // Cast the AArch64 intrinsic's one-element vector result back to the
8689 // scalar type expected by the builtin.
8690 return Builder.CreateBitCast(Result, ResultType, NameHint);
8691}
8692
8693Value *CodeGenFunction::EmitAArch64CompareBuiltinExpr(
8694 Value *Op, llvm::Type *Ty, const CmpInst::Predicate Fp,
8695 const CmpInst::Predicate Ip, const Twine &Name) {
8696 llvm::Type *OTy = Op->getType();
8697
8698 // FIXME: this is utterly horrific. We should not be looking at previous
8699 // codegen context to find out what needs doing. Unfortunately TableGen
8700 // currently gives us exactly the same calls for vceqz_f32 and vceqz_s32
8701 // (etc).
8702 if (BitCastInst *BI = dyn_cast<BitCastInst>(Op))
8703 OTy = BI->getOperand(0)->getType();
8704
8705 Op = Builder.CreateBitCast(Op, OTy);
8706 if (OTy->getScalarType()->isFloatingPointTy()) {
8707 if (Fp == CmpInst::FCMP_OEQ)
8708 Op = Builder.CreateFCmp(Fp, Op, Constant::getNullValue(OTy));
8709 else
8710 Op = Builder.CreateFCmpS(Fp, Op, Constant::getNullValue(OTy));
8711 } else {
8712 Op = Builder.CreateICmp(Ip, Op, Constant::getNullValue(OTy));
8713 }
8714 return Builder.CreateSExt(Op, Ty, Name);
8715}
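// A sketch of what the helper above emits (types are illustrative):
//   vceqz_f32: %cmp = fcmp oeq <2 x float> %a, zeroinitializer
//              %res = sext <2 x i1> %cmp to <2 x i32>
//   vceqz_s32: %cmp = icmp eq <2 x i32> %a, zeroinitializer
//              %res = sext <2 x i1> %cmp to <2 x i32>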
8716
8717static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
8718 Value *ExtOp, Value *IndexOp,
8719 llvm::Type *ResTy, unsigned IntID,
8720 const char *Name) {
8721 SmallVector<Value *, 2> TblOps;
8722 if (ExtOp)
8723 TblOps.push_back(ExtOp);
8724
8725 // Build a vector containing sequential numbers like (0, 1, 2, ..., 15).
8726 SmallVector<int, 16> Indices;
8727 auto *TblTy = cast<llvm::FixedVectorType>(Ops[0]->getType());
8728 for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
8729 Indices.push_back(2*i);
8730 Indices.push_back(2*i+1);
8731 }
8732
8733 int PairPos = 0, End = Ops.size() - 1;
8734 while (PairPos < End) {
8735 TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
8736 Ops[PairPos+1], Indices,
8737 Name));
8738 PairPos += 2;
8739 }
8740
8741 // If there's an odd number of 64-bit lookup table vectors, fill the high
8742 // 64 bits of the last 128-bit lookup table with zero.
8743 if (PairPos == End) {
8744 Value *ZeroTbl = ConstantAggregateZero::get(TblTy);
8745 TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
8746 ZeroTbl, Indices, Name));
8747 }
8748
8749 Function *TblF;
8750 TblOps.push_back(IndexOp);
8751 TblF = CGF.CGM.getIntrinsic(IntID, ResTy);
8752
8753 return CGF.EmitNeonCall(TblF, TblOps, Name);
8754}
8755
8756Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) {
8757 unsigned Value;
8758 switch (BuiltinID) {
8759 default:
8760 return nullptr;
8761 case clang::ARM::BI__builtin_arm_nop:
8762 Value = 0;
8763 break;
8764 case clang::ARM::BI__builtin_arm_yield:
8765 case clang::ARM::BI__yield:
8766 Value = 1;
8767 break;
8768 case clang::ARM::BI__builtin_arm_wfe:
8769 case clang::ARM::BI__wfe:
8770 Value = 2;
8771 break;
8772 case clang::ARM::BI__builtin_arm_wfi:
8773 case clang::ARM::BI__wfi:
8774 Value = 3;
8775 break;
8776 case clang::ARM::BI__builtin_arm_sev:
8777 case clang::ARM::BI__sev:
8778 Value = 4;
8779 break;
8780 case clang::ARM::BI__builtin_arm_sevl:
8781 case clang::ARM::BI__sevl:
8782 Value = 5;
8783 break;
8784 }
8785
8786 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint),
8787 llvm::ConstantInt::get(Int32Ty, Value));
8788}
8789
8794};
8795
8796// Generates the IR for __builtin_read_exec_*.
8797// Lowers the builtin to amdgcn_ballot intrinsic.
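// Sketch of the lowering for the *_hi variant on wave64 (illustrative):
//   %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 true)
//   %hi     = lshr i64 %ballot, 32
//   %res    = trunc i64 %hi to i32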
8798static Value *EmitAMDGCNBallotForExec(CodeGenFunction &CGF, const CallExpr *E,
8799 llvm::Type *RegisterType,
8800 llvm::Type *ValueType, bool isExecHi) {
8801 CodeGen::CGBuilderTy &Builder = CGF.Builder;
8802 CodeGen::CodeGenModule &CGM = CGF.CGM;
8803
8804 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {RegisterType});
8805 llvm::Value *Call = Builder.CreateCall(F, {Builder.getInt1(true)});
8806
8807 if (isExecHi) {
8808 Value *Rt2 = Builder.CreateLShr(Call, 32);
8809 Rt2 = Builder.CreateTrunc(Rt2, CGF.Int32Ty);
8810 return Rt2;
8811 }
8812
8813 return Call;
8814}
8815
8816 // Generates the IR for the read/write special register builtin.
8817 // ValueType is the type of the value that is to be written or read;
8818 // RegisterType is the type of the register being written to or read from.
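// For example, a 32-bit read/write pair roughly lowers to (the register name
// string is illustrative):
//   %v = call i32 @llvm.read_register.i32(metadata !0)
//   call void @llvm.write_register.i32(metadata !0, i32 %v)
//   !0 = !{!"cpsr"}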
8819static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
8820 const CallExpr *E,
8821 llvm::Type *RegisterType,
8822 llvm::Type *ValueType,
8823 SpecialRegisterAccessKind AccessKind,
8824 StringRef SysReg = "") {
8825 // Read and write register intrinsics only support 32-, 64- and 128-bit operations.
8826 assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64) ||
8827 RegisterType->isIntegerTy(128)) &&
8828 "Unsupported size for register.");
8829
8830 CodeGen::CGBuilderTy &Builder = CGF.Builder;
8831 CodeGen::CodeGenModule &CGM = CGF.CGM;
8832 LLVMContext &Context = CGM.getLLVMContext();
8833
8834 if (SysReg.empty()) {
8835 const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts();
8836 SysReg = cast<clang::StringLiteral>(SysRegStrExpr)->getString();
8837 }
8838
8839 llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) };
8840 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
8841 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
8842
8843 llvm::Type *Types[] = { RegisterType };
8844
8845 bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32);
8846 assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64))
8847 && "Can't fit 64-bit value in 32-bit register");
8848
8849 if (AccessKind != Write) {
8850 assert(AccessKind == NormalRead || AccessKind == VolatileRead);
8851 llvm::Function *F = CGM.getIntrinsic(
8852 AccessKind == VolatileRead ? llvm::Intrinsic::read_volatile_register
8853 : llvm::Intrinsic::read_register,
8854 Types);
8855 llvm::Value *Call = Builder.CreateCall(F, Metadata);
8856
8857 if (MixedTypes)
8858 // Read into 64 bit register and then truncate result to 32 bit.
8859 return Builder.CreateTrunc(Call, ValueType);
8860
8861 if (ValueType->isPointerTy())
8862 // Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*).
8863 return Builder.CreateIntToPtr(Call, ValueType);
8864
8865 return Call;
8866 }
8867
8868 llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
8869 llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1));
8870 if (MixedTypes) {
8871 // Extend 32 bit write value to 64 bit to pass to write.
8872 ArgValue = Builder.CreateZExt(ArgValue, RegisterType);
8873 return Builder.CreateCall(F, { Metadata, ArgValue });
8874 }
8875
8876 if (ValueType->isPointerTy()) {
8877 // Have VoidPtrTy ArgValue but want to return an i32/i64.
8878 ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType);
8879 return Builder.CreateCall(F, { Metadata, ArgValue });
8880 }
8881
8882 return Builder.CreateCall(F, { Metadata, ArgValue });
8883}
8884
8885/// Return true if BuiltinID is an overloaded Neon intrinsic with an extra
8886/// argument that specifies the vector type.
8887static bool HasExtraNeonArgument(unsigned BuiltinID) {
8888 switch (BuiltinID) {
8889 default: break;
8890 case NEON::BI__builtin_neon_vget_lane_i8:
8891 case NEON::BI__builtin_neon_vget_lane_i16:
8892 case NEON::BI__builtin_neon_vget_lane_bf16:
8893 case NEON::BI__builtin_neon_vget_lane_i32:
8894 case NEON::BI__builtin_neon_vget_lane_i64:
8895 case NEON::BI__builtin_neon_vget_lane_f32:
8896 case NEON::BI__builtin_neon_vgetq_lane_i8:
8897 case NEON::BI__builtin_neon_vgetq_lane_i16:
8898 case NEON::BI__builtin_neon_vgetq_lane_bf16:
8899 case NEON::BI__builtin_neon_vgetq_lane_i32:
8900 case NEON::BI__builtin_neon_vgetq_lane_i64:
8901 case NEON::BI__builtin_neon_vgetq_lane_f32:
8902 case NEON::BI__builtin_neon_vduph_lane_bf16:
8903 case NEON::BI__builtin_neon_vduph_laneq_bf16:
8904 case NEON::BI__builtin_neon_vset_lane_i8:
8905 case NEON::BI__builtin_neon_vset_lane_i16:
8906 case NEON::BI__builtin_neon_vset_lane_bf16:
8907 case NEON::BI__builtin_neon_vset_lane_i32:
8908 case NEON::BI__builtin_neon_vset_lane_i64:
8909 case NEON::BI__builtin_neon_vset_lane_f32:
8910 case NEON::BI__builtin_neon_vsetq_lane_i8:
8911 case NEON::BI__builtin_neon_vsetq_lane_i16:
8912 case NEON::BI__builtin_neon_vsetq_lane_bf16:
8913 case NEON::BI__builtin_neon_vsetq_lane_i32:
8914 case NEON::BI__builtin_neon_vsetq_lane_i64:
8915 case NEON::BI__builtin_neon_vsetq_lane_f32:
8916 case NEON::BI__builtin_neon_vsha1h_u32:
8917 case NEON::BI__builtin_neon_vsha1cq_u32:
8918 case NEON::BI__builtin_neon_vsha1pq_u32:
8919 case NEON::BI__builtin_neon_vsha1mq_u32:
8920 case NEON::BI__builtin_neon_vcvth_bf16_f32:
8921 case clang::ARM::BI_MoveToCoprocessor:
8922 case clang::ARM::BI_MoveToCoprocessor2:
8923 return false;
8924 }
8925 return true;
8926}
8927
8928Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
8929 const CallExpr *E,
8930 ReturnValueSlot ReturnValue,
8931 llvm::Triple::ArchType Arch) {
8932 if (auto Hint = GetValueForARMHint(BuiltinID))
8933 return Hint;
8934
8935 if (BuiltinID == clang::ARM::BI__emit) {
8936 bool IsThumb = getTarget().getTriple().getArch() == llvm::Triple::thumb;
8937 llvm::FunctionType *FTy =
8938 llvm::FunctionType::get(VoidTy, /*Variadic=*/false);
8939
8940 Expr::EvalResult Result;
8941 if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
8942 llvm_unreachable("Sema will ensure that the parameter is constant");
8943
8944 llvm::APSInt Value = Result.Val.getInt();
8945 uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue();
8946
8947 llvm::InlineAsm *Emit =
8948 IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "",
8949 /*hasSideEffects=*/true)
8950 : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "",
8951 /*hasSideEffects=*/true);
8952
8953 return Builder.CreateCall(Emit);
8954 }
8955
8956 if (BuiltinID == clang::ARM::BI__builtin_arm_dbg) {
8957 Value *Option = EmitScalarExpr(E->getArg(0));
8958 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option);
8959 }
8960
8961 if (BuiltinID == clang::ARM::BI__builtin_arm_prefetch) {
8962 Value *Address = EmitScalarExpr(E->getArg(0));
8963 Value *RW = EmitScalarExpr(E->getArg(1));
8964 Value *IsData = EmitScalarExpr(E->getArg(2));
8965
8966 // Locality is not supported on the ARM target.
8967 Value *Locality = llvm::ConstantInt::get(Int32Ty, 3);
8968
8969 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
8970 return Builder.CreateCall(F, {Address, RW, Locality, IsData});
8971 }
8972
8973 if (BuiltinID == clang::ARM::BI__builtin_arm_rbit) {
8974 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
8975 return Builder.CreateCall(
8976 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
8977 }
8978
8979 if (BuiltinID == clang::ARM::BI__builtin_arm_clz ||
8980 BuiltinID == clang::ARM::BI__builtin_arm_clz64) {
8981 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
8982 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Arg->getType());
8983 Value *Res = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
8984 if (BuiltinID == clang::ARM::BI__builtin_arm_clz64)
8985 Res = Builder.CreateTrunc(Res, Builder.getInt32Ty());
8986 return Res;
8987 }
8988
8989
8990 if (BuiltinID == clang::ARM::BI__builtin_arm_cls) {
8991 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
8992 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls), Arg, "cls");
8993 }
8994 if (BuiltinID == clang::ARM::BI__builtin_arm_cls64) {
8995 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
8996 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls64), Arg,
8997 "cls");
8998 }
8999
9000 if (BuiltinID == clang::ARM::BI__clear_cache) {
9001 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
9002 const FunctionDecl *FD = E->getDirectCallee();
9003 Value *Ops[2];
9004 for (unsigned i = 0; i < 2; i++)
9005 Ops[i] = EmitScalarExpr(E->getArg(i));
9006 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
9007 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
9008 StringRef Name = FD->getName();
9009 return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
9010 }
9011
9012 if (BuiltinID == clang::ARM::BI__builtin_arm_mcrr ||
9013 BuiltinID == clang::ARM::BI__builtin_arm_mcrr2) {
9014 Function *F;
9015
9016 switch (BuiltinID) {
9017 default: llvm_unreachable("unexpected builtin");
9018 case clang::ARM::BI__builtin_arm_mcrr:
9019 F = CGM.getIntrinsic(Intrinsic::arm_mcrr);
9020 break;
9021 case clang::ARM::BI__builtin_arm_mcrr2:
9022 F = CGM.getIntrinsic(Intrinsic::arm_mcrr2);
9023 break;
9024 }
9025
9026 // The MCRR{2} instruction has 5 operands, but
9027 // the builtin has only 4 because Rt and Rt2
9028 // are packed into a single unsigned 64-bit
9029 // integer in the builtin's signature, while
9030 // the LLVM intrinsic (and the instruction)
9031 // take them as two separate 32-bit values.
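 // Sketch of the split performed below for the 64-bit RtAndRt2 value
 // (value names are illustrative):
 //   %rt   = trunc i64 %v to i32      ; low 32 bits  -> Rt
 //   %v.hi = lshr i64 %v, 32
 //   %rt2  = trunc i64 %v.hi to i32   ; high 32 bits -> Rt2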
9032
9033 Value *Coproc = EmitScalarExpr(E->getArg(0));
9034 Value *Opc1 = EmitScalarExpr(E->getArg(1));
9035 Value *RtAndRt2 = EmitScalarExpr(E->getArg(2));
9036 Value *CRm = EmitScalarExpr(E->getArg(3));
9037
9038 Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
9039 Value *Rt = Builder.CreateTruncOrBitCast(RtAndRt2, Int32Ty);
9040 Value *Rt2 = Builder.CreateLShr(RtAndRt2, C1);
9041 Rt2 = Builder.CreateTruncOrBitCast(Rt2, Int32Ty);
9042
9043 return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm});
9044 }
9045
9046 if (BuiltinID == clang::ARM::BI__builtin_arm_mrrc ||
9047 BuiltinID == clang::ARM::BI__builtin_arm_mrrc2) {
9048 Function *F;
9049
9050 switch (BuiltinID) {
9051 default: llvm_unreachable("unexpected builtin");
9052 case clang::ARM::BI__builtin_arm_mrrc:
9053 F = CGM.getIntrinsic(Intrinsic::arm_mrrc);
9054 break;
9055 case clang::ARM::BI__builtin_arm_mrrc2:
9056 F = CGM.getIntrinsic(Intrinsic::arm_mrrc2);
9057 break;
9058 }
9059
9060 Value *Coproc = EmitScalarExpr(E->getArg(0));
9061 Value *Opc1 = EmitScalarExpr(E->getArg(1));
9062 Value *CRm = EmitScalarExpr(E->getArg(2));
9063 Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm});
9064
9065 // Returns an unsigned 64 bit integer, represented
9066 // as two 32 bit integers.
9067
9068 Value *Rt = Builder.CreateExtractValue(RtAndRt2, 1);
9069 Value *Rt1 = Builder.CreateExtractValue(RtAndRt2, 0);
9070 Rt = Builder.CreateZExt(Rt, Int64Ty);
9071 Rt1 = Builder.CreateZExt(Rt1, Int64Ty);
9072
9073 Value *ShiftCast = llvm::ConstantInt::get(Int64Ty, 32);
9074 RtAndRt2 = Builder.CreateShl(Rt, ShiftCast, "shl", true);
9075 RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1);
9076
9077 return Builder.CreateBitCast(RtAndRt2, ConvertType(E->getType()));
9078 }
9079
9080 if (BuiltinID == clang::ARM::BI__builtin_arm_ldrexd ||
9081 ((BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||
9082 BuiltinID == clang::ARM::BI__builtin_arm_ldaex) &&
9083 getContext().getTypeSize(E->getType()) == 64) ||
9084 BuiltinID == clang::ARM::BI__ldrexd) {
9085 Function *F;
9086
9087 switch (BuiltinID) {
9088 default: llvm_unreachable("unexpected builtin");
9089 case clang::ARM::BI__builtin_arm_ldaex:
9090 F = CGM.getIntrinsic(Intrinsic::arm_ldaexd);
9091 break;
9092 case clang::ARM::BI__builtin_arm_ldrexd:
9093 case clang::ARM::BI__builtin_arm_ldrex:
9094 case clang::ARM::BI__ldrexd:
9095 F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);
9096 break;
9097 }
9098
9099 Value *LdPtr = EmitScalarExpr(E->getArg(0));
9100 Value *Val = Builder.CreateCall(F, LdPtr, "ldrexd");
9101
9102 Value *Val0 = Builder.CreateExtractValue(Val, 1);
9103 Value *Val1 = Builder.CreateExtractValue(Val, 0);
9104 Val0 = Builder.CreateZExt(Val0, Int64Ty);
9105 Val1 = Builder.CreateZExt(Val1, Int64Ty);
9106
9107 Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);
9108 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
9109 Val = Builder.CreateOr(Val, Val1);
9110 return Builder.CreateBitCast(Val, ConvertType(E->getType()));
9111 }
9112
9113 if (BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||
9114 BuiltinID == clang::ARM::BI__builtin_arm_ldaex) {
9115 Value *LoadAddr = EmitScalarExpr(E->getArg(0));
9116
9117 QualType Ty = E->getType();
9118 llvm::Type *RealResTy = ConvertType(Ty);
9119 llvm::Type *IntTy =
9120 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
9121
9122 Function *F = CGM.getIntrinsic(
9123 BuiltinID == clang::ARM::BI__builtin_arm_ldaex ? Intrinsic::arm_ldaex
9124 : Intrinsic::arm_ldrex,
9125 UnqualPtrTy);
9126 CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldrex");
9127 Val->addParamAttr(
9128 0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy));
9129
9130 if (RealResTy->isPointerTy())
9131 return Builder.CreateIntToPtr(Val, RealResTy);
9132 else {
9133 llvm::Type *IntResTy = llvm::IntegerType::get(
9134 getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
9135 return Builder.CreateBitCast(Builder.CreateTruncOrBitCast(Val, IntResTy),
9136 RealResTy);
9137 }
9138 }
9139
9140 if (BuiltinID == clang::ARM::BI__builtin_arm_strexd ||
9141 ((BuiltinID == clang::ARM::BI__builtin_arm_stlex ||
9142 BuiltinID == clang::ARM::BI__builtin_arm_strex) &&
9143 getContext().getTypeSize(E->getArg(0)->getType()) == 64)) {
9144 Function *F = CGM.getIntrinsic(
9145 BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlexd
9146 : Intrinsic::arm_strexd);
9147 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty);
9148
9149 Address Tmp = CreateMemTemp(E->getArg(0)->getType());
9150 Value *Val = EmitScalarExpr(E->getArg(0));
9151 Builder.CreateStore(Val, Tmp);
9152
9153 Address LdPtr = Tmp.withElementType(STy);
9154 Val = Builder.CreateLoad(LdPtr);
9155
9156 Value *Arg0 = Builder.CreateExtractValue(Val, 0);
9157 Value *Arg1 = Builder.CreateExtractValue(Val, 1);
9158 Value *StPtr = EmitScalarExpr(E->getArg(1));
9159 return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd");
9160 }
9161
9162 if (BuiltinID == clang::ARM::BI__builtin_arm_strex ||
9163 BuiltinID == clang::ARM::BI__builtin_arm_stlex) {
9164 Value *StoreVal = EmitScalarExpr(E->getArg(0));
9165 Value *StoreAddr = EmitScalarExpr(E->getArg(1));
9166
9167 QualType Ty = E->getArg(0)->getType();
9168 llvm::Type *StoreTy =
9169 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
9170
9171 if (StoreVal->getType()->isPointerTy())
9172 StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty);
9173 else {
9174 llvm::Type *IntTy = llvm::IntegerType::get(
9175 getLLVMContext(),
9176 CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
9177 StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
9178 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty);
9179 }
9180
9181 Function *F = CGM.getIntrinsic(
9182 BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlex
9183 : Intrinsic::arm_strex,
9184 StoreAddr->getType());
9185
9186 CallInst *CI = Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex");
9187 CI->addParamAttr(
9188 1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy));
9189 return CI;
9190 }
9191
9192 if (BuiltinID == clang::ARM::BI__builtin_arm_clrex) {
9193 Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex);
9194 return Builder.CreateCall(F);
9195 }
9196
9197 // CRC32
9198 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
9199 switch (BuiltinID) {
9200 case clang::ARM::BI__builtin_arm_crc32b:
9201 CRCIntrinsicID = Intrinsic::arm_crc32b; break;
9202 case clang::ARM::BI__builtin_arm_crc32cb:
9203 CRCIntrinsicID = Intrinsic::arm_crc32cb; break;
9204 case clang::ARM::BI__builtin_arm_crc32h:
9205 CRCIntrinsicID = Intrinsic::arm_crc32h; break;
9206 case clang::ARM::BI__builtin_arm_crc32ch:
9207 CRCIntrinsicID = Intrinsic::arm_crc32ch; break;
9208 case clang::ARM::BI__builtin_arm_crc32w:
9209 case clang::ARM::BI__builtin_arm_crc32d:
9210 CRCIntrinsicID = Intrinsic::arm_crc32w; break;
9211 case clang::ARM::BI__builtin_arm_crc32cw:
9212 case clang::ARM::BI__builtin_arm_crc32cd:
9213 CRCIntrinsicID = Intrinsic::arm_crc32cw; break;
9214 }
9215
9216 if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
9217 Value *Arg0 = EmitScalarExpr(E->getArg(0));
9218 Value *Arg1 = EmitScalarExpr(E->getArg(1));
9219
9220 // crc32{c,}d intrinsics are implemented as two calls to crc32{c,}w
9221 // intrinsics, hence we need different codegen for these cases.
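 // E.g. __builtin_arm_crc32d(crc, x) is emitted roughly as (sketch):
 //   %lo   = trunc i64 %x to i32
 //   %x.hi = lshr i64 %x, 32
 //   %hi   = trunc i64 %x.hi to i32
 //   %c1   = call i32 @llvm.arm.crc32w(i32 %crc, i32 %lo)
 //   %res  = call i32 @llvm.arm.crc32w(i32 %c1, i32 %hi)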
9222 if (BuiltinID == clang::ARM::BI__builtin_arm_crc32d ||
9223 BuiltinID == clang::ARM::BI__builtin_arm_crc32cd) {
9224 Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
9225 Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty);
9226 Value *Arg1b = Builder.CreateLShr(Arg1, C1);
9227 Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty);
9228
9229 Function *F = CGM.getIntrinsic(CRCIntrinsicID);
9230 Value *Res = Builder.CreateCall(F, {Arg0, Arg1a});
9231 return Builder.CreateCall(F, {Res, Arg1b});
9232 } else {
9233 Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty);
9234
9235 Function *F = CGM.getIntrinsic(CRCIntrinsicID);
9236 return Builder.CreateCall(F, {Arg0, Arg1});
9237 }
9238 }
9239
9240 if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||
9241 BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
9242 BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||
9243 BuiltinID == clang::ARM::BI__builtin_arm_wsr ||
9244 BuiltinID == clang::ARM::BI__builtin_arm_wsr64 ||
9245 BuiltinID == clang::ARM::BI__builtin_arm_wsrp) {
9246
9247 SpecialRegisterAccessKind AccessKind = Write;
9248 if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||
9249 BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
9250 BuiltinID == clang::ARM::BI__builtin_arm_rsrp)
9251 AccessKind = VolatileRead;
9252
9253 bool IsPointerBuiltin = BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||
9254 BuiltinID == clang::ARM::BI__builtin_arm_wsrp;
9255
9256 bool Is64Bit = BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
9257 BuiltinID == clang::ARM::BI__builtin_arm_wsr64;
9258
9259 llvm::Type *ValueType;
9260 llvm::Type *RegisterType;
9261 if (IsPointerBuiltin) {
9262 ValueType = VoidPtrTy;
9263 RegisterType = Int32Ty;
9264 } else if (Is64Bit) {
9265 ValueType = RegisterType = Int64Ty;
9266 } else {
9267 ValueType = RegisterType = Int32Ty;
9268 }
9269
9270 return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType,
9271 AccessKind);
9272 }
9273
9274 if (BuiltinID == ARM::BI__builtin_sponentry) {
9275 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy);
9276 return Builder.CreateCall(F);
9277 }
9278
9279 // Handle MSVC intrinsics before argument evaluation to prevent double
9280 // evaluation.
9281 if (std::optional<MSVCIntrin> MsvcIntId = translateArmToMsvcIntrin(BuiltinID))
9282 return EmitMSVCBuiltinExpr(*MsvcIntId, E);
9283
9284 // Deal with MVE builtins
9285 if (Value *Result = EmitARMMVEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
9286 return Result;
9287 // Handle CDE builtins
9288 if (Value *Result = EmitARMCDEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
9289 return Result;
9290
9291 // Some intrinsics are equivalent; if they are, use the base intrinsic ID.
9292 auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) {
9293 return P.first == BuiltinID;
9294 });
9295 if (It != end(NEONEquivalentIntrinsicMap))
9296 BuiltinID = It->second;
9297
9298 // Find out if any arguments are required to be integer constant
9299 // expressions.
9300 unsigned ICEArguments = 0;
9301 ASTContext::GetBuiltinTypeError Error;
9302 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
9303 assert(Error == ASTContext::GE_None && "Should not codegen an error");
9304
9305 auto getAlignmentValue32 = [&](Address addr) -> Value* {
9306 return Builder.getInt32(addr.getAlignment().getQuantity());
9307 };
9308
9309 Address PtrOp0 = Address::invalid();
9310 Address PtrOp1 = Address::invalid();
9311 SmallVector<Value *, 4> Ops;
9312 bool HasExtraArg = HasExtraNeonArgument(BuiltinID);
9313 unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
9314 for (unsigned i = 0, e = NumArgs; i != e; i++) {
9315 if (i == 0) {
9316 switch (BuiltinID) {
9317 case NEON::BI__builtin_neon_vld1_v:
9318 case NEON::BI__builtin_neon_vld1q_v:
9319 case NEON::BI__builtin_neon_vld1q_lane_v:
9320 case NEON::BI__builtin_neon_vld1_lane_v:
9321 case NEON::BI__builtin_neon_vld1_dup_v:
9322 case NEON::BI__builtin_neon_vld1q_dup_v:
9323 case NEON::BI__builtin_neon_vst1_v:
9324 case NEON::BI__builtin_neon_vst1q_v:
9325 case NEON::BI__builtin_neon_vst1q_lane_v:
9326 case NEON::BI__builtin_neon_vst1_lane_v:
9327 case NEON::BI__builtin_neon_vst2_v:
9328 case NEON::BI__builtin_neon_vst2q_v:
9329 case NEON::BI__builtin_neon_vst2_lane_v:
9330 case NEON::BI__builtin_neon_vst2q_lane_v:
9331 case NEON::BI__builtin_neon_vst3_v:
9332 case NEON::BI__builtin_neon_vst3q_v:
9333 case NEON::BI__builtin_neon_vst3_lane_v:
9334 case NEON::BI__builtin_neon_vst3q_lane_v:
9335 case NEON::BI__builtin_neon_vst4_v:
9336 case NEON::BI__builtin_neon_vst4q_v:
9337 case NEON::BI__builtin_neon_vst4_lane_v:
9338 case NEON::BI__builtin_neon_vst4q_lane_v:
9339 // Get the alignment for the argument in addition to the value;
9340 // we'll use it later.
9341 PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
9342 Ops.push_back(PtrOp0.emitRawPointer(*this));
9343 continue;
9344 }
9345 }
9346 if (i == 1) {
9347 switch (BuiltinID) {
9348 case NEON::BI__builtin_neon_vld2_v:
9349 case NEON::BI__builtin_neon_vld2q_v:
9350 case NEON::BI__builtin_neon_vld3_v:
9351 case NEON::BI__builtin_neon_vld3q_v:
9352 case NEON::BI__builtin_neon_vld4_v:
9353 case NEON::BI__builtin_neon_vld4q_v:
9354 case NEON::BI__builtin_neon_vld2_lane_v:
9355 case NEON::BI__builtin_neon_vld2q_lane_v:
9356 case NEON::BI__builtin_neon_vld3_lane_v:
9357 case NEON::BI__builtin_neon_vld3q_lane_v:
9358 case NEON::BI__builtin_neon_vld4_lane_v:
9359 case NEON::BI__builtin_neon_vld4q_lane_v:
9360 case NEON::BI__builtin_neon_vld2_dup_v:
9361 case NEON::BI__builtin_neon_vld2q_dup_v:
9362 case NEON::BI__builtin_neon_vld3_dup_v:
9363 case NEON::BI__builtin_neon_vld3q_dup_v:
9364 case NEON::BI__builtin_neon_vld4_dup_v:
9365 case NEON::BI__builtin_neon_vld4q_dup_v:
9366 // Get the alignment for the argument in addition to the value;
9367 // we'll use it later.
9368 PtrOp1 = EmitPointerWithAlignment(E->getArg(1));
9369 Ops.push_back(PtrOp1.emitRawPointer(*this));
9370 continue;
9371 }
9372 }
9373
9374 Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
9375 }
9376
9377 switch (BuiltinID) {
9378 default: break;
9379
9380 case NEON::BI__builtin_neon_vget_lane_i8:
9381 case NEON::BI__builtin_neon_vget_lane_i16:
9382 case NEON::BI__builtin_neon_vget_lane_i32:
9383 case NEON::BI__builtin_neon_vget_lane_i64:
9384 case NEON::BI__builtin_neon_vget_lane_bf16:
9385 case NEON::BI__builtin_neon_vget_lane_f32:
9386 case NEON::BI__builtin_neon_vgetq_lane_i8:
9387 case NEON::BI__builtin_neon_vgetq_lane_i16:
9388 case NEON::BI__builtin_neon_vgetq_lane_i32:
9389 case NEON::BI__builtin_neon_vgetq_lane_i64:
9390 case NEON::BI__builtin_neon_vgetq_lane_bf16:
9391 case NEON::BI__builtin_neon_vgetq_lane_f32:
9392 case NEON::BI__builtin_neon_vduph_lane_bf16:
9393 case NEON::BI__builtin_neon_vduph_laneq_bf16:
9394 return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
9395
9396 case NEON::BI__builtin_neon_vrndns_f32: {
9397 Value *Arg = EmitScalarExpr(E->getArg(0));
9398 llvm::Type *Tys[] = {Arg->getType()};
9399 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vrintn, Tys);
9400 return Builder.CreateCall(F, {Arg}, "vrndn"); }
9401
9402 case NEON::BI__builtin_neon_vset_lane_i8:
9403 case NEON::BI__builtin_neon_vset_lane_i16:
9404 case NEON::BI__builtin_neon_vset_lane_i32:
9405 case NEON::BI__builtin_neon_vset_lane_i64:
9406 case NEON::BI__builtin_neon_vset_lane_bf16:
9407 case NEON::BI__builtin_neon_vset_lane_f32:
9408 case NEON::BI__builtin_neon_vsetq_lane_i8:
9409 case NEON::BI__builtin_neon_vsetq_lane_i16:
9410 case NEON::BI__builtin_neon_vsetq_lane_i32:
9411 case NEON::BI__builtin_neon_vsetq_lane_i64:
9412 case NEON::BI__builtin_neon_vsetq_lane_bf16:
9413 case NEON::BI__builtin_neon_vsetq_lane_f32:
9414 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
9415
9416 case NEON::BI__builtin_neon_vsha1h_u32:
9417 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops,
9418 "vsha1h");
9419 case NEON::BI__builtin_neon_vsha1cq_u32:
9420 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops,
9421 "vsha1h");
9422 case NEON::BI__builtin_neon_vsha1pq_u32:
9423 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops,
9424 "vsha1h");
9425 case NEON::BI__builtin_neon_vsha1mq_u32:
9426 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops,
9427 "vsha1h");
9428
9429 case NEON::BI__builtin_neon_vcvth_bf16_f32: {
9430 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vcvtbfp2bf), Ops,
9431 "vcvtbfp2bf");
9432 }
9433
9434 // The ARM _MoveToCoprocessor builtins put the input register value as
9435 // the first argument, but the LLVM intrinsic expects it as the third one.
9436 case clang::ARM::BI_MoveToCoprocessor:
9437 case clang::ARM::BI_MoveToCoprocessor2: {
9438 Function *F = CGM.getIntrinsic(BuiltinID == clang::ARM::BI_MoveToCoprocessor
9439 ? Intrinsic::arm_mcr
9440 : Intrinsic::arm_mcr2);
9441 return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0],
9442 Ops[3], Ops[4], Ops[5]});
9443 }
9444 }
9445
9446 // Get the last argument, which specifies the vector type.
9447 assert(HasExtraArg);
9448 const Expr *Arg = E->getArg(E->getNumArgs()-1);
9449 std::optional<llvm::APSInt> Result =
9450 Arg->getIntegerConstantExpr(getContext());
9451 if (!Result)
9452 return nullptr;
9453
9454 if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f ||
9455 BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_d) {
9456 // Determine the overloaded type of this builtin.
9457 llvm::Type *Ty;
9458 if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f)
9459 Ty = FloatTy;
9460 else
9461 Ty = DoubleTy;
9462
9463 // Determine whether this is an unsigned conversion or not.
9464 bool usgn = Result->getZExtValue() == 1;
9465 unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;
9466
9467 // Call the appropriate intrinsic.
9468 Function *F = CGM.getIntrinsic(Int, Ty);
9469 return Builder.CreateCall(F, Ops, "vcvtr");
9470 }
9471
9472 // Determine the type of this overloaded NEON intrinsic.
9473 NeonTypeFlags Type = Result->getZExtValue();
9474 bool usgn = Type.isUnsigned();
9475 bool rightShift = false;
9476
9477 llvm::FixedVectorType *VTy =
9478 GetNeonType(this, Type, getTarget().hasLegalHalfType(), false,
9479 getTarget().hasBFloat16Type());
9480 llvm::Type *Ty = VTy;
9481 if (!Ty)
9482 return nullptr;
9483
9484 // Many NEON builtins have identical semantics and uses in ARM and
9485 // AArch64. Emit these in a single function.
9486 auto IntrinsicMap = ArrayRef(ARMSIMDIntrinsicMap);
9487 const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(
9488 IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted);
9489 if (Builtin)
9490 return EmitCommonNeonBuiltinExpr(
9491 Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
9492 Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1, Arch);
9493
9494 unsigned Int;
9495 switch (BuiltinID) {
9496 default: return nullptr;
9497 case NEON::BI__builtin_neon_vld1q_lane_v:
9498 // Handle 64-bit integer elements as a special case. Use shuffles of
9499 // one-element vectors to avoid poor code for i64 in the backend.
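 // Sketch of the i64 path below: the untouched lane is extracted with a
 // one-element shuffle, the new element is loaded as a <1 x i64>, and the two
 // are recombined; e.g. for Lane == 0 the final shuffle mask is <1, 0>
 // (illustrative).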
9500 if (VTy->getElementType()->isIntegerTy(64)) {
9501 // Extract the other lane.
9502 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
9503 int Lane = cast<ConstantInt>(Ops[2])->getZExtValue();
9504 Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane));
9505 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
9506 // Load the value as a one-element vector.
9507 Ty = llvm::FixedVectorType::get(VTy->getElementType(), 1);
9508 llvm::Type *Tys[] = {Ty, Int8PtrTy};
9509 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys);
9510 Value *Align = getAlignmentValue32(PtrOp0);
9511 Value *Ld = Builder.CreateCall(F, {Ops[0], Align});
9512 // Combine them.
9513 int Indices[] = {1 - Lane, Lane};
9514 return Builder.CreateShuffleVector(Ops[1], Ld, Indices, "vld1q_lane");
9515 }
9516 [[fallthrough]];
9517 case NEON::BI__builtin_neon_vld1_lane_v: {
9518 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
9519 PtrOp0 = PtrOp0.withElementType(VTy->getElementType());
9520 Value *Ld = Builder.CreateLoad(PtrOp0);
9521 return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
9522 }
9523 case NEON::BI__builtin_neon_vqrshrn_n_v:
9524 Int =
9525 usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
9526 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n",
9527 1, true);
9528 case NEON::BI__builtin_neon_vqrshrun_n_v:
9529 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
9530 Ops, "vqrshrun_n", 1, true);
9531 case NEON::BI__builtin_neon_vqshrn_n_v:
9532 Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
9533 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n",
9534 1, true);
9535 case NEON::BI__builtin_neon_vqshrun_n_v:
9536 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
9537 Ops, "vqshrun_n", 1, true);
9538 case NEON::BI__builtin_neon_vrecpe_v:
9539 case NEON::BI__builtin_neon_vrecpeq_v:
9540 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),
9541 Ops, "vrecpe");
9542 case NEON::BI__builtin_neon_vrshrn_n_v:
9543 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
9544 Ops, "vrshrn_n", 1, true);
9545 case NEON::BI__builtin_neon_vrsra_n_v:
9546 case NEON::BI__builtin_neon_vrsraq_n_v:
9547 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
9548 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
9549 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
9550 Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
9551 Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Ty), {Ops[1], Ops[2]});
9552 return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
9553 case NEON::BI__builtin_neon_vsri_n_v:
9554 case NEON::BI__builtin_neon_vsriq_n_v:
9555 rightShift = true;
9556 [[fallthrough]];
9557 case NEON::BI__builtin_neon_vsli_n_v:
9558 case NEON::BI__builtin_neon_vsliq_n_v:
9559 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
9560 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
9561 Ops, "vsli_n");
9562 case NEON::BI__builtin_neon_vsra_n_v:
9563 case NEON::BI__builtin_neon_vsraq_n_v:
9564 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
9565 Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
9566 return Builder.CreateAdd(Ops[0], Ops[1]);
9567 case NEON::BI__builtin_neon_vst1q_lane_v:
9568 // Handle 64-bit integer elements as a special case. Use a shuffle to get
9569 // a one-element vector and avoid poor code for i64 in the backend.
9570 if (VTy->getElementType()->isIntegerTy(64)) {
9571 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
9572 Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2]));
9573 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
9574 Ops[2] = getAlignmentValue32(PtrOp0);
9575 llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()};
9576 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1,
9577 Tys), Ops);
9578 }
9579 [[fallthrough]];
9580 case NEON::BI__builtin_neon_vst1_lane_v: {
9581 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
9582 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
9583 return Builder.CreateStore(Ops[1],
9584 PtrOp0.withElementType(Ops[1]->getType()));
9585 }
9586 case NEON::BI__builtin_neon_vtbl1_v:
9587 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
9588 Ops, "vtbl1");
9589 case NEON::BI__builtin_neon_vtbl2_v:
9590 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
9591 Ops, "vtbl2");
9592 case NEON::BI__builtin_neon_vtbl3_v:
9593 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
9594 Ops, "vtbl3");
9595 case NEON::BI__builtin_neon_vtbl4_v:
9596 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
9597 Ops, "vtbl4");
9598 case NEON::BI__builtin_neon_vtbx1_v:
9599 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
9600 Ops, "vtbx1");
9601 case NEON::BI__builtin_neon_vtbx2_v:
9602 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
9603 Ops, "vtbx2");
9604 case NEON::BI__builtin_neon_vtbx3_v:
9605 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
9606 Ops, "vtbx3");
9607 case NEON::BI__builtin_neon_vtbx4_v:
9608 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
9609 Ops, "vtbx4");
9610 }
9611}
9612
9613template<typename Integer>
9614static Integer GetIntegerConstantValue(const Expr *E, ASTContext &Context) {
9615 return E->getIntegerConstantExpr(Context)->getExtValue();
9616}
9617
9618static llvm::Value *SignOrZeroExtend(CGBuilderTy &Builder, llvm::Value *V,
9619 llvm::Type *T, bool Unsigned) {
9620 // Helper function called by Tablegen-constructed ARM MVE builtin codegen,
9621 // which finds it convenient to specify signed/unsigned as a boolean flag.
9622 return Unsigned ? Builder.CreateZExt(V, T) : Builder.CreateSExt(V, T);
9623}
9624
9625static llvm::Value *MVEImmediateShr(CGBuilderTy &Builder, llvm::Value *V,
9626 uint32_t Shift, bool Unsigned) {
9627 // MVE helper function for integer shift right. This must handle signed vs
9628 // unsigned, and also deal specially with the case where the shift count is
9629 // equal to the lane size. In LLVM IR, an LShr with that parameter would be
9630 // undefined behavior, but in MVE it's legal, so we must convert it to code
9631 // that is not undefined in IR.
9632 unsigned LaneBits = cast<llvm::VectorType>(V->getType())
9633 ->getElementType()
9634 ->getPrimitiveSizeInBits();
9635 if (Shift == LaneBits) {
9636 // An unsigned shift of the full lane size always generates zero, so we can
9637 // simply emit a zero vector. A signed shift of the full lane size does the
9638 // same thing as shifting by one bit fewer.
9639 if (Unsigned)
9640 return llvm::Constant::getNullValue(V->getType());
9641 else
9642 --Shift;
9643 }
9644 return Unsigned ? Builder.CreateLShr(V, Shift) : Builder.CreateAShr(V, Shift);
9645}
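// Illustrative sketch, assuming a v8i16 MVE operand (LaneBits == 16):
//   MVEImmediateShr(Builder, V, /*Shift=*/16, /*Unsigned=*/true)
//     -> a zero vector, since an unsigned shift by the full lane size is 0;
//   MVEImmediateShr(Builder, V, /*Shift=*/16, /*Unsigned=*/false)
//     -> Builder.CreateAShr(V, 15), shifting by one bit fewer, which gives
//        the same result without being undefined in IR.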
9646
9647static llvm::Value *ARMMVEVectorSplat(CGBuilderTy &Builder, llvm::Value *V) {
9648 // MVE-specific helper function for a vector splat, which infers the element
9649 // count of the output vector by knowing that MVE vectors are all 128 bits
9650 // wide.
9651 unsigned Elements = 128 / V->getType()->getPrimitiveSizeInBits();
9652 return Builder.CreateVectorSplat(Elements, V);
9653}
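// Illustrative sketch, assuming scalar operand types:
//   ARMMVEVectorSplat(Builder, /*i32 value*/ V) -> a 4-element splat
//   ARMMVEVectorSplat(Builder, /*i16 value*/ V) -> an 8-element splat
// because every MVE vector is 128 bits wide.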
9654
9655static llvm::Value *ARMMVEVectorReinterpret(CGBuilderTy &Builder,
9656 CodeGenFunction *CGF,
9657 llvm::Value *V,
9658 llvm::Type *DestType) {
9659 // Convert one MVE vector type into another by reinterpreting its in-register
9660 // format.
9661 //
9662 // In little-endian mode, this is identical to a bitcast (which reinterprets
9663 // the memory format). In big-endian mode, they're not necessarily the same, because
9664 // the register and memory formats map to each other differently depending on
9665 // the lane size.
9666 //
9667 // We generate a bitcast whenever we can (if we're little-endian, or if the
9668 // lane sizes are the same anyway). Otherwise we fall back to an IR intrinsic
9669 // that performs the different kind of reinterpretation.
9670 if (CGF->getTarget().isBigEndian() &&
9671 V->getType()->getScalarSizeInBits() != DestType->getScalarSizeInBits()) {
9672 return Builder.CreateCall(
9673 CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vreinterpretq,
9674 {DestType, V->getType()}),
9675 V);
9676 } else {
9677 return Builder.CreateBitCast(V, DestType);
9678 }
9679}
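// Illustrative sketch, assuming 128-bit MVE vector types:
//   little-endian, <4 x i32> -> <8 x i16>: emitted as a plain bitcast;
//   big-endian,    <4 x i32> -> <8 x i16>: lane sizes differ, so the
//     llvm.arm.mve.vreinterpretq intrinsic is emitted instead;
//   big-endian,    <4 x i32> -> <4 x float>: same lane size, a bitcast suffices.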
9680
9681static llvm::Value *VectorUnzip(CGBuilderTy &Builder, llvm::Value *V, bool Odd) {
9682 // Make a shufflevector that extracts every other element of a vector (evens
9683 // or odds, as desired).
9684 SmallVector<int, 16> Indices;
9685 unsigned InputElements =
9686 cast<llvm::FixedVectorType>(V->getType())->getNumElements();
9687 for (unsigned i = 0; i < InputElements; i += 2)
9688 Indices.push_back(i + Odd);
9689 return Builder.CreateShuffleVector(V, Indices);
9690}
9691
9692static llvm::Value *VectorZip(CGBuilderTy &Builder, llvm::Value *V0,
9693 llvm::Value *V1) {
9694 // Make a shufflevector that interleaves two vectors element by element.
9695 assert(V0->getType() == V1->getType() && "Can't zip different vector types");
9696 SmallVector<int, 16> Indices;
9697 unsigned InputElements =
9698 cast<llvm::FixedVectorType>(V0->getType())->getNumElements();
9699 for (unsigned i = 0; i < InputElements; i++) {
9700 Indices.push_back(i);
9701 Indices.push_back(i + InputElements);
9702 }
9703 return Builder.CreateShuffleVector(V0, V1, Indices);
9704}
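// Illustrative sketch, assuming 4-element inputs:
//   VectorUnzip(B, V, /*Odd=*/false) uses shuffle mask {0, 2} (even lanes);
//   VectorUnzip(B, V, /*Odd=*/true)  uses shuffle mask {1, 3} (odd lanes);
//   VectorZip(B, V0, V1)             uses shuffle mask {0, 4, 1, 5, 2, 6, 3, 7},
//   interleaving the two inputs element by element.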
9705
9706template<unsigned HighBit, unsigned OtherBits>
9707static llvm::Value *ARMMVEConstantSplat(CGBuilderTy &Builder, llvm::Type *VT) {
9708 // MVE-specific helper function to make a vector splat of a constant such as
9709 // UINT_MAX or INT_MIN, in which all bits below the highest one are equal.
9710 llvm::Type *T = cast<llvm::VectorType>(VT)->getElementType();
9711 unsigned LaneBits = T->getPrimitiveSizeInBits();
9712 uint32_t Value = HighBit << (LaneBits - 1);
9713 if (OtherBits)
9714 Value |= (1UL << (LaneBits - 1)) - 1;
9715 llvm::Value *Lane = llvm::ConstantInt::get(T, Value);
9716 return ARMMVEVectorSplat(Builder, Lane);
9717}
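// Illustrative sketch, assuming 32-bit lanes (LaneBits == 32):
//   ARMMVEConstantSplat<1, 0>(Builder, VT) splats 0x80000000 (INT_MIN);
//   ARMMVEConstantSplat<0, 1>(Builder, VT) splats 0x7FFFFFFF (INT_MAX);
//   ARMMVEConstantSplat<1, 1>(Builder, VT) splats 0xFFFFFFFF (UINT_MAX).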
9718
9719static llvm::Value *ARMMVEVectorElementReverse(CGBuilderTy &Builder,
9720 llvm::Value *V,
9721 unsigned ReverseWidth) {
9722 // MVE-specific helper function which reverses the elements of a
9723 // vector within every (ReverseWidth)-bit collection of lanes.
9724 SmallVector<int, 16> Indices;
9725 unsigned LaneSize = V->getType()->getScalarSizeInBits();
9726 unsigned Elements = 128 / LaneSize;
9727 unsigned Mask = ReverseWidth / LaneSize - 1;
9728 for (unsigned i = 0; i < Elements; i++)
9729 Indices.push_back(i ^ Mask);
9730 return Builder.CreateShuffleVector(V, Indices);
9731}
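// Illustrative sketch, assuming 8-bit lanes and ReverseWidth == 32:
//   Mask = 32/8 - 1 = 3, so the shuffle indices are i ^ 3, i.e.
//   {3, 2, 1, 0, 7, 6, 5, 4, ...} -- the bytes are reversed within each
//   32-bit group, a VREV32.8-style operation.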
9732
9733Value *CodeGenFunction::EmitARMMVEBuiltinExpr(unsigned BuiltinID,
9734 const CallExpr *E,
9735 ReturnValueSlot ReturnValue,
9736 llvm::Triple::ArchType Arch) {
9737 enum class CustomCodeGen { VLD24, VST24 } CustomCodeGenType;
9738 Intrinsic::ID IRIntr;
9739 unsigned NumVectors;
9740
9741 // Code autogenerated by Tablegen will handle all the simple builtins.
9742 switch (BuiltinID) {
9743 #include "clang/Basic/arm_mve_builtin_cg.inc"
9744
9745 // If we didn't match an MVE builtin id at all, go back to the
9746 // main EmitARMBuiltinExpr.
9747 default:
9748 return nullptr;
9749 }
9750
9751 // Anything that breaks from that switch is an MVE builtin that
9752 // needs handwritten code to generate.
9753
9754 switch (CustomCodeGenType) {
9755
9756 case CustomCodeGen::VLD24: {
9759
9760 auto MvecCType = E->getType();
9761 auto MvecLType = ConvertType(MvecCType);
9762 assert(MvecLType->isStructTy() &&
9763 "Return type for vld[24]q should be a struct");
9764 assert(MvecLType->getStructNumElements() == 1 &&
9765 "Return-type struct for vld[24]q should have one element");
9766 auto MvecLTypeInner = MvecLType->getStructElementType(0);
9767 assert(MvecLTypeInner->isArrayTy() &&
9768 "Return-type struct for vld[24]q should contain an array");
9769 assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
9770 "Array member of return-type struct vld[24]q has wrong length");
9771 auto VecLType = MvecLTypeInner->getArrayElementType();
9772
9773 Tys.push_back(VecLType);
9774
9775 auto Addr = E->getArg(0);
9776 Ops.push_back(EmitScalarExpr(Addr));
9777 Tys.push_back(ConvertType(Addr->getType()));
9778
9779 Function *F = CGM.getIntrinsic(IRIntr, ArrayRef(Tys));
9780 Value *LoadResult = Builder.CreateCall(F, Ops);
9781 Value *MvecOut = PoisonValue::get(MvecLType);
9782 for (unsigned i = 0; i < NumVectors; ++i) {
9783 Value *Vec = Builder.CreateExtractValue(LoadResult, i);
9784 MvecOut = Builder.CreateInsertValue(MvecOut, Vec, {0, i});
9785 }
9786
9787 if (ReturnValue.isNull())
9788 return MvecOut;
9789 else
9790 return Builder.CreateStore(MvecOut, ReturnValue.getAddress());
9791 }
9792
9793 case CustomCodeGen::VST24: {
9796
9797 auto Addr = E->getArg(0);
9798 Ops.push_back(EmitScalarExpr(Addr));
9799 Tys.push_back(ConvertType(Addr->getType()));
9800
9801 auto MvecCType = E->getArg(1)->getType();
9802 auto MvecLType = ConvertType(MvecCType);
9803 assert(MvecLType->isStructTy() && "Data type for vst2q should be a struct");
9804 assert(MvecLType->getStructNumElements() == 1 &&
9805 "Data-type struct for vst2q should have one element");
9806 auto MvecLTypeInner = MvecLType->getStructElementType(0);
9807 assert(MvecLTypeInner->isArrayTy() &&
9808 "Data-type struct for vst2q should contain an array");
9809 assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
9810 "Array member of return-type struct vld[24]q has wrong length");
9811 auto VecLType = MvecLTypeInner->getArrayElementType();
9812
9813 Tys.push_back(VecLType);
9814
9815 AggValueSlot MvecSlot = CreateAggTemp(MvecCType);
9816 EmitAggExpr(E->getArg(1), MvecSlot);
9817 auto Mvec = Builder.CreateLoad(MvecSlot.getAddress());
9818 for (unsigned i = 0; i < NumVectors; i++)
9819 Ops.push_back(Builder.CreateExtractValue(Mvec, {0, i}));
9820
9821 Function *F = CGM.getIntrinsic(IRIntr, ArrayRef(Tys));
9822 Value *ToReturn = nullptr;
9823 for (unsigned i = 0; i < NumVectors; i++) {
9824 Ops.push_back(llvm::ConstantInt::get(Int32Ty, i));
9825 ToReturn = Builder.CreateCall(F, Ops);
9826 Ops.pop_back();
9827 }
9828 return ToReturn;
9829 }
9830 }
9831 llvm_unreachable("unknown custom codegen type.");
9832}
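// Illustrative sketch of the VLD24 path above, assuming a builtin like
// vld2q_u32(p): the generated IR is roughly
//   %r = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vld2q...(ptr %p)
// and each part is inserted into the single array member of the returned
// uint32x4x2_t struct; the VST24 path does the inverse, extracting the parts
// from the aggregate and emitting one store-intrinsic call per stage index.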
9833
9834Value *CodeGenFunction::EmitARMCDEBuiltinExpr(unsigned BuiltinID,
9835 const CallExpr *E,
9836 ReturnValueSlot ReturnValue,
9837 llvm::Triple::ArchType Arch) {
9838 switch (BuiltinID) {
9839 default:
9840 return nullptr;
9841#include "clang/Basic/arm_cde_builtin_cg.inc"
9842 }
9843}
9844
9845static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID,
9846 const CallExpr *E,
9847 SmallVectorImpl<Value *> &Ops,
9848 llvm::Triple::ArchType Arch) {
9849 unsigned int Int = 0;
9850 const char *s = nullptr;
9851
9852 switch (BuiltinID) {
9853 default:
9854 return nullptr;
9855 case NEON::BI__builtin_neon_vtbl1_v:
9856 case NEON::BI__builtin_neon_vqtbl1_v:
9857 case NEON::BI__builtin_neon_vqtbl1q_v:
9858 case NEON::BI__builtin_neon_vtbl2_v:
9859 case NEON::BI__builtin_neon_vqtbl2_v:
9860 case NEON::BI__builtin_neon_vqtbl2q_v:
9861 case NEON::BI__builtin_neon_vtbl3_v:
9862 case NEON::BI__builtin_neon_vqtbl3_v:
9863 case NEON::BI__builtin_neon_vqtbl3q_v:
9864 case NEON::BI__builtin_neon_vtbl4_v:
9865 case NEON::BI__builtin_neon_vqtbl4_v:
9866 case NEON::BI__builtin_neon_vqtbl4q_v:
9867 break;
9868 case NEON::BI__builtin_neon_vtbx1_v:
9869 case NEON::BI__builtin_neon_vqtbx1_v:
9870 case NEON::BI__builtin_neon_vqtbx1q_v:
9871 case NEON::BI__builtin_neon_vtbx2_v:
9872 case NEON::BI__builtin_neon_vqtbx2_v:
9873 case NEON::BI__builtin_neon_vqtbx2q_v:
9874 case NEON::BI__builtin_neon_vtbx3_v:
9875 case NEON::BI__builtin_neon_vqtbx3_v:
9876 case NEON::BI__builtin_neon_vqtbx3q_v:
9877 case NEON::BI__builtin_neon_vtbx4_v:
9878 case NEON::BI__builtin_neon_vqtbx4_v:
9879 case NEON::BI__builtin_neon_vqtbx4q_v:
9880 break;
9881 }
9882
9883 assert(E->getNumArgs() >= 3);
9884
9885 // Get the last argument, which specifies the vector type.
9886 const Expr *Arg = E->getArg(E->getNumArgs() - 1);
9887 std::optional<llvm::APSInt> Result =
9888 Arg->getIntegerConstantExpr(CGF.getContext());
9889 if (!Result)
9890 return nullptr;
9891
9892 // Determine the type of this overloaded NEON intrinsic.
9893 NeonTypeFlags Type = Result->getZExtValue();
9894 llvm::FixedVectorType *Ty = GetNeonType(&CGF, Type);
9895 if (!Ty)
9896 return nullptr;
9897
9898 CodeGen::CGBuilderTy &Builder = CGF.Builder;
9899
9900 // AArch64 scalar builtins are not overloaded; they do not have an extra
9901 // argument that specifies the vector type, so we need to handle each case.
9902 switch (BuiltinID) {
9903 case NEON::BI__builtin_neon_vtbl1_v: {
9904 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 1), nullptr, Ops[1],
9905 Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
9906 }
9907 case NEON::BI__builtin_neon_vtbl2_v: {
9908 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 2), nullptr, Ops[2],
9909 Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
9910 }
9911 case NEON::BI__builtin_neon_vtbl3_v: {
9912 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 3), nullptr, Ops[3],
9913 Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
9914 }
9915 case NEON::BI__builtin_neon_vtbl4_v: {
9916 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 4), nullptr, Ops[4],
9917 Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
9918 }
9919 case NEON::BI__builtin_neon_vtbx1_v: {
9920 Value *TblRes =
9921 packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 1), nullptr, Ops[2], Ty,
9922 Intrinsic::aarch64_neon_tbl1, "vtbl1");
9923
9924 llvm::Constant *EightV = ConstantInt::get(Ty, 8);
9925 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV);
9926 CmpRes = Builder.CreateSExt(CmpRes, Ty);
9927
9928 Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
9929 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
9930 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
9931 }
9932 case NEON::BI__builtin_neon_vtbx2_v: {
9933 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 2), Ops[0], Ops[3],
9934 Ty, Intrinsic::aarch64_neon_tbx1, "vtbx1");
9935 }
9936 case NEON::BI__builtin_neon_vtbx3_v: {
9937 Value *TblRes =
9938 packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 3), nullptr, Ops[4], Ty,
9939 Intrinsic::aarch64_neon_tbl2, "vtbl2");
9940
9941 llvm::Constant *TwentyFourV = ConstantInt::get(Ty, 24);
9942 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4],
9943 TwentyFourV);
9944 CmpRes = Builder.CreateSExt(CmpRes, Ty);
9945
9946 Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
9947 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
9948 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
9949 }
9950 case NEON::BI__builtin_neon_vtbx4_v: {
9951 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 4), Ops[0], Ops[5],
9952 Ty, Intrinsic::aarch64_neon_tbx2, "vtbx2");
9953 }
9954 case NEON::BI__builtin_neon_vqtbl1_v:
9955 case NEON::BI__builtin_neon_vqtbl1q_v:
9956 Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break;
9957 case NEON::BI__builtin_neon_vqtbl2_v:
9958 case NEON::BI__builtin_neon_vqtbl2q_v: {
9959 Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break;
9960 case NEON::BI__builtin_neon_vqtbl3_v:
9961 case NEON::BI__builtin_neon_vqtbl3q_v:
9962 Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break;
9963 case NEON::BI__builtin_neon_vqtbl4_v:
9964 case NEON::BI__builtin_neon_vqtbl4q_v:
9965 Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break;
9966 case NEON::BI__builtin_neon_vqtbx1_v:
9967 case NEON::BI__builtin_neon_vqtbx1q_v:
9968 Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break;
9969 case NEON::BI__builtin_neon_vqtbx2_v:
9970 case NEON::BI__builtin_neon_vqtbx2q_v:
9971 Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break;
9972 case NEON::BI__builtin_neon_vqtbx3_v:
9973 case NEON::BI__builtin_neon_vqtbx3q_v:
9974 Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break;
9975 case NEON::BI__builtin_neon_vqtbx4_v:
9976 case NEON::BI__builtin_neon_vqtbx4q_v:
9977 Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break;
9978 }
9979 }
9980
9981 if (!Int)
9982 return nullptr;
9983
9984 Function *F = CGF.CGM.getIntrinsic(Int, Ty);
9985 return CGF.EmitNeonCall(F, Ops, s);
9986}
9987
9988Value *CodeGenFunction::vectorWrapScalar16(Value *Op) {
9989 auto *VTy = llvm::FixedVectorType::get(Int16Ty, 4);
9990 Op = Builder.CreateBitCast(Op, Int16Ty);
9991 Value *V = PoisonValue::get(VTy);
9992 llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
9993 Op = Builder.CreateInsertElement(V, Op, CI);
9994 return Op;
9995}
9996
9997/// SVEBuiltinMemEltTy - Returns the memory element type for this memory
9998/// access builtin. Only required if it can't be inferred from the base pointer
9999/// operand.
10000llvm::Type *CodeGenFunction::SVEBuiltinMemEltTy(const SVETypeFlags &TypeFlags) {
10001 switch (TypeFlags.getMemEltType()) {
10002 case SVETypeFlags::MemEltTyDefault:
10003 return getEltType(TypeFlags);
10004 case SVETypeFlags::MemEltTyInt8:
10005 return Builder.getInt8Ty();
10006 case SVETypeFlags::MemEltTyInt16:
10007 return Builder.getInt16Ty();
10008 case SVETypeFlags::MemEltTyInt32:
10009 return Builder.getInt32Ty();
10010 case SVETypeFlags::MemEltTyInt64:
10011 return Builder.getInt64Ty();
10012 }
10013 llvm_unreachable("Unknown MemEltType");
10014}
10015
10016llvm::Type *CodeGenFunction::getEltType(const SVETypeFlags &TypeFlags) {
10017 switch (TypeFlags.getEltType()) {
10018 default:
10019 llvm_unreachable("Invalid SVETypeFlag!");
10020
10021 case SVETypeFlags::EltTyInt8:
10022 return Builder.getInt8Ty();
10023 case SVETypeFlags::EltTyInt16:
10024 return Builder.getInt16Ty();
10025 case SVETypeFlags::EltTyInt32:
10026 return Builder.getInt32Ty();
10027 case SVETypeFlags::EltTyInt64:
10028 return Builder.getInt64Ty();
10029 case SVETypeFlags::EltTyInt128:
10030 return Builder.getInt128Ty();
10031
10032 case SVETypeFlags::EltTyFloat16:
10033 return Builder.getHalfTy();
10034 case SVETypeFlags::EltTyFloat32:
10035 return Builder.getFloatTy();
10036 case SVETypeFlags::EltTyFloat64:
10037 return Builder.getDoubleTy();
10038
10039 case SVETypeFlags::EltTyBFloat16:
10040 return Builder.getBFloatTy();
10041
10042 case SVETypeFlags::EltTyBool8:
10043 case SVETypeFlags::EltTyBool16:
10044 case SVETypeFlags::EltTyBool32:
10045 case SVETypeFlags::EltTyBool64:
10046 return Builder.getInt1Ty();
10047 }
10048}
10049
10050// Return the llvm predicate vector type corresponding to the specified element
10051// TypeFlags.
10052llvm::ScalableVectorType *
10053CodeGenFunction::getSVEPredType(const SVETypeFlags &TypeFlags) {
10054 switch (TypeFlags.getEltType()) {
10055 default: llvm_unreachable("Unhandled SVETypeFlag!");
10056
10057 case SVETypeFlags::EltTyInt8:
10058 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
10059 case SVETypeFlags::EltTyInt16:
10060 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
10061 case SVETypeFlags::EltTyInt32:
10062 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
10063 case SVETypeFlags::EltTyInt64:
10064 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
10065
10066 case SVETypeFlags::EltTyBFloat16:
10067 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
10068 case SVETypeFlags::EltTyFloat16:
10069 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
10070 case SVETypeFlags::EltTyFloat32:
10071 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
10072 case SVETypeFlags::EltTyFloat64:
10073 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
10074
10075 case SVETypeFlags::EltTyBool8:
10076 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
10077 case SVETypeFlags::EltTyBool16:
10078 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
10079 case SVETypeFlags::EltTyBool32:
10080 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
10081 case SVETypeFlags::EltTyBool64:
10082 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
10083 }
10084}
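// Illustrative mapping, assuming the element kinds above: for EltTyInt8 data
// the governing predicate is <vscale x 16 x i1>, for EltTyFloat64 it is
// <vscale x 2 x i1> -- one predicate bit per data lane of a 128-bit granule,
// while the C-level svbool_t is always <vscale x 16 x i1>.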
10085
10086// Return the llvm vector type corresponding to the specified element TypeFlags.
10087llvm::ScalableVectorType *
10088CodeGenFunction::getSVEType(const SVETypeFlags &TypeFlags) {
10089 switch (TypeFlags.getEltType()) {
10090 default:
10091 llvm_unreachable("Invalid SVETypeFlag!");
10092
10093 case SVETypeFlags::EltTyInt8:
10094 return llvm::ScalableVectorType::get(Builder.getInt8Ty(), 16);
10095 case SVETypeFlags::EltTyInt16:
10096 return llvm::ScalableVectorType::get(Builder.getInt16Ty(), 8);
10097 case SVETypeFlags::EltTyInt32:
10098 return llvm::ScalableVectorType::get(Builder.getInt32Ty(), 4);
10099 case SVETypeFlags::EltTyInt64:
10100 return llvm::ScalableVectorType::get(Builder.getInt64Ty(), 2);
10101
10102 case SVETypeFlags::EltTyMFloat8:
10103 return llvm::ScalableVectorType::get(Builder.getInt8Ty(), 16);
10104 case SVETypeFlags::EltTyFloat16:
10105 return llvm::ScalableVectorType::get(Builder.getHalfTy(), 8);
10106 case SVETypeFlags::EltTyBFloat16:
10107 return llvm::ScalableVectorType::get(Builder.getBFloatTy(), 8);
10108 case SVETypeFlags::EltTyFloat32:
10109 return llvm::ScalableVectorType::get(Builder.getFloatTy(), 4);
10110 case SVETypeFlags::EltTyFloat64:
10111 return llvm::ScalableVectorType::get(Builder.getDoubleTy(), 2);
10112
10113 case SVETypeFlags::EltTyBool8:
10114 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
10115 case SVETypeFlags::EltTyBool16:
10116 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
10117 case SVETypeFlags::EltTyBool32:
10118 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
10119 case SVETypeFlags::EltTyBool64:
10120 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
10121 }
10122}
10123
10124llvm::Value *
10125CodeGenFunction::EmitSVEAllTruePred(const SVETypeFlags &TypeFlags) {
10126 Function *Ptrue =
10127 CGM.getIntrinsic(Intrinsic::aarch64_sve_ptrue, getSVEPredType(TypeFlags));
10128 return Builder.CreateCall(Ptrue, {Builder.getInt32(/*SV_ALL*/ 31)});
10129}
10130
10131constexpr unsigned SVEBitsPerBlock = 128;
10132
10133static llvm::ScalableVectorType *getSVEVectorForElementType(llvm::Type *EltTy) {
10134 unsigned NumElts = SVEBitsPerBlock / EltTy->getScalarSizeInBits();
10135 return llvm::ScalableVectorType::get(EltTy, NumElts);
10136}
10137
10138// Reinterpret the input predicate so that it can be used to correctly isolate
10139// the elements of the specified datatype.
10140Value *CodeGenFunction::EmitSVEPredicateCast(Value *Pred,
10141 llvm::ScalableVectorType *VTy) {
10142
10143 if (isa<TargetExtType>(Pred->getType()) &&
10144 cast<TargetExtType>(Pred->getType())->getName() == "aarch64.svcount")
10145 return Pred;
10146
10147 auto *RTy = llvm::VectorType::get(IntegerType::get(getLLVMContext(), 1), VTy);
10148 if (Pred->getType() == RTy)
10149 return Pred;
10150
10151 unsigned IntID;
10152 llvm::Type *IntrinsicTy;
10153 switch (VTy->getMinNumElements()) {
10154 default:
10155 llvm_unreachable("unsupported element count!");
10156 case 1:
10157 case 2:
10158 case 4:
10159 case 8:
10160 IntID = Intrinsic::aarch64_sve_convert_from_svbool;
10161 IntrinsicTy = RTy;
10162 break;
10163 case 16:
10164 IntID = Intrinsic::aarch64_sve_convert_to_svbool;
10165 IntrinsicTy = Pred->getType();
10166 break;
10167 }
10168
10169 Function *F = CGM.getIntrinsic(IntID, IntrinsicTy);
10170 Value *C = Builder.CreateCall(F, Pred);
10171 assert(C->getType() == RTy && "Unexpected return type!");
10172 return C;
10173}
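// Illustrative sketch, assuming an svbool_t operand: casting
// <vscale x 16 x i1> down to the predicate type for 64-bit data emits
//   %p = call <vscale x 2 x i1>
//        @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
// while widening a narrower predicate back to svbool_t goes through
// llvm.aarch64.sve.convert.to.svbool.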
10174
10175Value *CodeGenFunction::EmitSVEPredicateTupleCast(Value *PredTuple,
10176 llvm::StructType *Ty) {
10177 if (PredTuple->getType() == Ty)
10178 return PredTuple;
10179
10180 Value *Ret = llvm::PoisonValue::get(Ty);
10181 for (unsigned I = 0; I < Ty->getNumElements(); ++I) {
10182 Value *Pred = Builder.CreateExtractValue(PredTuple, I);
10183 Pred = EmitSVEPredicateCast(
10184 Pred, cast<llvm::ScalableVectorType>(Ty->getTypeAtIndex(I)));
10185 Ret = Builder.CreateInsertValue(Ret, Pred, I);
10186 }
10187
10188 return Ret;
10189}
10190
10191Value *CodeGenFunction::EmitSVEGatherLoad(const SVETypeFlags &TypeFlags,
10192 SmallVectorImpl<Value *> &Ops,
10193 unsigned IntID) {
10194 auto *ResultTy = getSVEType(TypeFlags);
10195 auto *OverloadedTy =
10196 llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), ResultTy);
10197
10198 Function *F = nullptr;
10199 if (Ops[1]->getType()->isVectorTy())
10200 // This is the "vector base, scalar offset" case. In order to uniquely
10201 // map this built-in to an LLVM IR intrinsic, we need both the return type
10202 // and the type of the vector base.
10203 F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[1]->getType()});
10204 else
10205 // This is the "scalar base, vector offset case". The type of the offset
10206 // is encoded in the name of the intrinsic. We only need to specify the
10207 // return type in order to uniquely map this built-in to an LLVM IR
10208 // intrinsic.
10209 F = CGM.getIntrinsic(IntID, OverloadedTy);
10210
10211 // At the ACLE level there's only one predicate type, svbool_t, which is
10212 // mapped to <n x 16 x i1>. However, this might be incompatible with the
10213 // actual type being loaded. For example, when loading doubles (i64) the
10214 // predicate should be <n x 2 x i1> instead. At the IR level the type of
10215 // the predicate and the data being loaded must match. Cast to the type
10216 // expected by the intrinsic. The intrinsic itself should be defined in
10217 // a way that enforces relations between parameter types.
10218 Ops[0] = EmitSVEPredicateCast(
10219 Ops[0], cast<llvm::ScalableVectorType>(F->getArg(0)->getType()));
10220
10221 // Pass 0 when the offset is missing. This can only be applied when using
10222 // the "vector base" addressing mode for which ACLE allows no offset. The
10223 // corresponding LLVM IR always requires an offset.
10224 if (Ops.size() == 2) {
10225 assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");
10226 Ops.push_back(ConstantInt::get(Int64Ty, 0));
10227 }
10228
10229 // For "vector base, scalar index" scale the index so that it becomes a
10230 // scalar offset.
10231 if (!TypeFlags.isByteIndexed() && Ops[1]->getType()->isVectorTy()) {
10232 unsigned BytesPerElt =
10233 OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
10234 Ops[2] = Builder.CreateShl(Ops[2], Log2_32(BytesPerElt));
10235 }
10236
10237 Value *Call = Builder.CreateCall(F, Ops);
10238
10239 // The following sext/zext is only needed when ResultTy != OverloadedTy. In
10240 // other cases it's folded into a nop.
10241 return TypeFlags.isZExtReturn() ? Builder.CreateZExt(Call, ResultTy)
10242 : Builder.CreateSExt(Call, ResultTy);
10243}
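// Illustrative sketch, assuming an svld1ub-style gather that returns
// <vscale x 2 x i64>: the memory element type is i8, so the intrinsic is
// called with an overloaded type of <vscale x 2 x i8> and a predicate
// narrowed to <vscale x 2 x i1>; for the _index forms the scalar index is
// converted to a byte offset by shifting left by log2(bytes per element),
// and the call result is zero- or sign-extended back to <vscale x 2 x i64>.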
10244
10245Value *CodeGenFunction::EmitSVEScatterStore(const SVETypeFlags &TypeFlags,
10246 SmallVectorImpl<Value *> &Ops,
10247 unsigned IntID) {
10248 auto *SrcDataTy = getSVEType(TypeFlags);
10249 auto *OverloadedTy =
10250 llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), SrcDataTy);
10251
10252 // In ACLE the source data is passed in the last argument, whereas in LLVM IR
10253 // it's the first argument. Move it accordingly.
10254 Ops.insert(Ops.begin(), Ops.pop_back_val());
10255
10256 Function *F = nullptr;
10257 if (Ops[2]->getType()->isVectorTy())
10258 // This is the "vector base, scalar offset" case. In order to uniquely
10259 // map this built-in to an LLVM IR intrinsic, we need both the return type
10260 // and the type of the vector base.
10261 F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[2]->getType()});
10262 else
10263 // This is the "scalar base, vector offset case". The type of the offset
10264 // is encoded in the name of the intrinsic. We only need to specify the
10265 // return type in order to uniquely map this built-in to an LLVM IR
10266 // intrinsic.
10267 F = CGM.getIntrinsic(IntID, OverloadedTy);
10268
10269 // Pass 0 when the offset is missing. This can only be applied when using
10270 // the "vector base" addressing mode for which ACLE allows no offset. The
10271 // corresponding LLVM IR always requires an offset.
10272 if (Ops.size() == 3) {
10273 assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");
10274 Ops.push_back(ConstantInt::get(Int64Ty, 0));
10275 }
10276
10277 // Truncation is needed when SrcDataTy != OverloadedTy. In other cases it's
10278 // folded into a nop.
10279 Ops[0] = Builder.CreateTrunc(Ops[0], OverloadedTy);
10280
10281 // At the ACLE level there's only one predicate type, svbool_t, which is
10282 // mapped to <n x 16 x i1>. However, this might be incompatible with the
10283 // actual type being stored. For example, when storing doubles (i64) the
10284 // predicate should be <n x 2 x i1> instead. At the IR level the type of
10285 // the predicate and the data being stored must match. Cast to the type
10286 // expected by the intrinsic. The intrinsic itself should be defined in
10287 // a way that enforces relations between parameter types.
10288 Ops[1] = EmitSVEPredicateCast(
10289 Ops[1], cast<llvm::ScalableVectorType>(F->getArg(1)->getType()));
10290
10291 // For "vector base, scalar index" scale the index so that it becomes a
10292 // scalar offset.
10293 if (!TypeFlags.isByteIndexed() && Ops[2]->getType()->isVectorTy()) {
10294 unsigned BytesPerElt =
10295 OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
10296 Ops[3] = Builder.CreateShl(Ops[3], Log2_32(BytesPerElt));
10297 }
10298
10299 return Builder.CreateCall(F, Ops);
10300}
10301
10302Value *CodeGenFunction::EmitSVEGatherPrefetch(const SVETypeFlags &TypeFlags,
10303 SmallVectorImpl<Value *> &Ops,
10304 unsigned IntID) {
10305 // The gather prefetches are overloaded on the vector input - this can either
10306 // be the vector of base addresses or vector of offsets.
10307 auto *OverloadedTy = dyn_cast<llvm::ScalableVectorType>(Ops[1]->getType());
10308 if (!OverloadedTy)
10309 OverloadedTy = cast<llvm::ScalableVectorType>(Ops[2]->getType());
10310
10311 // Cast the predicate from svbool_t to the right number of elements.
10312 Ops[0] = EmitSVEPredicateCast(Ops[0], OverloadedTy);
10313
10314 // vector + imm addressing modes
10315 if (Ops[1]->getType()->isVectorTy()) {
10316 if (Ops.size() == 3) {
10317 // Pass 0 for 'vector+imm' when the index is omitted.
10318 Ops.push_back(ConstantInt::get(Int64Ty, 0));
10319
10320 // The sv_prfop is the last operand in the builtin and IR intrinsic.
10321 std::swap(Ops[2], Ops[3]);
10322 } else {
10323 // Index needs to be passed as scaled offset.
10324 llvm::Type *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
10325 unsigned BytesPerElt = MemEltTy->getPrimitiveSizeInBits() / 8;
10326 if (BytesPerElt > 1)
10327 Ops[2] = Builder.CreateShl(Ops[2], Log2_32(BytesPerElt));
10328 }
10329 }
10330
10331 Function *F = CGM.getIntrinsic(IntID, OverloadedTy);
10332 return Builder.CreateCall(F, Ops);
10333}
10334
10335Value *CodeGenFunction::EmitSVEStructLoad(const SVETypeFlags &TypeFlags,
10336 SmallVectorImpl<Value *> &Ops,
10337 unsigned IntID) {
10338 llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
10339 Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy);
10340 Value *BasePtr = Ops[1];
10341
10342 // Does the load have an offset?
10343 if (Ops.size() > 2)
10344 BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]);
10345
10346 Function *F = CGM.getIntrinsic(IntID, {VTy});
10347 return Builder.CreateCall(F, {Predicate, BasePtr});
10348}
10349
10350Value *CodeGenFunction::EmitSVEStructStore(const SVETypeFlags &TypeFlags,
10351 SmallVectorImpl<Value *> &Ops,
10352 unsigned IntID) {
10353 llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
10354
10355 unsigned N;
10356 switch (IntID) {
10357 case Intrinsic::aarch64_sve_st2:
10358 case Intrinsic::aarch64_sve_st1_pn_x2:
10359 case Intrinsic::aarch64_sve_stnt1_pn_x2:
10360 case Intrinsic::aarch64_sve_st2q:
10361 N = 2;
10362 break;
10363 case Intrinsic::aarch64_sve_st3:
10364 case Intrinsic::aarch64_sve_st3q:
10365 N = 3;
10366 break;
10367 case Intrinsic::aarch64_sve_st4:
10368 case Intrinsic::aarch64_sve_st1_pn_x4:
10369 case Intrinsic::aarch64_sve_stnt1_pn_x4:
10370 case Intrinsic::aarch64_sve_st4q:
10371 N = 4;
10372 break;
10373 default:
10374 llvm_unreachable("unknown intrinsic!");
10375 }
10376
10377 Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy);
10378 Value *BasePtr = Ops[1];
10379
10380 // Does the store have an offset?
10381 if (Ops.size() > (2 + N))
10382 BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]);
10383
10384 // The llvm.aarch64.sve.st2/3/4 intrinsics take legal part vectors, so we
10385 // need to break up the tuple vector.
10387 for (unsigned I = Ops.size() - N; I < Ops.size(); ++I)
10388 Operands.push_back(Ops[I]);
10389 Operands.append({Predicate, BasePtr});
10390 Function *F = CGM.getIntrinsic(IntID, { VTy });
10391
10392 return Builder.CreateCall(F, Operands);
10393}
10394
10395// SVE2's svpmullb and svpmullt builtins are similar to the svpmullb_pair and
10396// svpmullt_pair intrinsics, with the exception that their results are bitcast
10397// to a wider type.
10398Value *CodeGenFunction::EmitSVEPMull(const SVETypeFlags &TypeFlags,
10399 SmallVectorImpl<Value *> &Ops,
10400 unsigned BuiltinID) {
10401 // Splat scalar operand to vector (intrinsics with _n infix)
10402 if (TypeFlags.hasSplatOperand()) {
10403 unsigned OpNo = TypeFlags.getSplatOperand();
10404 Ops[OpNo] = EmitSVEDupX(Ops[OpNo]);
10405 }
10406
10407 // The pair-wise function has a narrower overloaded type.
10408 Function *F = CGM.getIntrinsic(BuiltinID, Ops[0]->getType());
10409 Value *Call = Builder.CreateCall(F, {Ops[0], Ops[1]});
10410
10411 // Now bitcast to the wider result type.
10412 llvm::ScalableVectorType *Ty = getSVEType(TypeFlags);
10413 return EmitSVEReinterpret(Call, Ty);
10414}
10415
10416Value *CodeGenFunction::EmitSVEMovl(const SVETypeFlags &TypeFlags,
10417 ArrayRef<Value *> Ops, unsigned BuiltinID) {
10418 llvm::Type *OverloadedTy = getSVEType(TypeFlags);
10419 Function *F = CGM.getIntrinsic(BuiltinID, OverloadedTy);
10420 return Builder.CreateCall(F, {Ops[0], Builder.getInt32(0)});
10421}
10422
10423Value *CodeGenFunction::EmitSVEPrefetchLoad(const SVETypeFlags &TypeFlags,
10424 SmallVectorImpl<Value *> &Ops,
10425 unsigned BuiltinID) {
10426 auto *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
10427 auto *VectorTy = getSVEVectorForElementType(MemEltTy);
10428 auto *MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
10429
10430 Value *Predicate = EmitSVEPredicateCast(Ops[0], MemoryTy);
10431 Value *BasePtr = Ops[1];
10432
10433 // Implement the index operand if not omitted.
10434 if (Ops.size() > 3)
10435 BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]);
10436
10437 Value *PrfOp = Ops.back();
10438
10439 Function *F = CGM.getIntrinsic(BuiltinID, Predicate->getType());
10440 return Builder.CreateCall(F, {Predicate, BasePtr, PrfOp});
10441}
10442
10443Value *CodeGenFunction::EmitSVEMaskedLoad(const CallExpr *E,
10444 llvm::Type *ReturnTy,
10445 SmallVectorImpl<Value *> &Ops,
10446 unsigned IntrinsicID,
10447 bool IsZExtReturn) {
10448 QualType LangPTy = E->getArg(1)->getType();
10449 llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
10450 LangPTy->castAs<PointerType>()->getPointeeType());
10451
10452 // The vector type that is returned may be different from the
10453 // eventual type loaded from memory.
10454 auto VectorTy = cast<llvm::ScalableVectorType>(ReturnTy);
10455 llvm::ScalableVectorType *MemoryTy = nullptr;
10456 llvm::ScalableVectorType *PredTy = nullptr;
10457 bool IsQuadLoad = false;
10458 switch (IntrinsicID) {
10459 case Intrinsic::aarch64_sve_ld1uwq:
10460 case Intrinsic::aarch64_sve_ld1udq:
10461 MemoryTy = llvm::ScalableVectorType::get(MemEltTy, 1);
10462 PredTy = llvm::ScalableVectorType::get(
10463 llvm::Type::getInt1Ty(getLLVMContext()), 1);
10464 IsQuadLoad = true;
10465 break;
10466 default:
10467 MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
10468 PredTy = MemoryTy;
10469 break;
10470 }
10471
10472 Value *Predicate = EmitSVEPredicateCast(Ops[0], PredTy);
10473 Value *BasePtr = Ops[1];
10474
10475 // Does the load have an offset?
10476 if (Ops.size() > 2)
10477 BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]);
10478
10479 Function *F = CGM.getIntrinsic(IntrinsicID, IsQuadLoad ? VectorTy : MemoryTy);
10480 auto *Load =
10481 cast<llvm::Instruction>(Builder.CreateCall(F, {Predicate, BasePtr}));
10482 auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType());
10483 CGM.DecorateInstructionWithTBAA(Load, TBAAInfo);
10484
10485 if (IsQuadLoad)
10486 return Load;
10487
10488 return IsZExtReturn ? Builder.CreateZExt(Load, VectorTy)
10489 : Builder.CreateSExt(Load, VectorTy);
10490}
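// Illustrative sketch, assuming a builtin like svld1ub_u32(pg, ptr): the load
// reads a <vscale x 4 x i8> through a <vscale x 4 x i1> predicate and, because
// IsZExtReturn is set, zero-extends it to the <vscale x 4 x i32> return type;
// the signed svld1sb variant sign-extends instead, and the quad-word
// ld1uwq/ld1udq forms return the loaded value unchanged.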
10491
10492Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E,
10493 SmallVectorImpl<Value *> &Ops,
10494 unsigned IntrinsicID) {
10495 QualType LangPTy = E->getArg(1)->getType();
10496 llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
10497 LangPTy->castAs<PointerType>()->getPointeeType());
10498
10499 // The vector type that is stored may be different from the
10500 // eventual type stored to memory.
10501 auto VectorTy = cast<llvm::ScalableVectorType>(Ops.back()->getType());
10502 auto MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
10503
10504 auto PredTy = MemoryTy;
10505 auto AddrMemoryTy = MemoryTy;
10506 bool IsQuadStore = false;
10507
10508 switch (IntrinsicID) {
10509 case Intrinsic::aarch64_sve_st1wq:
10510 case Intrinsic::aarch64_sve_st1dq:
10511 AddrMemoryTy = llvm::ScalableVectorType::get(MemEltTy, 1);
10512 PredTy =
10513 llvm::ScalableVectorType::get(IntegerType::get(getLLVMContext(), 1), 1);
10514 IsQuadStore = true;
10515 break;
10516 default:
10517 break;
10518 }
10519 Value *Predicate = EmitSVEPredicateCast(Ops[0], PredTy);
10520 Value *BasePtr = Ops[1];
10521
10522 // Does the store have an offset?
10523 if (Ops.size() == 4)
10524 BasePtr = Builder.CreateGEP(AddrMemoryTy, BasePtr, Ops[2]);
10525
10526 // Last value is always the data
10527 Value *Val =
10528 IsQuadStore ? Ops.back() : Builder.CreateTrunc(Ops.back(), MemoryTy);
10529
10530 Function *F =
10531 CGM.getIntrinsic(IntrinsicID, IsQuadStore ? VectorTy : MemoryTy);
10532 auto *Store =
10533 cast<llvm::Instruction>(Builder.CreateCall(F, {Val, Predicate, BasePtr}));
10534 auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType());
10535 CGM.DecorateInstructionWithTBAA(Store, TBAAInfo);
10536 return Store;
10537}
10538
10539Value *CodeGenFunction::EmitSMELd1St1(const SVETypeFlags &TypeFlags,
10540 SmallVectorImpl<Value *> &Ops,
10541 unsigned IntID) {
10542 Ops[2] = EmitSVEPredicateCast(
10543 Ops[2], getSVEVectorForElementType(SVEBuiltinMemEltTy(TypeFlags)));
10544
10545 SmallVector<Value *> NewOps;
10546 NewOps.push_back(Ops[2]);
10547
10548 llvm::Value *BasePtr = Ops[3];
10549 llvm::Value *RealSlice = Ops[1];
10550 // If the intrinsic contains the vnum parameter, multiply it by the vector
10551 // size in bytes.
10552 if (Ops.size() == 5) {
10553 Function *StreamingVectorLength =
10554 CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb);
10555 llvm::Value *StreamingVectorLengthCall =
10556 Builder.CreateCall(StreamingVectorLength);
10557 llvm::Value *Mulvl =
10558 Builder.CreateMul(StreamingVectorLengthCall, Ops[4], "mulvl");
10559 // The type of the ptr parameter is void *, so use Int8Ty here.
10560 BasePtr = Builder.CreateGEP(Int8Ty, Ops[3], Mulvl);
10561 RealSlice = Builder.CreateZExt(RealSlice, Int64Ty);
10562 RealSlice = Builder.CreateAdd(RealSlice, Ops[4]);
10563 RealSlice = Builder.CreateTrunc(RealSlice, Int32Ty);
10564 }
10565 NewOps.push_back(BasePtr);
10566 NewOps.push_back(Ops[0]);
10567 NewOps.push_back(RealSlice);
10568 Function *F = CGM.getIntrinsic(IntID);
10569 return Builder.CreateCall(F, NewOps);
10570}
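// Illustrative sketch of the vnum handling above, assuming a builtin like
// svld1_hor_vnum_za8(tile, slice, pg, ptr, vnum): the base pointer is advanced
// by
//   vnum * @llvm.aarch64.sme.cntsb()   // bytes per streaming vector
// and vnum is added to the slice index before the underlying SME load/store
// intrinsic is called; without a vnum argument, base and slice are used as-is.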
10571
10572Value *CodeGenFunction::EmitSMEReadWrite(const SVETypeFlags &TypeFlags,
10573 SmallVectorImpl<Value *> &Ops,
10574 unsigned IntID) {
10575 auto *VecTy = getSVEType(TypeFlags);
10576 Function *F = CGM.getIntrinsic(IntID, VecTy);
10577 if (TypeFlags.isReadZA())
10578 Ops[1] = EmitSVEPredicateCast(Ops[1], VecTy);
10579 else if (TypeFlags.isWriteZA())
10580 Ops[2] = EmitSVEPredicateCast(Ops[2], VecTy);
10581 return Builder.CreateCall(F, Ops);
10582}
10583
10584Value *CodeGenFunction::EmitSMEZero(const SVETypeFlags &TypeFlags,
10585 SmallVectorImpl<Value *> &Ops,
10586 unsigned IntID) {
10587 // The svzero_za() intrinsic zeros the entire ZA array and has no parameters.
10588 if (Ops.size() == 0)
10589 Ops.push_back(llvm::ConstantInt::get(Int32Ty, 255));
10590 Function *F = CGM.getIntrinsic(IntID, {});
10591 return Builder.CreateCall(F, Ops);
10592}
10593
10594Value *CodeGenFunction::EmitSMELdrStr(const SVETypeFlags &TypeFlags,
10595 SmallVectorImpl<Value *> &Ops,
10596 unsigned IntID) {
10597 if (Ops.size() == 2)
10598 Ops.push_back(Builder.getInt32(0));
10599 else
10600 Ops[2] = Builder.CreateIntCast(Ops[2], Int32Ty, true);
10601 Function *F = CGM.getIntrinsic(IntID, {});
10602 return Builder.CreateCall(F, Ops);
10603}
10604
10605// Limit the usage of scalable llvm IR generated by the ACLE by using the
10606// sve dup.x intrinsic instead of IRBuilder::CreateVectorSplat.
10607Value *CodeGenFunction::EmitSVEDupX(Value *Scalar, llvm::Type *Ty) {
10608 return Builder.CreateVectorSplat(
10609 cast<llvm::VectorType>(Ty)->getElementCount(), Scalar);
10610}
10611
10612Value *CodeGenFunction::EmitSVEDupX(Value *Scalar) {
10613 if (auto *Ty = Scalar->getType(); Ty->isVectorTy()) {
10614#ifndef NDEBUG
10615 auto *VecTy = cast<llvm::VectorType>(Ty);
10616 ElementCount EC = VecTy->getElementCount();
10617 assert(EC.isScalar() && VecTy->getElementType() == Int8Ty &&
10618 "Only <1 x i8> expected");
10619#endif
10620 Scalar = Builder.CreateExtractElement(Scalar, uint64_t(0));
10621 }
10622 return EmitSVEDupX(Scalar, getSVEVectorForElementType(Scalar->getType()));
10623}
10624
10625Value *CodeGenFunction::EmitSVEReinterpret(Value *Val, llvm::Type *Ty) {
10626 // FIXME: For big endian this needs an additional REV, or needs a separate
10627 // intrinsic that is code-generated as a no-op, because the LLVM bitcast
10628 // instruction is defined as 'bitwise' equivalent from memory point of
10629 // view (when storing/reloading), whereas the svreinterpret builtin
10630 // implements bitwise equivalent cast from register point of view.
10631 // LLVM CodeGen for a bitcast must add an explicit REV for big-endian.
10632
10633 if (auto *StructTy = dyn_cast<StructType>(Ty)) {
10634 Value *Tuple = llvm::PoisonValue::get(Ty);
10635
10636 for (unsigned I = 0; I < StructTy->getNumElements(); ++I) {
10637 Value *In = Builder.CreateExtractValue(Val, I);
10638 Value *Out = Builder.CreateBitCast(In, StructTy->getTypeAtIndex(I));
10639 Tuple = Builder.CreateInsertValue(Tuple, Out, I);
10640 }
10641
10642 return Tuple;
10643 }
10644
10645 return Builder.CreateBitCast(Val, Ty);
10646}
10647
10648static void InsertExplicitZeroOperand(CGBuilderTy &Builder, llvm::Type *Ty,
10649 SmallVectorImpl<Value *> &Ops) {
10650 auto *SplatZero = Constant::getNullValue(Ty);
10651 Ops.insert(Ops.begin(), SplatZero);
10652}
10653
10654static void InsertExplicitUndefOperand(CGBuilderTy &Builder, llvm::Type *Ty,
10655 SmallVectorImpl<Value *> &Ops) {
10656 auto *SplatUndef = UndefValue::get(Ty);
10657 Ops.insert(Ops.begin(), SplatUndef);
10658}
10659
10660SmallVector<llvm::Type *, 2>
10661CodeGenFunction::getSVEOverloadTypes(const SVETypeFlags &TypeFlags,
10662 llvm::Type *ResultType,
10663 ArrayRef<Value *> Ops) {
10664 if (TypeFlags.isOverloadNone())
10665 return {};
10666
10667 llvm::Type *DefaultType = getSVEType(TypeFlags);
10668
10669 if (TypeFlags.isOverloadWhileOrMultiVecCvt())
10670 return {DefaultType, Ops[1]->getType()};
10671
10672 if (TypeFlags.isOverloadWhileRW())
10673 return {getSVEPredType(TypeFlags), Ops[0]->getType()};
10674
10675 if (TypeFlags.isOverloadCvt())
10676 return {Ops[0]->getType(), Ops.back()->getType()};
10677
10678 if (TypeFlags.isReductionQV() && !ResultType->isScalableTy() &&
10679 ResultType->isVectorTy())
10680 return {ResultType, Ops[1]->getType()};
10681
10682 assert(TypeFlags.isOverloadDefault() && "Unexpected value for overloads");
10683 return {DefaultType};
10684}
10685
10686Value *CodeGenFunction::EmitSVETupleSetOrGet(const SVETypeFlags &TypeFlags,
10687 ArrayRef<Value *> Ops) {
10688 assert((TypeFlags.isTupleSet() || TypeFlags.isTupleGet()) &&
10689 "Expects TypleFlags.isTupleSet() or TypeFlags.isTupleGet()");
10690 unsigned Idx = cast<ConstantInt>(Ops[1])->getZExtValue();
10691
10692 if (TypeFlags.isTupleSet())
10693 return Builder.CreateInsertValue(Ops[0], Ops[2], Idx);
10694 return Builder.CreateExtractValue(Ops[0], Idx);
10695}
10696
10697Value *CodeGenFunction::EmitSVETupleCreate(const SVETypeFlags &TypeFlags,
10698 llvm::Type *Ty,
10699 ArrayRef<Value *> Ops) {
10700 assert(TypeFlags.isTupleCreate() && "Expects TypeFlags.isTupleCreate()");
10701
10702 Value *Tuple = llvm::PoisonValue::get(Ty);
10703 for (unsigned Idx = 0; Idx < Ops.size(); Idx++)
10704 Tuple = Builder.CreateInsertValue(Tuple, Ops[Idx], Idx);
10705
10706 return Tuple;
10707}
10708
10709void CodeGenFunction::GetAArch64SVEProcessedOperands(
10710 unsigned BuiltinID, const CallExpr *E, SmallVectorImpl<Value *> &Ops,
10711 SVETypeFlags TypeFlags) {
10712 // Find out if any arguments are required to be integer constant expressions.
10713 unsigned ICEArguments = 0;
10714 ASTContext::GetBuiltinTypeError Error;
10715 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
10716 assert(Error == ASTContext::GE_None && "Should not codegen an error");
10717
10718 // Tuple set/get only requires one insert/extract vector, which is
10719 // created by EmitSVETupleSetOrGet.
10720 bool IsTupleGetOrSet = TypeFlags.isTupleSet() || TypeFlags.isTupleGet();
10721
10722 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
10723 bool IsICE = ICEArguments & (1 << i);
10724 Value *Arg = EmitScalarExpr(E->getArg(i));
10725
10726 if (IsICE) {
10727 // If this is required to be a constant, constant fold it so that we know
10728 // that the generated intrinsic gets a ConstantInt.
10729 std::optional<llvm::APSInt> Result =
10730 E->getArg(i)->getIntegerConstantExpr(getContext());
10731 assert(Result && "Expected argument to be a constant");
10732
10733 // Immediates for SVE llvm intrinsics are always 32bit. We can safely
10734 // truncate because the immediate has been range checked and no valid
10735 // immediate requires more than a handful of bits.
10736 *Result = Result->extOrTrunc(32);
10737 Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), *Result));
10738 continue;
10739 }
10740
10741 if (isa<StructType>(Arg->getType()) && !IsTupleGetOrSet) {
10742 for (unsigned I = 0; I < Arg->getType()->getStructNumElements(); ++I)
10743 Ops.push_back(Builder.CreateExtractValue(Arg, I));
10744
10745 continue;
10746 }
10747
10748 Ops.push_back(Arg);
10749 }
10750}
10751
10752Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
10753 const CallExpr *E) {
10754 llvm::Type *Ty = ConvertType(E->getType());
10755 if (BuiltinID >= SVE::BI__builtin_sve_reinterpret_s8_s8 &&
10756 BuiltinID <= SVE::BI__builtin_sve_reinterpret_f64_f64_x4) {
10757 Value *Val = EmitScalarExpr(E->getArg(0));
10758 return EmitSVEReinterpret(Val, Ty);
10759 }
10760
10761 auto *Builtin = findARMVectorIntrinsicInMap(AArch64SVEIntrinsicMap, BuiltinID,
10762 AArch64SVEIntrinsicsProvenSorted);
10763
10764 llvm::SmallVector<Value *, 4> Ops;
10765 SVETypeFlags TypeFlags(Builtin->TypeModifier);
10766 GetAArch64SVEProcessedOperands(BuiltinID, E, Ops, TypeFlags);
10767
10768 if (TypeFlags.isLoad())
10769 return EmitSVEMaskedLoad(E, Ty, Ops, Builtin->LLVMIntrinsic,
10770 TypeFlags.isZExtReturn());
10771 else if (TypeFlags.isStore())
10772 return EmitSVEMaskedStore(E, Ops, Builtin->LLVMIntrinsic);
10773 else if (TypeFlags.isGatherLoad())
10774 return EmitSVEGatherLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10775 else if (TypeFlags.isScatterStore())
10776 return EmitSVEScatterStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10777 else if (TypeFlags.isPrefetch())
10778 return EmitSVEPrefetchLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10779 else if (TypeFlags.isGatherPrefetch())
10780 return EmitSVEGatherPrefetch(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10781 else if (TypeFlags.isStructLoad())
10782 return EmitSVEStructLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10783 else if (TypeFlags.isStructStore())
10784 return EmitSVEStructStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10785 else if (TypeFlags.isTupleSet() || TypeFlags.isTupleGet())
10786 return EmitSVETupleSetOrGet(TypeFlags, Ops);
10787 else if (TypeFlags.isTupleCreate())
10788 return EmitSVETupleCreate(TypeFlags, Ty, Ops);
10789 else if (TypeFlags.isUndef())
10790 return UndefValue::get(Ty);
10791 else if (Builtin->LLVMIntrinsic != 0) {
10792 // Emit set FPMR for intrinsics that require it
10793 if (TypeFlags.setsFPMR())
10794 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_set_fpmr),
10795 Ops.pop_back_val());
10796 if (TypeFlags.getMergeType() == SVETypeFlags::MergeZeroExp)
10797 InsertExplicitZeroOperand(Builder, Ty, Ops);
10798
10799 if (TypeFlags.getMergeType() == SVETypeFlags::MergeAnyExp)
10800 InsertExplicitUndefOperand(Builder, Ty, Ops);
10801
10802 // Some ACLE builtins leave out the argument to specify the predicate
10803 // pattern, which is expected to be expanded to an SV_ALL pattern.
10804 if (TypeFlags.isAppendSVALL())
10805 Ops.push_back(Builder.getInt32(/*SV_ALL*/ 31));
10806 if (TypeFlags.isInsertOp1SVALL())
10807 Ops.insert(&Ops[1], Builder.getInt32(/*SV_ALL*/ 31));
10808
10809 // Predicates must match the main datatype.
10810 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
10811 if (auto PredTy = dyn_cast<llvm::VectorType>(Ops[i]->getType()))
10812 if (PredTy->getElementType()->isIntegerTy(1))
10813 Ops[i] = EmitSVEPredicateCast(Ops[i], getSVEType(TypeFlags));
10814
10815 // Splat scalar operand to vector (intrinsics with _n infix)
10816 if (TypeFlags.hasSplatOperand()) {
10817 unsigned OpNo = TypeFlags.getSplatOperand();
10818 Ops[OpNo] = EmitSVEDupX(Ops[OpNo]);
10819 }
10820
10821 if (TypeFlags.isReverseCompare())
10822 std::swap(Ops[1], Ops[2]);
10823 else if (TypeFlags.isReverseUSDOT())
10824 std::swap(Ops[1], Ops[2]);
10825 else if (TypeFlags.isReverseMergeAnyBinOp() &&
10826 TypeFlags.getMergeType() == SVETypeFlags::MergeAny)
10827 std::swap(Ops[1], Ops[2]);
10828 else if (TypeFlags.isReverseMergeAnyAccOp() &&
10829 TypeFlags.getMergeType() == SVETypeFlags::MergeAny)
10830 std::swap(Ops[1], Ops[3]);
10831
10832 // Predicated intrinsics with _z suffix need a select w/ zeroinitializer.
10833 if (TypeFlags.getMergeType() == SVETypeFlags::MergeZero) {
10834 llvm::Type *OpndTy = Ops[1]->getType();
10835 auto *SplatZero = Constant::getNullValue(OpndTy);
10836 Ops[1] = Builder.CreateSelect(Ops[0], Ops[1], SplatZero);
10837 }
10838
10839 Function *F = CGM.getIntrinsic(Builtin->LLVMIntrinsic,
10840 getSVEOverloadTypes(TypeFlags, Ty, Ops));
10841 Value *Call = Builder.CreateCall(F, Ops);
10842
10843 if (Call->getType() == Ty)
10844 return Call;
10845
10846 // Predicate results must be converted to svbool_t.
10847 if (auto PredTy = dyn_cast<llvm::ScalableVectorType>(Ty))
10848 return EmitSVEPredicateCast(Call, PredTy);
10849 if (auto PredTupleTy = dyn_cast<llvm::StructType>(Ty))
10850 return EmitSVEPredicateTupleCast(Call, PredTupleTy);
10851
10852 llvm_unreachable("unsupported element count!");
10853 }
10854
10855 switch (BuiltinID) {
10856 default:
10857 return nullptr;
10858
10859 case SVE::BI__builtin_sve_svreinterpret_b: {
10860 auto SVCountTy =
10861 llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
10862 Function *CastFromSVCountF =
10863 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool, SVCountTy);
10864 return Builder.CreateCall(CastFromSVCountF, Ops[0]);
10865 }
10866 case SVE::BI__builtin_sve_svreinterpret_c: {
10867 auto SVCountTy =
10868 llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
10869 Function *CastToSVCountF =
10870 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, SVCountTy);
10871 return Builder.CreateCall(CastToSVCountF, Ops[0]);
10872 }
10873
10874 case SVE::BI__builtin_sve_svpsel_lane_b8:
10875 case SVE::BI__builtin_sve_svpsel_lane_b16:
10876 case SVE::BI__builtin_sve_svpsel_lane_b32:
10877 case SVE::BI__builtin_sve_svpsel_lane_b64:
10878 case SVE::BI__builtin_sve_svpsel_lane_c8:
10879 case SVE::BI__builtin_sve_svpsel_lane_c16:
10880 case SVE::BI__builtin_sve_svpsel_lane_c32:
10881 case SVE::BI__builtin_sve_svpsel_lane_c64: {
10882 bool IsSVCount = isa<TargetExtType>(Ops[0]->getType());
10883 assert(((!IsSVCount || cast<TargetExtType>(Ops[0]->getType())->getName() ==
10884 "aarch64.svcount")) &&
10885 "Unexpected TargetExtType");
10886 auto SVCountTy =
10887 llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
10888 Function *CastFromSVCountF =
10889 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool, SVCountTy);
10890 Function *CastToSVCountF =
10891 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, SVCountTy);
10892
10893 auto OverloadedTy = getSVEType(SVETypeFlags(Builtin->TypeModifier));
10894 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_psel, OverloadedTy);
10895 llvm::Value *Ops0 =
10896 IsSVCount ? Builder.CreateCall(CastFromSVCountF, Ops[0]) : Ops[0];
10897 llvm::Value *Ops1 = EmitSVEPredicateCast(Ops[1], OverloadedTy);
10898 llvm::Value *PSel = Builder.CreateCall(F, {Ops0, Ops1, Ops[2]});
10899 return IsSVCount ? Builder.CreateCall(CastToSVCountF, PSel) : PSel;
10900 }
10901 case SVE::BI__builtin_sve_svmov_b_z: {
10902 // svmov_b_z(pg, op) <=> svand_b_z(pg, op, op)
10903 SVETypeFlags TypeFlags(Builtin->TypeModifier);
10904 llvm::Type* OverloadedTy = getSVEType(TypeFlags);
10905 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_and_z, OverloadedTy);
10906 return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[1]});
10907 }
10908
10909 case SVE::BI__builtin_sve_svnot_b_z: {
10910 // svnot_b_z(pg, op) <=> sveor_b_z(pg, op, pg)
10911 SVETypeFlags TypeFlags(Builtin->TypeModifier);
10912 llvm::Type* OverloadedTy = getSVEType(TypeFlags);
10913 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_eor_z, OverloadedTy);
10914 return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[0]});
10915 }
10916
10917 case SVE::BI__builtin_sve_svmovlb_u16:
10918 case SVE::BI__builtin_sve_svmovlb_u32:
10919 case SVE::BI__builtin_sve_svmovlb_u64:
10920 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllb);
10921
10922 case SVE::BI__builtin_sve_svmovlb_s16:
10923 case SVE::BI__builtin_sve_svmovlb_s32:
10924 case SVE::BI__builtin_sve_svmovlb_s64:
10925 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllb);
10926
10927 case SVE::BI__builtin_sve_svmovlt_u16:
10928 case SVE::BI__builtin_sve_svmovlt_u32:
10929 case SVE::BI__builtin_sve_svmovlt_u64:
10930 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllt);
10931
10932 case SVE::BI__builtin_sve_svmovlt_s16:
10933 case SVE::BI__builtin_sve_svmovlt_s32:
10934 case SVE::BI__builtin_sve_svmovlt_s64:
10935 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllt);
10936
10937 case SVE::BI__builtin_sve_svpmullt_u16:
10938 case SVE::BI__builtin_sve_svpmullt_u64:
10939 case SVE::BI__builtin_sve_svpmullt_n_u16:
10940 case SVE::BI__builtin_sve_svpmullt_n_u64:
10941 return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullt_pair);
10942
10943 case SVE::BI__builtin_sve_svpmullb_u16:
10944 case SVE::BI__builtin_sve_svpmullb_u64:
10945 case SVE::BI__builtin_sve_svpmullb_n_u16:
10946 case SVE::BI__builtin_sve_svpmullb_n_u64:
10947 return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullb_pair);
10948
10949 case SVE::BI__builtin_sve_svdup_n_b8:
10950 case SVE::BI__builtin_sve_svdup_n_b16:
10951 case SVE::BI__builtin_sve_svdup_n_b32:
10952 case SVE::BI__builtin_sve_svdup_n_b64: {
10953 Value *CmpNE =
10954 Builder.CreateICmpNE(Ops[0], Constant::getNullValue(Ops[0]->getType()));
10955 llvm::ScalableVectorType *OverloadedTy = getSVEType(TypeFlags);
10956 Value *Dup = EmitSVEDupX(CmpNE, OverloadedTy);
10957 return EmitSVEPredicateCast(Dup, cast<llvm::ScalableVectorType>(Ty));
10958 }
10959
10960 case SVE::BI__builtin_sve_svdupq_n_b8:
10961 case SVE::BI__builtin_sve_svdupq_n_b16:
10962 case SVE::BI__builtin_sve_svdupq_n_b32:
10963 case SVE::BI__builtin_sve_svdupq_n_b64:
10964 case SVE::BI__builtin_sve_svdupq_n_u8:
10965 case SVE::BI__builtin_sve_svdupq_n_s8:
10966 case SVE::BI__builtin_sve_svdupq_n_u64:
10967 case SVE::BI__builtin_sve_svdupq_n_f64:
10968 case SVE::BI__builtin_sve_svdupq_n_s64:
10969 case SVE::BI__builtin_sve_svdupq_n_u16:
10970 case SVE::BI__builtin_sve_svdupq_n_f16:
10971 case SVE::BI__builtin_sve_svdupq_n_bf16:
10972 case SVE::BI__builtin_sve_svdupq_n_s16:
10973 case SVE::BI__builtin_sve_svdupq_n_u32:
10974 case SVE::BI__builtin_sve_svdupq_n_f32:
10975 case SVE::BI__builtin_sve_svdupq_n_s32: {
10976 // These builtins are implemented by building a fixed-length vector from the
10977 // scalar operands and broadcasting it with dupq_lane to materialize a vector.
10978 unsigned NumOpnds = Ops.size();
10979
10980 bool IsBoolTy =
10981 cast<llvm::VectorType>(Ty)->getElementType()->isIntegerTy(1);
10982
10983 // For svdupq_n_b* the element type is an integer of width 128/numelts,
10984 // so that the compare can use the width that is natural for the expected
10985 // number of predicate lanes.
10986 llvm::Type *EltTy = Ops[0]->getType();
10987 if (IsBoolTy)
10988 EltTy = IntegerType::get(getLLVMContext(), SVEBitsPerBlock / NumOpnds);
10989
10991 for (unsigned I = 0; I < NumOpnds; ++I)
10992 VecOps.push_back(Builder.CreateZExt(Ops[I], EltTy));
10993 Value *Vec = BuildVector(VecOps);
10994
10995 llvm::Type *OverloadedTy = getSVEVectorForElementType(EltTy);
10996 Value *InsertSubVec = Builder.CreateInsertVector(
10997 OverloadedTy, PoisonValue::get(OverloadedTy), Vec, Builder.getInt64(0));
10998
10999 Function *F =
11000 CGM.getIntrinsic(Intrinsic::aarch64_sve_dupq_lane, OverloadedTy);
11001 Value *DupQLane =
11002 Builder.CreateCall(F, {InsertSubVec, Builder.getInt64(0)});
11003
11004 if (!IsBoolTy)
11005 return DupQLane;
11006
11007 SVETypeFlags TypeFlags(Builtin->TypeModifier);
11008 Value *Pred = EmitSVEAllTruePred(TypeFlags);
11009
11010 // For svdupq_n_b* we need to add an additional 'cmpne' with '0'.
11011 F = CGM.getIntrinsic(NumOpnds == 2 ? Intrinsic::aarch64_sve_cmpne
11012 : Intrinsic::aarch64_sve_cmpne_wide,
11013 OverloadedTy);
11014 Value *Call = Builder.CreateCall(
11015 F, {Pred, DupQLane, EmitSVEDupX(Builder.getInt64(0))});
11016 return EmitSVEPredicateCast(Call, cast<llvm::ScalableVectorType>(Ty));
11017 }
11018
11019 case SVE::BI__builtin_sve_svpfalse_b:
11020 return ConstantInt::getFalse(Ty);
11021
11022 case SVE::BI__builtin_sve_svpfalse_c: {
11023 auto SVBoolTy = ScalableVectorType::get(Builder.getInt1Ty(), 16);
11024 Function *CastToSVCountF =
11025 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, Ty);
11026 return Builder.CreateCall(CastToSVCountF, ConstantInt::getFalse(SVBoolTy));
11027 }
11028
11029 case SVE::BI__builtin_sve_svlen_bf16:
11030 case SVE::BI__builtin_sve_svlen_f16:
11031 case SVE::BI__builtin_sve_svlen_f32:
11032 case SVE::BI__builtin_sve_svlen_f64:
11033 case SVE::BI__builtin_sve_svlen_s8:
11034 case SVE::BI__builtin_sve_svlen_s16:
11035 case SVE::BI__builtin_sve_svlen_s32:
11036 case SVE::BI__builtin_sve_svlen_s64:
11037 case SVE::BI__builtin_sve_svlen_u8:
11038 case SVE::BI__builtin_sve_svlen_u16:
11039 case SVE::BI__builtin_sve_svlen_u32:
11040 case SVE::BI__builtin_sve_svlen_u64: {
11041 SVETypeFlags TF(Builtin->TypeModifier);
11042 auto VTy = cast<llvm::VectorType>(getSVEType(TF));
11043 auto *NumEls =
11044 llvm::ConstantInt::get(Ty, VTy->getElementCount().getKnownMinValue());
11045
11046 Function *F = CGM.getIntrinsic(Intrinsic::vscale, Ty);
11047 return Builder.CreateMul(NumEls, Builder.CreateCall(F));
11048 }
11049
11050 case SVE::BI__builtin_sve_svtbl2_u8:
11051 case SVE::BI__builtin_sve_svtbl2_s8:
11052 case SVE::BI__builtin_sve_svtbl2_u16:
11053 case SVE::BI__builtin_sve_svtbl2_s16:
11054 case SVE::BI__builtin_sve_svtbl2_u32:
11055 case SVE::BI__builtin_sve_svtbl2_s32:
11056 case SVE::BI__builtin_sve_svtbl2_u64:
11057 case SVE::BI__builtin_sve_svtbl2_s64:
11058 case SVE::BI__builtin_sve_svtbl2_f16:
11059 case SVE::BI__builtin_sve_svtbl2_bf16:
11060 case SVE::BI__builtin_sve_svtbl2_f32:
11061 case SVE::BI__builtin_sve_svtbl2_f64: {
11062 SVETypeFlags TF(Builtin->TypeModifier);
11063 auto VTy = cast<llvm::ScalableVectorType>(getSVEType(TF));
11064 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_tbl2, VTy);
11065 return Builder.CreateCall(F, Ops);
11066 }
11067
11068 case SVE::BI__builtin_sve_svset_neonq_s8:
11069 case SVE::BI__builtin_sve_svset_neonq_s16:
11070 case SVE::BI__builtin_sve_svset_neonq_s32:
11071 case SVE::BI__builtin_sve_svset_neonq_s64:
11072 case SVE::BI__builtin_sve_svset_neonq_u8:
11073 case SVE::BI__builtin_sve_svset_neonq_u16:
11074 case SVE::BI__builtin_sve_svset_neonq_u32:
11075 case SVE::BI__builtin_sve_svset_neonq_u64:
11076 case SVE::BI__builtin_sve_svset_neonq_f16:
11077 case SVE::BI__builtin_sve_svset_neonq_f32:
11078 case SVE::BI__builtin_sve_svset_neonq_f64:
11079 case SVE::BI__builtin_sve_svset_neonq_bf16: {
11080 return Builder.CreateInsertVector(Ty, Ops[0], Ops[1], Builder.getInt64(0));
11081 }
11082
11083 case SVE::BI__builtin_sve_svget_neonq_s8:
11084 case SVE::BI__builtin_sve_svget_neonq_s16:
11085 case SVE::BI__builtin_sve_svget_neonq_s32:
11086 case SVE::BI__builtin_sve_svget_neonq_s64:
11087 case SVE::BI__builtin_sve_svget_neonq_u8:
11088 case SVE::BI__builtin_sve_svget_neonq_u16:
11089 case SVE::BI__builtin_sve_svget_neonq_u32:
11090 case SVE::BI__builtin_sve_svget_neonq_u64:
11091 case SVE::BI__builtin_sve_svget_neonq_f16:
11092 case SVE::BI__builtin_sve_svget_neonq_f32:
11093 case SVE::BI__builtin_sve_svget_neonq_f64:
11094 case SVE::BI__builtin_sve_svget_neonq_bf16: {
11095 return Builder.CreateExtractVector(Ty, Ops[0], Builder.getInt64(0));
11096 }
11097
11098 case SVE::BI__builtin_sve_svdup_neonq_s8:
11099 case SVE::BI__builtin_sve_svdup_neonq_s16:
11100 case SVE::BI__builtin_sve_svdup_neonq_s32:
11101 case SVE::BI__builtin_sve_svdup_neonq_s64:
11102 case SVE::BI__builtin_sve_svdup_neonq_u8:
11103 case SVE::BI__builtin_sve_svdup_neonq_u16:
11104 case SVE::BI__builtin_sve_svdup_neonq_u32:
11105 case SVE::BI__builtin_sve_svdup_neonq_u64:
11106 case SVE::BI__builtin_sve_svdup_neonq_f16:
11107 case SVE::BI__builtin_sve_svdup_neonq_f32:
11108 case SVE::BI__builtin_sve_svdup_neonq_f64:
11109 case SVE::BI__builtin_sve_svdup_neonq_bf16: {
11110 Value *Insert = Builder.CreateInsertVector(Ty, PoisonValue::get(Ty), Ops[0],
11111 Builder.getInt64(0));
11112 return Builder.CreateIntrinsic(Intrinsic::aarch64_sve_dupq_lane, {Ty},
11113 {Insert, Builder.getInt64(0)});
11114 }
11115 }
11116
11117 /// Should not happen
11118 return nullptr;
11119}
11120
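// Note (added comment): swaps the multi-vector operands of the sudot/sumla SME
// builtins so the mixed-sign (commutative) operation can be emitted with the
// corresponding usdot/usmla LLVM intrinsic.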
11121static void swapCommutativeSMEOperands(unsigned BuiltinID,
11122 SmallVectorImpl<Value *> &Ops) {
11123 unsigned MultiVec;
11124 switch (BuiltinID) {
11125 default:
11126 return;
11127 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x1:
11128 MultiVec = 1;
11129 break;
11130 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x2:
11131 case SME::BI__builtin_sme_svsudot_za32_s8_vg1x2:
11132 MultiVec = 2;
11133 break;
11134 case SME::BI__builtin_sme_svsudot_za32_s8_vg1x4:
11135 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x4:
11136 MultiVec = 4;
11137 break;
11138 }
11139
11140 if (MultiVec > 0)
11141 for (unsigned I = 0; I < MultiVec; ++I)
11142 std::swap(Ops[I + 1], Ops[I + 1 + MultiVec]);
11143}
11144
11145 Value *CodeGenFunction::EmitAArch64SMEBuiltinExpr(unsigned BuiltinID,
11146 const CallExpr *E) {
11147 auto *Builtin = findARMVectorIntrinsicInMap(AArch64SMEIntrinsicMap, BuiltinID,
11148 AArch64SMEIntrinsicsProvenSorted);
11149
11150 llvm::SmallVector<Value *, 4> Ops;
11151 SVETypeFlags TypeFlags(Builtin->TypeModifier);
11152 GetAArch64SVEProcessedOperands(BuiltinID, E, Ops, TypeFlags);
11153
11154 if (TypeFlags.isLoad() || TypeFlags.isStore())
11155 return EmitSMELd1St1(TypeFlags, Ops, Builtin->LLVMIntrinsic);
11156 else if (TypeFlags.isReadZA() || TypeFlags.isWriteZA())
11157 return EmitSMEReadWrite(TypeFlags, Ops, Builtin->LLVMIntrinsic);
11158 else if (BuiltinID == SME::BI__builtin_sme_svzero_mask_za ||
11159 BuiltinID == SME::BI__builtin_sme_svzero_za)
11160 return EmitSMEZero(TypeFlags, Ops, Builtin->LLVMIntrinsic);
11161 else if (BuiltinID == SME::BI__builtin_sme_svldr_vnum_za ||
11162 BuiltinID == SME::BI__builtin_sme_svstr_vnum_za ||
11163 BuiltinID == SME::BI__builtin_sme_svldr_za ||
11164 BuiltinID == SME::BI__builtin_sme_svstr_za)
11165 return EmitSMELdrStr(TypeFlags, Ops, Builtin->LLVMIntrinsic);
11166
11167 // Emit set FPMR for intrinsics that require it
11168 if (TypeFlags.setsFPMR())
11169 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_set_fpmr),
11170 Ops.pop_back_val());
11171 // Handle builtins which require their multi-vector operands to be swapped
11172 swapCommutativeSMEOperands(BuiltinID, Ops);
11173
11174 // Should not happen!
11175 if (Builtin->LLVMIntrinsic == 0)
11176 return nullptr;
11177
11178 if (BuiltinID == SME::BI__builtin_sme___arm_in_streaming_mode) {
11179 // If we already know the streaming mode, don't bother with the intrinsic
11180 // and emit a constant instead
11181 const auto *FD = cast<FunctionDecl>(CurFuncDecl);
11182 if (const auto *FPT = FD->getType()->getAs<FunctionProtoType>()) {
11183 unsigned SMEAttrs = FPT->getAArch64SMEAttributes();
11184 if (!(SMEAttrs & FunctionType::SME_PStateSMCompatibleMask)) {
11185 bool IsStreaming = SMEAttrs & FunctionType::SME_PStateSMEnabledMask;
11186 return ConstantInt::getBool(Builder.getContext(), IsStreaming);
11187 }
11188 }
11189 }
11190
11191 // Predicates must match the main datatype.
11192 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
11193 if (auto PredTy = dyn_cast<llvm::VectorType>(Ops[i]->getType()))
11194 if (PredTy->getElementType()->isIntegerTy(1))
11195 Ops[i] = EmitSVEPredicateCast(Ops[i], getSVEType(TypeFlags));
11196
11197 Function *F =
11198 TypeFlags.isOverloadNone()
11199 ? CGM.getIntrinsic(Builtin->LLVMIntrinsic)
11200 : CGM.getIntrinsic(Builtin->LLVMIntrinsic, {getSVEType(TypeFlags)});
11201
11202 return Builder.CreateCall(F, Ops);
11203}
11204
11205 Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
11206 const CallExpr *E,
11207 llvm::Triple::ArchType Arch) {
11208 if (BuiltinID >= clang::AArch64::FirstSVEBuiltin &&
11209 BuiltinID <= clang::AArch64::LastSVEBuiltin)
11210 return EmitAArch64SVEBuiltinExpr(BuiltinID, E);
11211
11212 if (BuiltinID >= clang::AArch64::FirstSMEBuiltin &&
11213 BuiltinID <= clang::AArch64::LastSMEBuiltin)
11214 return EmitAArch64SMEBuiltinExpr(BuiltinID, E);
11215
11216 if (BuiltinID == Builtin::BI__builtin_cpu_supports)
11217 return EmitAArch64CpuSupports(E);
11218
11219 unsigned HintID = static_cast<unsigned>(-1);
11220 switch (BuiltinID) {
11221 default: break;
11222 case clang::AArch64::BI__builtin_arm_nop:
11223 HintID = 0;
11224 break;
11225 case clang::AArch64::BI__builtin_arm_yield:
11226 case clang::AArch64::BI__yield:
11227 HintID = 1;
11228 break;
11229 case clang::AArch64::BI__builtin_arm_wfe:
11230 case clang::AArch64::BI__wfe:
11231 HintID = 2;
11232 break;
11233 case clang::AArch64::BI__builtin_arm_wfi:
11234 case clang::AArch64::BI__wfi:
11235 HintID = 3;
11236 break;
11237 case clang::AArch64::BI__builtin_arm_sev:
11238 case clang::AArch64::BI__sev:
11239 HintID = 4;
11240 break;
11241 case clang::AArch64::BI__builtin_arm_sevl:
11242 case clang::AArch64::BI__sevl:
11243 HintID = 5;
11244 break;
11245 }
11246
11247 if (HintID != static_cast<unsigned>(-1)) {
11248 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint);
11249 return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID));
11250 }
11251
11252 if (BuiltinID == clang::AArch64::BI__builtin_arm_trap) {
11253 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_break);
11254 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11255 return Builder.CreateCall(F, Builder.CreateZExt(Arg, CGM.Int32Ty));
11256 }
11257
11258 if (BuiltinID == clang::AArch64::BI__builtin_arm_get_sme_state) {
11259 // Create call to __arm_sme_state and store the results to the two pointers.
11260 CallInst *CI = EmitRuntimeCall(CGM.CreateRuntimeFunction(
11261 llvm::FunctionType::get(StructType::get(CGM.Int64Ty, CGM.Int64Ty), {},
11262 false),
11263 "__arm_sme_state"));
11264 auto Attrs = AttributeList().addFnAttribute(getLLVMContext(),
11265 "aarch64_pstate_sm_compatible");
11266 CI->setAttributes(Attrs);
11267 CI->setCallingConv(
11268 llvm::CallingConv::
11269 AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2);
11270 Builder.CreateStore(Builder.CreateExtractValue(CI, 0),
11271 EmitPointerWithAlignment(E->getArg(0)));
11272 return Builder.CreateStore(Builder.CreateExtractValue(CI, 1),
11273 EmitPointerWithAlignment(E->getArg(1)));
11274 }
11275
11276 if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit) {
11277 assert((getContext().getTypeSize(E->getType()) == 32) &&
11278 "rbit of unusual size!");
11279 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11280 return Builder.CreateCall(
11281 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
11282 }
11283 if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit64) {
11284 assert((getContext().getTypeSize(E->getType()) == 64) &&
11285 "rbit of unusual size!");
11286 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11287 return Builder.CreateCall(
11288 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
11289 }
11290
11291 if (BuiltinID == clang::AArch64::BI__builtin_arm_clz ||
11292 BuiltinID == clang::AArch64::BI__builtin_arm_clz64) {
11293 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11294 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Arg->getType());
11295 Value *Res = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
11296 if (BuiltinID == clang::AArch64::BI__builtin_arm_clz64)
11297 Res = Builder.CreateTrunc(Res, Builder.getInt32Ty());
11298 return Res;
11299 }
11300
11301 if (BuiltinID == clang::AArch64::BI__builtin_arm_cls) {
11302 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11303 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls), Arg,
11304 "cls");
11305 }
11306 if (BuiltinID == clang::AArch64::BI__builtin_arm_cls64) {
11307 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11308 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls64), Arg,
11309 "cls");
11310 }
11311
11312 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32zf ||
11313 BuiltinID == clang::AArch64::BI__builtin_arm_rint32z) {
11314 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11315 llvm::Type *Ty = Arg->getType();
11316 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32z, Ty),
11317 Arg, "frint32z");
11318 }
11319
11320 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64zf ||
11321 BuiltinID == clang::AArch64::BI__builtin_arm_rint64z) {
11322 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11323 llvm::Type *Ty = Arg->getType();
11324 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64z, Ty),
11325 Arg, "frint64z");
11326 }
11327
11328 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32xf ||
11329 BuiltinID == clang::AArch64::BI__builtin_arm_rint32x) {
11330 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11331 llvm::Type *Ty = Arg->getType();
11332 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32x, Ty),
11333 Arg, "frint32x");
11334 }
11335
11336 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64xf ||
11337 BuiltinID == clang::AArch64::BI__builtin_arm_rint64x) {
11338 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11339 llvm::Type *Ty = Arg->getType();
11340 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64x, Ty),
11341 Arg, "frint64x");
11342 }
11343
11344 if (BuiltinID == clang::AArch64::BI__builtin_arm_jcvt) {
11345 assert((getContext().getTypeSize(E->getType()) == 32) &&
11346 "__jcvt of unusual size!");
11347 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11348 return Builder.CreateCall(
11349 CGM.getIntrinsic(Intrinsic::aarch64_fjcvtzs), Arg);
11350 }
11351
11352 if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b ||
11353 BuiltinID == clang::AArch64::BI__builtin_arm_st64b ||
11354 BuiltinID == clang::AArch64::BI__builtin_arm_st64bv ||
11355 BuiltinID == clang::AArch64::BI__builtin_arm_st64bv0) {
11356 llvm::Value *MemAddr = EmitScalarExpr(E->getArg(0));
11357 llvm::Value *ValPtr = EmitScalarExpr(E->getArg(1));
11358
11359 if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b) {
11360 // Load from the address via an LLVM intrinsic, receiving a
11361 // tuple of 8 i64 words, and store each one to ValPtr.
11362 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_ld64b);
11363 llvm::Value *Val = Builder.CreateCall(F, MemAddr);
11364 llvm::Value *ToRet;
11365 for (size_t i = 0; i < 8; i++) {
11366 llvm::Value *ValOffsetPtr =
11367 Builder.CreateGEP(Int64Ty, ValPtr, Builder.getInt32(i));
11368 Address Addr =
11369 Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(8));
11370 ToRet = Builder.CreateStore(Builder.CreateExtractValue(Val, i), Addr);
11371 }
11372 return ToRet;
11373 } else {
11374 // Load 8 i64 words from ValPtr, and store them to the address
11375 // via an LLVM intrinsic.
11376 SmallVector<llvm::Value *, 9> Args;
11377 Args.push_back(MemAddr);
11378 for (size_t i = 0; i < 8; i++) {
11379 llvm::Value *ValOffsetPtr =
11380 Builder.CreateGEP(Int64Ty, ValPtr, Builder.getInt32(i));
11381 Address Addr =
11382 Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(8));
11383 Args.push_back(Builder.CreateLoad(Addr));
11384 }
11385
11386 auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_st64b
11387 ? Intrinsic::aarch64_st64b
11388 : BuiltinID == clang::AArch64::BI__builtin_arm_st64bv
11389 ? Intrinsic::aarch64_st64bv
11390 : Intrinsic::aarch64_st64bv0);
11391 Function *F = CGM.getIntrinsic(Intr);
11392 return Builder.CreateCall(F, Args);
11393 }
11394 }
11395
11396 if (BuiltinID == clang::AArch64::BI__builtin_arm_rndr ||
11397 BuiltinID == clang::AArch64::BI__builtin_arm_rndrrs) {
11398
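// Note (added comment): rndr/rndrrs return a {value, status} pair; the random
// value is stored through the pointer argument and the status flag is
// zero-extended and returned.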
11399 auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_rndr
11400 ? Intrinsic::aarch64_rndr
11401 : Intrinsic::aarch64_rndrrs);
11402 Function *F = CGM.getIntrinsic(Intr);
11403 llvm::Value *Val = Builder.CreateCall(F);
11404 Value *RandomValue = Builder.CreateExtractValue(Val, 0);
11405 Value *Status = Builder.CreateExtractValue(Val, 1);
11406
11407 Address MemAddress = EmitPointerWithAlignment(E->getArg(0));
11408 Builder.CreateStore(RandomValue, MemAddress);
11409 Status = Builder.CreateZExt(Status, Int32Ty);
11410 return Status;
11411 }
11412
11413 if (BuiltinID == clang::AArch64::BI__clear_cache) {
11414 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
11415 const FunctionDecl *FD = E->getDirectCallee();
11416 Value *Ops[2];
11417 for (unsigned i = 0; i < 2; i++)
11418 Ops[i] = EmitScalarExpr(E->getArg(i));
11419 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
11420 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
11421 StringRef Name = FD->getName();
11422 return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
11423 }
11424
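// Note (added comment): for 128-bit ldrex/ldaex, emit ldxp/ldaxp and
// reassemble the two 64-bit halves into an i128 (high word shifted left by 64
// and or'ed with the low word).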
11425 if ((BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
11426 BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) &&
11427 getContext().getTypeSize(E->getType()) == 128) {
11428 Function *F =
11429 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
11430 ? Intrinsic::aarch64_ldaxp
11431 : Intrinsic::aarch64_ldxp);
11432
11433 Value *LdPtr = EmitScalarExpr(E->getArg(0));
11434 Value *Val = Builder.CreateCall(F, LdPtr, "ldxp");
11435
11436 Value *Val0 = Builder.CreateExtractValue(Val, 1);
11437 Value *Val1 = Builder.CreateExtractValue(Val, 0);
11438 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
11439 Val0 = Builder.CreateZExt(Val0, Int128Ty);
11440 Val1 = Builder.CreateZExt(Val1, Int128Ty);
11441
11442 Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64);
11443 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
11444 Val = Builder.CreateOr(Val, Val1);
11445 return Builder.CreateBitCast(Val, ConvertType(E->getType()));
11446 } else if (BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
11447 BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) {
11448 Value *LoadAddr = EmitScalarExpr(E->getArg(0));
11449
11450 QualType Ty = E->getType();
11451 llvm::Type *RealResTy = ConvertType(Ty);
11452 llvm::Type *IntTy =
11453 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
11454
11455 Function *F =
11456 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
11457 ? Intrinsic::aarch64_ldaxr
11458 : Intrinsic::aarch64_ldxr,
11459 UnqualPtrTy);
11460 CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldxr");
11461 Val->addParamAttr(
11462 0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy));
11463
11464 if (RealResTy->isPointerTy())
11465 return Builder.CreateIntToPtr(Val, RealResTy);
11466
11467 llvm::Type *IntResTy = llvm::IntegerType::get(
11468 getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
11469 return Builder.CreateBitCast(Builder.CreateTruncOrBitCast(Val, IntResTy),
11470 RealResTy);
11471 }
11472
11473 if ((BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
11474 BuiltinID == clang::AArch64::BI__builtin_arm_stlex) &&
11475 getContext().getTypeSize(E->getArg(0)->getType()) == 128) {
11476 Function *F =
11477 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex
11478 ? Intrinsic::aarch64_stlxp
11479 : Intrinsic::aarch64_stxp);
11480 llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty);
11481
11482 Address Tmp = CreateMemTemp(E->getArg(0)->getType());
11483 EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true);
11484
11485 Tmp = Tmp.withElementType(STy);
11486 llvm::Value *Val = Builder.CreateLoad(Tmp);
11487
11488 Value *Arg0 = Builder.CreateExtractValue(Val, 0);
11489 Value *Arg1 = Builder.CreateExtractValue(Val, 1);
11490 Value *StPtr = EmitScalarExpr(E->getArg(1));
11491 return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp");
11492 }
11493
11494 if (BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
11495 BuiltinID == clang::AArch64::BI__builtin_arm_stlex) {
11496 Value *StoreVal = EmitScalarExpr(E->getArg(0));
11497 Value *StoreAddr = EmitScalarExpr(E->getArg(1));
11498
11499 QualType Ty = E->getArg(0)->getType();
11500 llvm::Type *StoreTy =
11501 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
11502
11503 if (StoreVal->getType()->isPointerTy())
11504 StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty);
11505 else {
11506 llvm::Type *IntTy = llvm::IntegerType::get(
11507 getLLVMContext(),
11508 CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
11509 StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
11510 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty);
11511 }
11512
11513 Function *F =
11514 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex
11515 ? Intrinsic::aarch64_stlxr
11516 : Intrinsic::aarch64_stxr,
11517 StoreAddr->getType());
11518 CallInst *CI = Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr");
11519 CI->addParamAttr(
11520 1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy));
11521 return CI;
11522 }
11523
11524 if (BuiltinID == clang::AArch64::BI__getReg) {
11525 Expr::EvalResult Result;
11526 if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
11527 llvm_unreachable("Sema will ensure that the parameter is constant");
11528
11529 llvm::APSInt Value = Result.Val.getInt();
11530 LLVMContext &Context = CGM.getLLVMContext();
11531 std::string Reg = Value == 31 ? "sp" : "x" + toString(Value, 10);
11532
11533 llvm::Metadata *Ops[] = {llvm::MDString::get(Context, Reg)};
11534 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
11535 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
11536
11537 llvm::Function *F =
11538 CGM.getIntrinsic(llvm::Intrinsic::read_register, {Int64Ty});
11539 return Builder.CreateCall(F, Metadata);
11540 }
11541
11542 if (BuiltinID == clang::AArch64::BI__break) {
11543 Expr::EvalResult Result;
11544 if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
11545 llvm_unreachable("Sema will ensure that the parameter is constant");
11546
11547 llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::aarch64_break);
11548 return Builder.CreateCall(F, {EmitScalarExpr(E->getArg(0))});
11549 }
11550
11551 if (BuiltinID == clang::AArch64::BI__builtin_arm_clrex) {
11552 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex);
11553 return Builder.CreateCall(F);
11554 }
11555
11556 if (BuiltinID == clang::AArch64::BI_ReadWriteBarrier)
11557 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
11558 llvm::SyncScope::SingleThread);
11559
11560 // CRC32
11561 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
11562 switch (BuiltinID) {
11563 case clang::AArch64::BI__builtin_arm_crc32b:
11564 CRCIntrinsicID = Intrinsic::aarch64_crc32b; break;
11565 case clang::AArch64::BI__builtin_arm_crc32cb:
11566 CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break;
11567 case clang::AArch64::BI__builtin_arm_crc32h:
11568 CRCIntrinsicID = Intrinsic::aarch64_crc32h; break;
11569 case clang::AArch64::BI__builtin_arm_crc32ch:
11570 CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break;
11571 case clang::AArch64::BI__builtin_arm_crc32w:
11572 CRCIntrinsicID = Intrinsic::aarch64_crc32w; break;
11573 case clang::AArch64::BI__builtin_arm_crc32cw:
11574 CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break;
11575 case clang::AArch64::BI__builtin_arm_crc32d:
11576 CRCIntrinsicID = Intrinsic::aarch64_crc32x; break;
11577 case clang::AArch64::BI__builtin_arm_crc32cd:
11578 CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break;
11579 }
11580
11581 if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
11582 Value *Arg0 = EmitScalarExpr(E->getArg(0));
11583 Value *Arg1 = EmitScalarExpr(E->getArg(1));
11584 Function *F = CGM.getIntrinsic(CRCIntrinsicID);
11585
11586 llvm::Type *DataTy = F->getFunctionType()->getParamType(1);
11587 Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy);
11588
11589 return Builder.CreateCall(F, {Arg0, Arg1});
11590 }
11591
11592 // Memory Operations (MOPS)
11593 if (BuiltinID == AArch64::BI__builtin_arm_mops_memset_tag) {
11594 Value *Dst = EmitScalarExpr(E->getArg(0));
11595 Value *Val = EmitScalarExpr(E->getArg(1));
11596 Value *Size = EmitScalarExpr(E->getArg(2));
11597 Val = Builder.CreateTrunc(Val, Int8Ty);
11598 Size = Builder.CreateIntCast(Size, Int64Ty, false);
11599 return Builder.CreateCall(
11600 CGM.getIntrinsic(Intrinsic::aarch64_mops_memset_tag), {Dst, Val, Size});
11601 }
11602
11603 // Memory Tagging Extensions (MTE) Intrinsics
11604 Intrinsic::ID MTEIntrinsicID = Intrinsic::not_intrinsic;
11605 switch (BuiltinID) {
11606 case clang::AArch64::BI__builtin_arm_irg:
11607 MTEIntrinsicID = Intrinsic::aarch64_irg; break;
11608 case clang::AArch64::BI__builtin_arm_addg:
11609 MTEIntrinsicID = Intrinsic::aarch64_addg; break;
11610 case clang::AArch64::BI__builtin_arm_gmi:
11611 MTEIntrinsicID = Intrinsic::aarch64_gmi; break;
11612 case clang::AArch64::BI__builtin_arm_ldg:
11613 MTEIntrinsicID = Intrinsic::aarch64_ldg; break;
11614 case clang::AArch64::BI__builtin_arm_stg:
11615 MTEIntrinsicID = Intrinsic::aarch64_stg; break;
11616 case clang::AArch64::BI__builtin_arm_subp:
11617 MTEIntrinsicID = Intrinsic::aarch64_subp; break;
11618 }
11619
11620 if (MTEIntrinsicID != Intrinsic::not_intrinsic) {
11621 if (MTEIntrinsicID == Intrinsic::aarch64_irg) {
11622 Value *Pointer = EmitScalarExpr(E->getArg(0));
11623 Value *Mask = EmitScalarExpr(E->getArg(1));
11624
11625 Mask = Builder.CreateZExt(Mask, Int64Ty);
11626 return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
11627 {Pointer, Mask});
11628 }
11629 if (MTEIntrinsicID == Intrinsic::aarch64_addg) {
11630 Value *Pointer = EmitScalarExpr(E->getArg(0));
11631 Value *TagOffset = EmitScalarExpr(E->getArg(1));
11632
11633 TagOffset = Builder.CreateZExt(TagOffset, Int64Ty);
11634 return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
11635 {Pointer, TagOffset});
11636 }
11637 if (MTEIntrinsicID == Intrinsic::aarch64_gmi) {
11638 Value *Pointer = EmitScalarExpr(E->getArg(0));
11639 Value *ExcludedMask = EmitScalarExpr(E->getArg(1));
11640
11641 ExcludedMask = Builder.CreateZExt(ExcludedMask, Int64Ty);
11642 return Builder.CreateCall(
11643 CGM.getIntrinsic(MTEIntrinsicID), {Pointer, ExcludedMask});
11644 }
11645 // Although it is possible to supply a different return
11646 // address (first arg) to this intrinsic, for now we set the
11647 // return address to the same value as the input address.
11648 if (MTEIntrinsicID == Intrinsic::aarch64_ldg) {
11649 Value *TagAddress = EmitScalarExpr(E->getArg(0));
11650 return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
11651 {TagAddress, TagAddress});
11652 }
11653 // Although it is possible to supply a different tag (to set)
11654 // to this intrinsic (as first arg), for now we supply the tag
11655 // that is in the input address argument (the common use case).
11656 if (MTEIntrinsicID == Intrinsic::aarch64_stg) {
11657 Value *TagAddress = EmitScalarExpr(E->getArg(0));
11658 return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
11659 {TagAddress, TagAddress});
11660 }
11661 if (MTEIntrinsicID == Intrinsic::aarch64_subp) {
11662 Value *PointerA = EmitScalarExpr(E->getArg(0));
11663 Value *PointerB = EmitScalarExpr(E->getArg(1));
11664 return Builder.CreateCall(
11665 CGM.getIntrinsic(MTEIntrinsicID), {PointerA, PointerB});
11666 }
11667 }
11668
11669 if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
11670 BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
11671 BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
11672 BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
11673 BuiltinID == clang::AArch64::BI__builtin_arm_wsr ||
11674 BuiltinID == clang::AArch64::BI__builtin_arm_wsr64 ||
11675 BuiltinID == clang::AArch64::BI__builtin_arm_wsr128 ||
11676 BuiltinID == clang::AArch64::BI__builtin_arm_wsrp) {
11677
11678 SpecialRegisterAccessKind AccessKind = Write;
11679 if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
11680 BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
11681 BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
11682 BuiltinID == clang::AArch64::BI__builtin_arm_rsrp)
11683 AccessKind = VolatileRead;
11684
11685 bool IsPointerBuiltin = BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
11686 BuiltinID == clang::AArch64::BI__builtin_arm_wsrp;
11687
11688 bool Is32Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
11689 BuiltinID == clang::AArch64::BI__builtin_arm_wsr;
11690
11691 bool Is128Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
11692 BuiltinID == clang::AArch64::BI__builtin_arm_wsr128;
11693
11694 llvm::Type *ValueType;
11695 llvm::Type *RegisterType = Int64Ty;
11696 if (Is32Bit) {
11697 ValueType = Int32Ty;
11698 } else if (Is128Bit) {
11699 llvm::Type *Int128Ty =
11700 llvm::IntegerType::getInt128Ty(CGM.getLLVMContext());
11701 ValueType = Int128Ty;
11702 RegisterType = Int128Ty;
11703 } else if (IsPointerBuiltin) {
11704 ValueType = VoidPtrTy;
11705 } else {
11706 ValueType = Int64Ty;
11707 };
11708
11709 return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType,
11710 AccessKind);
11711 }
11712
11713 if (BuiltinID == clang::AArch64::BI_ReadStatusReg ||
11714 BuiltinID == clang::AArch64::BI_WriteStatusReg) {
11715 LLVMContext &Context = CGM.getLLVMContext();
11716
11717 unsigned SysReg =
11718 E->getArg(0)->EvaluateKnownConstInt(getContext()).getZExtValue();
11719
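// Note (added comment): decode the immediate into the "op0:op1:CRn:CRm:op2"
// string form expected by the read_register/write_register intrinsics for
// AArch64 system registers.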
11720 std::string SysRegStr;
11721 llvm::raw_string_ostream(SysRegStr) <<
11722 ((1 << 1) | ((SysReg >> 14) & 1)) << ":" <<
11723 ((SysReg >> 11) & 7) << ":" <<
11724 ((SysReg >> 7) & 15) << ":" <<
11725 ((SysReg >> 3) & 15) << ":" <<
11726 ( SysReg & 7);
11727
11728 llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysRegStr) };
11729 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
11730 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
11731
11732 llvm::Type *RegisterType = Int64Ty;
11733 llvm::Type *Types[] = { RegisterType };
11734
11735 if (BuiltinID == clang::AArch64::BI_ReadStatusReg) {
11736 llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types);
11737
11738 return Builder.CreateCall(F, Metadata);
11739 }
11740
11741 llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
11742 llvm::Value *ArgValue = EmitScalarExpr(E->getArg(1));
11743
11744 return Builder.CreateCall(F, { Metadata, ArgValue });
11745 }
11746
11747 if (BuiltinID == clang::AArch64::BI_AddressOfReturnAddress) {
11748 llvm::Function *F =
11749 CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy);
11750 return Builder.CreateCall(F);
11751 }
11752
11753 if (BuiltinID == clang::AArch64::BI__builtin_sponentry) {
11754 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy);
11755 return Builder.CreateCall(F);
11756 }
11757
11758 if (BuiltinID == clang::AArch64::BI__mulh ||
11759 BuiltinID == clang::AArch64::BI__umulh) {
11760 llvm::Type *ResType = ConvertType(E->getType());
11761 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
11762
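// Note (added comment): __mulh/__umulh return the high 64 bits of a 64x64-bit
// multiply: widen both operands to i128, multiply, then shift the product
// right by 64 (arithmetic for signed, logical for unsigned).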
11763 bool IsSigned = BuiltinID == clang::AArch64::BI__mulh;
11764 Value *LHS =
11765 Builder.CreateIntCast(EmitScalarExpr(E->getArg(0)), Int128Ty, IsSigned);
11766 Value *RHS =
11767 Builder.CreateIntCast(EmitScalarExpr(E->getArg(1)), Int128Ty, IsSigned);
11768
11769 Value *MulResult, *HigherBits;
11770 if (IsSigned) {
11771 MulResult = Builder.CreateNSWMul(LHS, RHS);
11772 HigherBits = Builder.CreateAShr(MulResult, 64);
11773 } else {
11774 MulResult = Builder.CreateNUWMul(LHS, RHS);
11775 HigherBits = Builder.CreateLShr(MulResult, 64);
11776 }
11777 HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);
11778
11779 return HigherBits;
11780 }
11781
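// Note (added comment): the __writex18*/__readx18*/__addx18*/__incx18*
// builtins access memory at a byte offset from the platform register x18.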
11782 if (BuiltinID == AArch64::BI__writex18byte ||
11783 BuiltinID == AArch64::BI__writex18word ||
11784 BuiltinID == AArch64::BI__writex18dword ||
11785 BuiltinID == AArch64::BI__writex18qword) {
11786 // Process the args first
11787 Value *OffsetArg = EmitScalarExpr(E->getArg(0));
11788 Value *DataArg = EmitScalarExpr(E->getArg(1));
11789
11790 // Read x18 as i8*
11791 llvm::Value *X18 = readX18AsPtr(*this);
11792
11793 // Store val at x18 + offset
11794 Value *Offset = Builder.CreateZExt(OffsetArg, Int64Ty);
11795 Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
11796 StoreInst *Store =
11797 Builder.CreateAlignedStore(DataArg, Ptr, CharUnits::One());
11798 return Store;
11799 }
11800
11801 if (BuiltinID == AArch64::BI__readx18byte ||
11802 BuiltinID == AArch64::BI__readx18word ||
11803 BuiltinID == AArch64::BI__readx18dword ||
11804 BuiltinID == AArch64::BI__readx18qword) {
11805 // Process the args first
11806 Value *OffsetArg = EmitScalarExpr(E->getArg(0));
11807
11808 // Read x18 as i8*
11809 llvm::Value *X18 = readX18AsPtr(*this);
11810
11811 // Load x18 + offset
11812 Value *Offset = Builder.CreateZExt(OffsetArg, Int64Ty);
11813 Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
11814 llvm::Type *IntTy = ConvertType(E->getType());
11815 LoadInst *Load = Builder.CreateAlignedLoad(IntTy, Ptr, CharUnits::One());
11816 return Load;
11817 }
11818
11819 if (BuiltinID == AArch64::BI__addx18byte ||
11820 BuiltinID == AArch64::BI__addx18word ||
11821 BuiltinID == AArch64::BI__addx18dword ||
11822 BuiltinID == AArch64::BI__addx18qword ||
11823 BuiltinID == AArch64::BI__incx18byte ||
11824 BuiltinID == AArch64::BI__incx18word ||
11825 BuiltinID == AArch64::BI__incx18dword ||
11826 BuiltinID == AArch64::BI__incx18qword) {
11827 llvm::Type *IntTy;
11828 bool isIncrement;
11829 switch (BuiltinID) {
11830 case AArch64::BI__incx18byte:
11831 IntTy = Int8Ty;
11832 isIncrement = true;
11833 break;
11834 case AArch64::BI__incx18word:
11835 IntTy = Int16Ty;
11836 isIncrement = true;
11837 break;
11838 case AArch64::BI__incx18dword:
11839 IntTy = Int32Ty;
11840 isIncrement = true;
11841 break;
11842 case AArch64::BI__incx18qword:
11843 IntTy = Int64Ty;
11844 isIncrement = true;
11845 break;
11846 default:
11847 IntTy = ConvertType(E->getArg(1)->getType());
11848 isIncrement = false;
11849 break;
11850 }
11851 // Process the args first
11852 Value *OffsetArg = EmitScalarExpr(E->getArg(0));
11853 Value *ValToAdd =
11854 isIncrement ? ConstantInt::get(IntTy, 1) : EmitScalarExpr(E->getArg(1));
11855
11856 // Read x18 as i8*
11857 llvm::Value *X18 = readX18AsPtr(*this);
11858
11859 // Load x18 + offset
11860 Value *Offset = Builder.CreateZExt(OffsetArg, Int64Ty);
11861 Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
11862 LoadInst *Load = Builder.CreateAlignedLoad(IntTy, Ptr, CharUnits::One());
11863
11864 // Add values
11865 Value *AddResult = Builder.CreateAdd(Load, ValToAdd);
11866
11867 // Store val at x18 + offset
11868 StoreInst *Store =
11869 Builder.CreateAlignedStore(AddResult, Ptr, CharUnits::One());
11870 return Store;
11871 }
11872
11873 if (BuiltinID == AArch64::BI_CopyDoubleFromInt64 ||
11874 BuiltinID == AArch64::BI_CopyFloatFromInt32 ||
11875 BuiltinID == AArch64::BI_CopyInt32FromFloat ||
11876 BuiltinID == AArch64::BI_CopyInt64FromDouble) {
11877 Value *Arg = EmitScalarExpr(E->getArg(0));
11878 llvm::Type *RetTy = ConvertType(E->getType());
11879 return Builder.CreateBitCast(Arg, RetTy);
11880 }
11881
11882 if (BuiltinID == AArch64::BI_CountLeadingOnes ||
11883 BuiltinID == AArch64::BI_CountLeadingOnes64 ||
11884 BuiltinID == AArch64::BI_CountLeadingZeros ||
11885 BuiltinID == AArch64::BI_CountLeadingZeros64) {
11886 Value *Arg = EmitScalarExpr(E->getArg(0));
11887 llvm::Type *ArgType = Arg->getType();
11888
11889 if (BuiltinID == AArch64::BI_CountLeadingOnes ||
11890 BuiltinID == AArch64::BI_CountLeadingOnes64)
11891 Arg = Builder.CreateXor(Arg, Constant::getAllOnesValue(ArgType));
11892
11893 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
11894 Value *Result = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
11895
11896 if (BuiltinID == AArch64::BI_CountLeadingOnes64 ||
11897 BuiltinID == AArch64::BI_CountLeadingZeros64)
11898 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
11899 return Result;
11900 }
11901
11902 if (BuiltinID == AArch64::BI_CountLeadingSigns ||
11903 BuiltinID == AArch64::BI_CountLeadingSigns64) {
11904 Value *Arg = EmitScalarExpr(E->getArg(0));
11905
11906 Function *F = (BuiltinID == AArch64::BI_CountLeadingSigns)
11907 ? CGM.getIntrinsic(Intrinsic::aarch64_cls)
11908 : CGM.getIntrinsic(Intrinsic::aarch64_cls64);
11909
11910 Value *Result = Builder.CreateCall(F, Arg, "cls");
11911 if (BuiltinID == AArch64::BI_CountLeadingSigns64)
11912 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
11913 return Result;
11914 }
11915
11916 if (BuiltinID == AArch64::BI_CountOneBits ||
11917 BuiltinID == AArch64::BI_CountOneBits64) {
11918 Value *ArgValue = EmitScalarExpr(E->getArg(0));
11919 llvm::Type *ArgType = ArgValue->getType();
11920 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
11921
11922 Value *Result = Builder.CreateCall(F, ArgValue);
11923 if (BuiltinID == AArch64::BI_CountOneBits64)
11924 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
11925 return Result;
11926 }
11927
11928 if (BuiltinID == AArch64::BI__prefetch) {
11929 Value *Address = EmitScalarExpr(E->getArg(0));
11930 Value *RW = llvm::ConstantInt::get(Int32Ty, 0);
11931 Value *Locality = ConstantInt::get(Int32Ty, 3);
11932 Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
11933 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
11934 return Builder.CreateCall(F, {Address, RW, Locality, Data});
11935 }
11936
11937 if (BuiltinID == AArch64::BI__hlt) {
11938 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hlt);
11939 Builder.CreateCall(F, {EmitScalarExpr(E->getArg(0))});
11940
11941 // Return 0 for convenience, even though MSVC returns some other undefined
11942 // value.
11943 return ConstantInt::get(Builder.getInt32Ty(), 0);
11944 }
11945
11946 if (BuiltinID == NEON::BI__builtin_neon_vcvth_bf16_f32)
11947 return Builder.CreateFPTrunc(
11948 Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)),
11949 Builder.getFloatTy()),
11950 Builder.getBFloatTy());
11951
11952 // Handle MSVC intrinsics before argument evaluation to prevent double
11953 // evaluation.
11954 if (std::optional<MSVCIntrin> MsvcIntId =
11955 translateAarch64ToMsvcIntrin(BuiltinID))
11956 return EmitMSVCBuiltinExpr(*MsvcIntId, E);
11957
11958 // Some intrinsics are equivalent - if they are, use the base intrinsic ID.
11959 auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) {
11960 return P.first == BuiltinID;
11961 });
11962 if (It != end(NEONEquivalentIntrinsicMap))
11963 BuiltinID = It->second;
11964
11965 // Find out if any arguments are required to be integer constant
11966 // expressions.
11967 unsigned ICEArguments = 0;
11968 ASTContext::GetBuiltinTypeError Error;
11969 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
11970 assert(Error == ASTContext::GE_None && "Should not codegen an error");
11971
11972 llvm::SmallVector<Value *, 4> Ops;
11973 Address PtrOp0 = Address::invalid();
11974 for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
11975 if (i == 0) {
11976 switch (BuiltinID) {
11977 case NEON::BI__builtin_neon_vld1_v:
11978 case NEON::BI__builtin_neon_vld1q_v:
11979 case NEON::BI__builtin_neon_vld1_dup_v:
11980 case NEON::BI__builtin_neon_vld1q_dup_v:
11981 case NEON::BI__builtin_neon_vld1_lane_v:
11982 case NEON::BI__builtin_neon_vld1q_lane_v:
11983 case NEON::BI__builtin_neon_vst1_v:
11984 case NEON::BI__builtin_neon_vst1q_v:
11985 case NEON::BI__builtin_neon_vst1_lane_v:
11986 case NEON::BI__builtin_neon_vst1q_lane_v:
11987 case NEON::BI__builtin_neon_vldap1_lane_s64:
11988 case NEON::BI__builtin_neon_vldap1q_lane_s64:
11989 case NEON::BI__builtin_neon_vstl1_lane_s64:
11990 case NEON::BI__builtin_neon_vstl1q_lane_s64:
11991 // Get the alignment for the argument in addition to the value;
11992 // we'll use it later.
11993 PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
11994 Ops.push_back(PtrOp0.emitRawPointer(*this));
11995 continue;
11996 }
11997 }
11998 Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
11999 }
12000
12001 auto SISDMap = ArrayRef(AArch64SISDIntrinsicMap);
12002 const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(
12003 SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted);
12004
12005 if (Builtin) {
12006 Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1)));
12007 Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E);
12008 assert(Result && "SISD intrinsic should have been handled");
12009 return Result;
12010 }
12011
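// Note (added comment): for the remaining overloaded NEON builtins, the
// trailing argument is an immediate encoding the NeonTypeFlags (element type,
// signedness, and whether a 128-bit 'quad' register is used).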
12012 const Expr *Arg = E->getArg(E->getNumArgs()-1);
12013 NeonTypeFlags Type(0);
12014 if (std::optional<llvm::APSInt> Result =
12015 Arg->getIntegerConstantExpr(getContext()))
12016 // Determine the type of this overloaded NEON intrinsic.
12017 Type = NeonTypeFlags(Result->getZExtValue());
12018
12019 bool usgn = Type.isUnsigned();
12020 bool quad = Type.isQuad();
12021
12022 // Handle non-overloaded intrinsics first.
12023 switch (BuiltinID) {
12024 default: break;
12025 case NEON::BI__builtin_neon_vabsh_f16:
12026 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12027 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, HalfTy), Ops, "vabs");
12028 case NEON::BI__builtin_neon_vaddq_p128: {
12029 llvm::Type *Ty = GetNeonType(this, NeonTypeFlags::Poly128);
12030 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12031 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
12032 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
12033 Ops[0] = Builder.CreateXor(Ops[0], Ops[1]);
12034 llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
12035 return Builder.CreateBitCast(Ops[0], Int128Ty);
12036 }
12037 case NEON::BI__builtin_neon_vldrq_p128: {
12038 llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
12039 Value *Ptr = EmitScalarExpr(E->getArg(0));
12040 return Builder.CreateAlignedLoad(Int128Ty, Ptr,
12041 CharUnits::fromQuantity(16));
12042 }
12043 case NEON::BI__builtin_neon_vstrq_p128: {
12044 Value *Ptr = Ops[0];
12045 return Builder.CreateDefaultAlignedStore(EmitScalarExpr(E->getArg(1)), Ptr);
12046 }
12047 case NEON::BI__builtin_neon_vcvts_f32_u32:
12048 case NEON::BI__builtin_neon_vcvtd_f64_u64:
12049 usgn = true;
12050 [[fallthrough]];
12051 case NEON::BI__builtin_neon_vcvts_f32_s32:
12052 case NEON::BI__builtin_neon_vcvtd_f64_s64: {
12053 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12054 bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
12055 llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
12056 llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
12057 Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
12058 if (usgn)
12059 return Builder.CreateUIToFP(Ops[0], FTy);
12060 return Builder.CreateSIToFP(Ops[0], FTy);
12061 }
12062 case NEON::BI__builtin_neon_vcvth_f16_u16:
12063 case NEON::BI__builtin_neon_vcvth_f16_u32:
12064 case NEON::BI__builtin_neon_vcvth_f16_u64:
12065 usgn = true;
12066 [[fallthrough]];
12067 case NEON::BI__builtin_neon_vcvth_f16_s16:
12068 case NEON::BI__builtin_neon_vcvth_f16_s32:
12069 case NEON::BI__builtin_neon_vcvth_f16_s64: {
12070 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12071 llvm::Type *FTy = HalfTy;
12072 llvm::Type *InTy;
12073 if (Ops[0]->getType()->getPrimitiveSizeInBits() == 64)
12074 InTy = Int64Ty;
12075 else if (Ops[0]->getType()->getPrimitiveSizeInBits() == 32)
12076 InTy = Int32Ty;
12077 else
12078 InTy = Int16Ty;
12079 Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
12080 if (usgn)
12081 return Builder.CreateUIToFP(Ops[0], FTy);
12082 return Builder.CreateSIToFP(Ops[0], FTy);
12083 }
12084 case NEON::BI__builtin_neon_vcvtah_u16_f16:
12085 case NEON::BI__builtin_neon_vcvtmh_u16_f16:
12086 case NEON::BI__builtin_neon_vcvtnh_u16_f16:
12087 case NEON::BI__builtin_neon_vcvtph_u16_f16:
12088 case NEON::BI__builtin_neon_vcvth_u16_f16:
12089 case NEON::BI__builtin_neon_vcvtah_s16_f16:
12090 case NEON::BI__builtin_neon_vcvtmh_s16_f16:
12091 case NEON::BI__builtin_neon_vcvtnh_s16_f16:
12092 case NEON::BI__builtin_neon_vcvtph_s16_f16:
12093 case NEON::BI__builtin_neon_vcvth_s16_f16: {
12094 unsigned Int;
12095 llvm::Type* InTy = Int32Ty;
12096 llvm::Type* FTy = HalfTy;
12097 llvm::Type *Tys[2] = {InTy, FTy};
12098 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12099 switch (BuiltinID) {
12100 default: llvm_unreachable("missing builtin ID in switch!");
12101 case NEON::BI__builtin_neon_vcvtah_u16_f16:
12102 Int = Intrinsic::aarch64_neon_fcvtau; break;
12103 case NEON::BI__builtin_neon_vcvtmh_u16_f16:
12104 Int = Intrinsic::aarch64_neon_fcvtmu; break;
12105 case NEON::BI__builtin_neon_vcvtnh_u16_f16:
12106 Int = Intrinsic::aarch64_neon_fcvtnu; break;
12107 case NEON::BI__builtin_neon_vcvtph_u16_f16:
12108 Int = Intrinsic::aarch64_neon_fcvtpu; break;
12109 case NEON::BI__builtin_neon_vcvth_u16_f16:
12110 Int = Intrinsic::aarch64_neon_fcvtzu; break;
12111 case NEON::BI__builtin_neon_vcvtah_s16_f16:
12112 Int = Intrinsic::aarch64_neon_fcvtas; break;
12113 case NEON::BI__builtin_neon_vcvtmh_s16_f16:
12114 Int = Intrinsic::aarch64_neon_fcvtms; break;
12115 case NEON::BI__builtin_neon_vcvtnh_s16_f16:
12116 Int = Intrinsic::aarch64_neon_fcvtns; break;
12117 case NEON::BI__builtin_neon_vcvtph_s16_f16:
12118 Int = Intrinsic::aarch64_neon_fcvtps; break;
12119 case NEON::BI__builtin_neon_vcvth_s16_f16:
12120 Int = Intrinsic::aarch64_neon_fcvtzs; break;
12121 }
12122 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvt");
12123 return Builder.CreateTrunc(Ops[0], Int16Ty);
12124 }
12125 case NEON::BI__builtin_neon_vcaleh_f16:
12126 case NEON::BI__builtin_neon_vcalth_f16:
12127 case NEON::BI__builtin_neon_vcageh_f16:
12128 case NEON::BI__builtin_neon_vcagth_f16: {
12129 unsigned Int;
12130 llvm::Type* InTy = Int32Ty;
12131 llvm::Type* FTy = HalfTy;
12132 llvm::Type *Tys[2] = {InTy, FTy};
12133 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12134 switch (BuiltinID) {
12135 default: llvm_unreachable("missing builtin ID in switch!");
12136 case NEON::BI__builtin_neon_vcageh_f16:
12137 Int = Intrinsic::aarch64_neon_facge; break;
12138 case NEON::BI__builtin_neon_vcagth_f16:
12139 Int = Intrinsic::aarch64_neon_facgt; break;
12140 case NEON::BI__builtin_neon_vcaleh_f16:
12141 Int = Intrinsic::aarch64_neon_facge; std::swap(Ops[0], Ops[1]); break;
12142 case NEON::BI__builtin_neon_vcalth_f16:
12143 Int = Intrinsic::aarch64_neon_facgt; std::swap(Ops[0], Ops[1]); break;
12144 }
12145 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "facg");
12146 return Builder.CreateTrunc(Ops[0], Int16Ty);
12147 }
12148 case NEON::BI__builtin_neon_vcvth_n_s16_f16:
12149 case NEON::BI__builtin_neon_vcvth_n_u16_f16: {
12150 unsigned Int;
12151 llvm::Type* InTy = Int32Ty;
12152 llvm::Type* FTy = HalfTy;
12153 llvm::Type *Tys[2] = {InTy, FTy};
12154 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12155 switch (BuiltinID) {
12156 default: llvm_unreachable("missing builtin ID in switch!");
12157 case NEON::BI__builtin_neon_vcvth_n_s16_f16:
12158 Int = Intrinsic::aarch64_neon_vcvtfp2fxs; break;
12159 case NEON::BI__builtin_neon_vcvth_n_u16_f16:
12160 Int = Intrinsic::aarch64_neon_vcvtfp2fxu; break;
12161 }
12162 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
12163 return Builder.CreateTrunc(Ops[0], Int16Ty);
12164 }
12165 case NEON::BI__builtin_neon_vcvth_n_f16_s16:
12166 case NEON::BI__builtin_neon_vcvth_n_f16_u16: {
12167 unsigned Int;
12168 llvm::Type* FTy = HalfTy;
12169 llvm::Type* InTy = Int32Ty;
12170 llvm::Type *Tys[2] = {FTy, InTy};
12171 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12172 switch (BuiltinID) {
12173 default: llvm_unreachable("missing builtin ID in switch!");
12174 case NEON::BI__builtin_neon_vcvth_n_f16_s16:
12175 Int = Intrinsic::aarch64_neon_vcvtfxs2fp;
12176 Ops[0] = Builder.CreateSExt(Ops[0], InTy, "sext");
12177 break;
12178 case NEON::BI__builtin_neon_vcvth_n_f16_u16:
12179 Int = Intrinsic::aarch64_neon_vcvtfxu2fp;
12180 Ops[0] = Builder.CreateZExt(Ops[0], InTy);
12181 break;
12182 }
12183 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
12184 }
12185 case NEON::BI__builtin_neon_vpaddd_s64: {
12186 auto *Ty = llvm::FixedVectorType::get(Int64Ty, 2);
12187 Value *Vec = EmitScalarExpr(E->getArg(0));
12188 // The vector is v2i64, so make sure it's bitcast to that.
12189 Vec = Builder.CreateBitCast(Vec, Ty, "v2i64");
12190 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
12191 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
12192 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
12193 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
12194 // Pairwise addition of a v2i64 into a scalar i64.
12195 return Builder.CreateAdd(Op0, Op1, "vpaddd");
12196 }
12197 case NEON::BI__builtin_neon_vpaddd_f64: {
12198 auto *Ty = llvm::FixedVectorType::get(DoubleTy, 2);
12199 Value *Vec = EmitScalarExpr(E->getArg(0));
12200 // The vector is v2f64, so make sure it's bitcast to that.
12201 Vec = Builder.CreateBitCast(Vec, Ty, "v2f64");
12202 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
12203 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
12204 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
12205 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
12206 // Pairwise addition of a v2f64 into a scalar f64.
12207 return Builder.CreateFAdd(Op0, Op1, "vpaddd");
12208 }
12209 case NEON::BI__builtin_neon_vpadds_f32: {
12210 auto *Ty = llvm::FixedVectorType::get(FloatTy, 2);
12211 Value *Vec = EmitScalarExpr(E->getArg(0));
12212 // The vector is v2f32, so make sure it's bitcast to that.
12213 Vec = Builder.CreateBitCast(Vec, Ty, "v2f32");
12214 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
12215 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
12216 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
12217 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
12218 // Pairwise addition of a v2f32 into a scalar f32.
12219 return Builder.CreateFAdd(Op0, Op1, "vpaddd");
12220 }
12221 case NEON::BI__builtin_neon_vceqzd_s64:
12222 case NEON::BI__builtin_neon_vceqzd_f64:
12223 case NEON::BI__builtin_neon_vceqzs_f32:
12224 case NEON::BI__builtin_neon_vceqzh_f16:
12225 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12226 return EmitAArch64CompareBuiltinExpr(
12227 Ops[0], ConvertType(E->getCallReturnType(getContext())),
12228 ICmpInst::FCMP_OEQ, ICmpInst::ICMP_EQ, "vceqz");
12229 case NEON::BI__builtin_neon_vcgezd_s64:
12230 case NEON::BI__builtin_neon_vcgezd_f64:
12231 case NEON::BI__builtin_neon_vcgezs_f32:
12232 case NEON::BI__builtin_neon_vcgezh_f16:
12233 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12234 return EmitAArch64CompareBuiltinExpr(
12235 Ops[0], ConvertType(E->getCallReturnType(getContext())),
12236 ICmpInst::FCMP_OGE, ICmpInst::ICMP_SGE, "vcgez");
12237 case NEON::BI__builtin_neon_vclezd_s64:
12238 case NEON::BI__builtin_neon_vclezd_f64:
12239 case NEON::BI__builtin_neon_vclezs_f32:
12240 case NEON::BI__builtin_neon_vclezh_f16:
12241 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12242 return EmitAArch64CompareBuiltinExpr(
12243 Ops[0], ConvertType(E->getCallReturnType(getContext())),
12244 ICmpInst::FCMP_OLE, ICmpInst::ICMP_SLE, "vclez");
12245 case NEON::BI__builtin_neon_vcgtzd_s64:
12246 case NEON::BI__builtin_neon_vcgtzd_f64:
12247 case NEON::BI__builtin_neon_vcgtzs_f32:
12248 case NEON::BI__builtin_neon_vcgtzh_f16:
12249 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12250 return EmitAArch64CompareBuiltinExpr(
12251 Ops[0], ConvertType(E->getCallReturnType(getContext())),
12252 ICmpInst::FCMP_OGT, ICmpInst::ICMP_SGT, "vcgtz");
12253 case NEON::BI__builtin_neon_vcltzd_s64:
12254 case NEON::BI__builtin_neon_vcltzd_f64:
12255 case NEON::BI__builtin_neon_vcltzs_f32:
12256 case NEON::BI__builtin_neon_vcltzh_f16:
12257 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12258 return EmitAArch64CompareBuiltinExpr(
12259 Ops[0], ConvertType(E->getCallReturnType(getContext())),
12260 ICmpInst::FCMP_OLT, ICmpInst::ICMP_SLT, "vcltz");
12261
12262 case NEON::BI__builtin_neon_vceqzd_u64: {
12263 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12264 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
12265 Ops[0] =
12266 Builder.CreateICmpEQ(Ops[0], llvm::Constant::getNullValue(Int64Ty));
12267 return Builder.CreateSExt(Ops[0], Int64Ty, "vceqzd");
12268 }
12269 case NEON::BI__builtin_neon_vceqd_f64:
12270 case NEON::BI__builtin_neon_vcled_f64:
12271 case NEON::BI__builtin_neon_vcltd_f64:
12272 case NEON::BI__builtin_neon_vcged_f64:
12273 case NEON::BI__builtin_neon_vcgtd_f64: {
12274 llvm::CmpInst::Predicate P;
12275 switch (BuiltinID) {
12276 default: llvm_unreachable("missing builtin ID in switch!");
12277 case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break;
12278 case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break;
12279 case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break;
12280 case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break;
12281 case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break;
12282 }
12283 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12284 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
12285 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
12286 if (P == llvm::FCmpInst::FCMP_OEQ)
12287 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
12288 else
12289 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
12290 return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd");
12291 }
12292 case NEON::BI__builtin_neon_vceqs_f32:
12293 case NEON::BI__builtin_neon_vcles_f32:
12294 case NEON::BI__builtin_neon_vclts_f32:
12295 case NEON::BI__builtin_neon_vcges_f32:
12296 case NEON::BI__builtin_neon_vcgts_f32: {
12297 llvm::CmpInst::Predicate P;
12298 switch (BuiltinID) {
12299 default: llvm_unreachable("missing builtin ID in switch!");
12300 case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break;
12301 case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break;
12302 case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break;
12303 case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break;
12304 case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break;
12305 }
12306 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12307 Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy);
12308 Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy);
12309 if (P == llvm::FCmpInst::FCMP_OEQ)
12310 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
12311 else
12312 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
12313 return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd");
12314 }
12315 case NEON::BI__builtin_neon_vceqh_f16:
12316 case NEON::BI__builtin_neon_vcleh_f16:
12317 case NEON::BI__builtin_neon_vclth_f16:
12318 case NEON::BI__builtin_neon_vcgeh_f16:
12319 case NEON::BI__builtin_neon_vcgth_f16: {
12320 llvm::CmpInst::Predicate P;
12321 switch (BuiltinID) {
12322 default: llvm_unreachable("missing builtin ID in switch!");
12323 case NEON::BI__builtin_neon_vceqh_f16: P = llvm::FCmpInst::FCMP_OEQ; break;
12324 case NEON::BI__builtin_neon_vcleh_f16: P = llvm::FCmpInst::FCMP_OLE; break;
12325 case NEON::BI__builtin_neon_vclth_f16: P = llvm::FCmpInst::FCMP_OLT; break;
12326 case NEON::BI__builtin_neon_vcgeh_f16: P = llvm::FCmpInst::FCMP_OGE; break;
12327 case NEON::BI__builtin_neon_vcgth_f16: P = llvm::FCmpInst::FCMP_OGT; break;
12328 }
12329 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12330 Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
12331 Ops[1] = Builder.CreateBitCast(Ops[1], HalfTy);
12332 if (P == llvm::FCmpInst::FCMP_OEQ)
12333 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
12334 else
12335 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
12336 return Builder.CreateSExt(Ops[0], Int16Ty, "vcmpd");
12337 }
12338 case NEON::BI__builtin_neon_vceqd_s64:
12339 case NEON::BI__builtin_neon_vceqd_u64:
12340 case NEON::BI__builtin_neon_vcgtd_s64:
12341 case NEON::BI__builtin_neon_vcgtd_u64:
12342 case NEON::BI__builtin_neon_vcltd_s64:
12343 case NEON::BI__builtin_neon_vcltd_u64:
12344 case NEON::BI__builtin_neon_vcged_u64:
12345 case NEON::BI__builtin_neon_vcged_s64:
12346 case NEON::BI__builtin_neon_vcled_u64:
12347 case NEON::BI__builtin_neon_vcled_s64: {
12348 llvm::CmpInst::Predicate P;
12349 switch (BuiltinID) {
12350 default: llvm_unreachable("missing builtin ID in switch!");
12351 case NEON::BI__builtin_neon_vceqd_s64:
12352 case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ;break;
12353 case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;break;
12354 case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;break;
12355 case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;break;
12356 case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;break;
12357 case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;break;
12358 case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;break;
12359 case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;break;
12360 case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;break;
12361 }
12362 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12363 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
12364 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
12365 Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]);
12366 return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd");
12367 }
12368 case NEON::BI__builtin_neon_vtstd_s64:
12369 case NEON::BI__builtin_neon_vtstd_u64: {
12370 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12371 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
12372 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
12373 Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
12374 Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
12375 llvm::Constant::getNullValue(Int64Ty));
12376 return Builder.CreateSExt(Ops[0], Int64Ty, "vtstd");
12377 }
12378 case NEON::BI__builtin_neon_vset_lane_i8:
12379 case NEON::BI__builtin_neon_vset_lane_i16:
12380 case NEON::BI__builtin_neon_vset_lane_i32:
12381 case NEON::BI__builtin_neon_vset_lane_i64:
12382 case NEON::BI__builtin_neon_vset_lane_bf16:
12383 case NEON::BI__builtin_neon_vset_lane_f32:
12384 case NEON::BI__builtin_neon_vsetq_lane_i8:
12385 case NEON::BI__builtin_neon_vsetq_lane_i16:
12386 case NEON::BI__builtin_neon_vsetq_lane_i32:
12387 case NEON::BI__builtin_neon_vsetq_lane_i64:
12388 case NEON::BI__builtin_neon_vsetq_lane_bf16:
12389 case NEON::BI__builtin_neon_vsetq_lane_f32:
12390 Ops.push_back(EmitScalarExpr(E->getArg(2)));
12391 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
12392 case NEON::BI__builtin_neon_vset_lane_f64:
12393 // The vector type needs a cast for the v1f64 variant.
12394 Ops[1] =
12395 Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 1));
12396 Ops.push_back(EmitScalarExpr(E->getArg(2)));
12397 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
12398 case NEON::BI__builtin_neon_vsetq_lane_f64:
12399 // The vector type needs a cast for the v2f64 variant.
12400 Ops[1] =
12401 Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 2));
12402 Ops.push_back(EmitScalarExpr(E->getArg(2)));
12403 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
12404
12405 case NEON::BI__builtin_neon_vget_lane_i8:
12406 case NEON::BI__builtin_neon_vdupb_lane_i8:
12407 Ops[0] =
12408 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 8));
12409 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12410 "vget_lane");
12411 case NEON::BI__builtin_neon_vgetq_lane_i8:
12412 case NEON::BI__builtin_neon_vdupb_laneq_i8:
12413 Ops[0] =
12414 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 16));
12415 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12416 "vgetq_lane");
12417 case NEON::BI__builtin_neon_vget_lane_i16:
12418 case NEON::BI__builtin_neon_vduph_lane_i16:
12419 Ops[0] =
12420 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int16Ty, 4));
12421 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12422 "vget_lane");
12423 case NEON::BI__builtin_neon_vgetq_lane_i16:
12424 case NEON::BI__builtin_neon_vduph_laneq_i16:
12425 Ops[0] =
12426 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int16Ty, 8));
12427 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12428 "vgetq_lane");
12429 case NEON::BI__builtin_neon_vget_lane_i32:
12430 case NEON::BI__builtin_neon_vdups_lane_i32:
12431 Ops[0] =
12432 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 2));
12433 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12434 "vget_lane");
12435 case NEON::BI__builtin_neon_vdups_lane_f32:
12436 Ops[0] =
12437 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 2));
12438 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12439 "vdups_lane");
12440 case NEON::BI__builtin_neon_vgetq_lane_i32:
12441 case NEON::BI__builtin_neon_vdups_laneq_i32:
12442 Ops[0] =
12443 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 4));
12444 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12445 "vgetq_lane");
12446 case NEON::BI__builtin_neon_vget_lane_i64:
12447 case NEON::BI__builtin_neon_vdupd_lane_i64:
12448 Ops[0] =
12449 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 1));
12450 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12451 "vget_lane");
12452 case NEON::BI__builtin_neon_vdupd_lane_f64:
12453 Ops[0] =
12454 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 1));
12455 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12456 "vdupd_lane");
12457 case NEON::BI__builtin_neon_vgetq_lane_i64:
12458 case NEON::BI__builtin_neon_vdupd_laneq_i64:
12459 Ops[0] =
12460 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 2));
12461 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12462 "vgetq_lane");
12463 case NEON::BI__builtin_neon_vget_lane_f32:
12464 Ops[0] =
12465 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 2));
12466 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12467 "vget_lane");
12468 case NEON::BI__builtin_neon_vget_lane_f64:
12469 Ops[0] =
12470 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 1));
12471 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12472 "vget_lane");
12473 case NEON::BI__builtin_neon_vgetq_lane_f32:
12474 case NEON::BI__builtin_neon_vdups_laneq_f32:
12475 Ops[0] =
12476 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 4));
12477 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12478 "vgetq_lane");
12479 case NEON::BI__builtin_neon_vgetq_lane_f64:
12480 case NEON::BI__builtin_neon_vdupd_laneq_f64:
12481 Ops[0] =
12482 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 2));
12483 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12484 "vgetq_lane");
12485 case NEON::BI__builtin_neon_vaddh_f16:
12486 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12487 return Builder.CreateFAdd(Ops[0], Ops[1], "vaddh");
12488 case NEON::BI__builtin_neon_vsubh_f16:
12489 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12490 return Builder.CreateFSub(Ops[0], Ops[1], "vsubh");
12491 case NEON::BI__builtin_neon_vmulh_f16:
12492 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12493 return Builder.CreateFMul(Ops[0], Ops[1], "vmulh");
12494 case NEON::BI__builtin_neon_vdivh_f16:
12495 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12496 return Builder.CreateFDiv(Ops[0], Ops[1], "vdivh");
12497 case NEON::BI__builtin_neon_vfmah_f16:
12498 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
12499 return emitCallMaybeConstrainedFPBuiltin(
12500 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
12501 {EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)), Ops[0]});
12502 case NEON::BI__builtin_neon_vfmsh_f16: {
12503 Value* Neg = Builder.CreateFNeg(EmitScalarExpr(E->getArg(1)), "vsubh");
12504
12505 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
12506 return emitCallMaybeConstrainedFPBuiltin(
12507 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
12508 {Neg, EmitScalarExpr(E->getArg(2)), Ops[0]});
12509 }
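// Note: vfmah_f16(a, b, c) computes a + b * c, whereas llvm.fma(x, y, z) computes
// x * y + z, so the accumulator (Ops[0]) is passed as the last fma operand; vfmsh_f16
// simply negates the first multiplicand to produce a - b * c.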
12510 case NEON::BI__builtin_neon_vaddd_s64:
12511 case NEON::BI__builtin_neon_vaddd_u64:
12512 return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd");
12513 case NEON::BI__builtin_neon_vsubd_s64:
12514 case NEON::BI__builtin_neon_vsubd_u64:
12515 return Builder.CreateSub(Ops[0], EmitScalarExpr(E->getArg(1)), "vsubd");
12516 case NEON::BI__builtin_neon_vqdmlalh_s16:
12517 case NEON::BI__builtin_neon_vqdmlslh_s16: {
12518 SmallVector<Value *, 2> ProductOps;
12519 ProductOps.push_back(vectorWrapScalar16(Ops[1]));
12520 ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2))));
12521 auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4);
12522 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
12523 ProductOps, "vqdmlXl");
12524 Constant *CI = ConstantInt::get(SizeTy, 0);
12525 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
12526
12527 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16
12528 ? Intrinsic::aarch64_neon_sqadd
12529 : Intrinsic::aarch64_neon_sqsub;
12530 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl");
12531 }
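// Note: the scalar i16 saturating doubling multiply is emitted via the vector
// aarch64.neon.sqdmull intrinsic: both i16 operands are wrapped into <4 x i16> vectors,
// and lane 0 of the <4 x i32> product is then saturating-added to (or subtracted from)
// the accumulator.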
12532 case NEON::BI__builtin_neon_vqshlud_n_s64: {
12533 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12534 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
12535 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty),
12536 Ops, "vqshlu_n");
12537 }
12538 case NEON::BI__builtin_neon_vqshld_n_u64:
12539 case NEON::BI__builtin_neon_vqshld_n_s64: {
12540 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64
12541 ? Intrinsic::aarch64_neon_uqshl
12542 : Intrinsic::aarch64_neon_sqshl;
12543 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12544 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
12545 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n");
12546 }
12547 case NEON::BI__builtin_neon_vrshrd_n_u64:
12548 case NEON::BI__builtin_neon_vrshrd_n_s64: {
12549 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64
12550 ? Intrinsic::aarch64_neon_urshl
12551 : Intrinsic::aarch64_neon_srshl;
12552 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12553 int SV = cast<ConstantInt>(Ops[1])->getSExtValue();
12554 Ops[1] = ConstantInt::get(Int64Ty, -SV);
12555 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n");
12556 }
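// Note: a rounding right shift by n is emitted as the rounding left shift intrinsic
// (urshl/srshl) with a shift amount of -n, which is why the constant is negated above.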
12557 case NEON::BI__builtin_neon_vrsrad_n_u64:
12558 case NEON::BI__builtin_neon_vrsrad_n_s64: {
12559 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64
12560 ? Intrinsic::aarch64_neon_urshl
12561 : Intrinsic::aarch64_neon_srshl;
12562 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
12563 Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2))));
12564 Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Int64Ty),
12565 {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)});
12566 return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty));
12567 }
12568 case NEON::BI__builtin_neon_vshld_n_s64:
12569 case NEON::BI__builtin_neon_vshld_n_u64: {
12570 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
12571 return Builder.CreateShl(
12572 Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n");
12573 }
12574 case NEON::BI__builtin_neon_vshrd_n_s64: {
12575 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
12576 return Builder.CreateAShr(
12577 Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
12578 Amt->getZExtValue())),
12579 "shrd_n");
12580 }
12581 case NEON::BI__builtin_neon_vshrd_n_u64: {
12582 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
12583 uint64_t ShiftAmt = Amt->getZExtValue();
12584 // Right-shifting an unsigned value by its size yields 0.
12585 if (ShiftAmt == 64)
12586 return ConstantInt::get(Int64Ty, 0);
12587 return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt),
12588 "shrd_n");
12589 }
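// Note: the scalar shift builtins accept shift amounts up to 64, but LLVM shifts by the
// full bit width are poison. Signed right shifts are therefore clamped to 63 (which
// already yields all sign bits), and an unsigned right shift by 64 is folded to 0.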
12590 case NEON::BI__builtin_neon_vsrad_n_s64: {
12591 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
12592 Ops[1] = Builder.CreateAShr(
12593 Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
12594 Amt->getZExtValue())),
12595 "shrd_n");
12596 return Builder.CreateAdd(Ops[0], Ops[1]);
12597 }
12598 case NEON::BI__builtin_neon_vsrad_n_u64: {
12599 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
12600 uint64_t ShiftAmt = Amt->getZExtValue();
12601 // Right-shifting an unsigned value by its size yields 0.
12602 // As Op + 0 = Op, return Ops[0] directly.
12603 if (ShiftAmt == 64)
12604 return Ops[0];
12605 Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt),
12606 "shrd_n");
12607 return Builder.CreateAdd(Ops[0], Ops[1]);
12608 }
12609 case NEON::BI__builtin_neon_vqdmlalh_lane_s16:
12610 case NEON::BI__builtin_neon_vqdmlalh_laneq_s16:
12611 case NEON::BI__builtin_neon_vqdmlslh_lane_s16:
12612 case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: {
12613 Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
12614 "lane");
12615 SmallVector<Value *, 2> ProductOps;
12616 ProductOps.push_back(vectorWrapScalar16(Ops[1]));
12617 ProductOps.push_back(vectorWrapScalar16(Ops[2]));
12618 auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4);
12619 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
12620 ProductOps, "vqdmlXl");
12621 Constant *CI = ConstantInt::get(SizeTy, 0);
12622 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
12623 Ops.pop_back();
12624
12625 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 ||
12626 BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16)
12627 ? Intrinsic::aarch64_neon_sqadd
12628 : Intrinsic::aarch64_neon_sqsub;
12629 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl");
12630 }
12631 case NEON::BI__builtin_neon_vqdmlals_s32:
12632 case NEON::BI__builtin_neon_vqdmlsls_s32: {
12633 SmallVector<Value *, 2> ProductOps;
12634 ProductOps.push_back(Ops[1]);
12635 ProductOps.push_back(EmitScalarExpr(E->getArg(2)));
12636 Ops[1] =
12637 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
12638 ProductOps, "vqdmlXl");
12639
12640 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32
12641 ? Intrinsic::aarch64_neon_sqadd
12642 : Intrinsic::aarch64_neon_sqsub;
12643 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl");
12644 }
12645 case NEON::BI__builtin_neon_vqdmlals_lane_s32:
12646 case NEON::BI__builtin_neon_vqdmlals_laneq_s32:
12647 case NEON::BI__builtin_neon_vqdmlsls_lane_s32:
12648 case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: {
12649 Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
12650 "lane");
12651 SmallVector<Value *, 2> ProductOps;
12652 ProductOps.push_back(Ops[1]);
12653 ProductOps.push_back(Ops[2]);
12654 Ops[1] =
12655 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
12656 ProductOps, "vqdmlXl");
12657 Ops.pop_back();
12658
12659 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 ||
12660 BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32)
12661 ? Intrinsic::aarch64_neon_sqadd
12662 : Intrinsic::aarch64_neon_sqsub;
12663 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl");
12664 }
12665 case NEON::BI__builtin_neon_vget_lane_bf16:
12666 case NEON::BI__builtin_neon_vduph_lane_bf16:
12667 case NEON::BI__builtin_neon_vduph_lane_f16: {
12668 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12669 "vget_lane");
12670 }
12671 case NEON::BI__builtin_neon_vgetq_lane_bf16:
12672 case NEON::BI__builtin_neon_vduph_laneq_bf16:
12673 case NEON::BI__builtin_neon_vduph_laneq_f16: {
12674 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12675 "vgetq_lane");
12676 }
12677 case NEON::BI__builtin_neon_vcvt_bf16_f32: {
12678 llvm::Type *V4F32 = FixedVectorType::get(Builder.getFloatTy(), 4);
12679 llvm::Type *V4BF16 = FixedVectorType::get(Builder.getBFloatTy(), 4);
12680 return Builder.CreateFPTrunc(Builder.CreateBitCast(Ops[0], V4F32), V4BF16);
12681 }
12682 case NEON::BI__builtin_neon_vcvtq_low_bf16_f32: {
12683 SmallVector<int, 16> ConcatMask(8);
12684 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
12685 llvm::Type *V4F32 = FixedVectorType::get(Builder.getFloatTy(), 4);
12686 llvm::Type *V4BF16 = FixedVectorType::get(Builder.getBFloatTy(), 4);
12687 llvm::Value *Trunc =
12688 Builder.CreateFPTrunc(Builder.CreateBitCast(Ops[0], V4F32), V4BF16);
12689 return Builder.CreateShuffleVector(
12690 Trunc, ConstantAggregateZero::get(V4BF16), ConcatMask);
12691 }
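// Note: the "low" bf16 conversion only defines the low four lanes of the result; the
// shuffle with a zero vector widens the <4 x bfloat> truncation to <8 x bfloat> with
// zeros in the high half.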
12692 case NEON::BI__builtin_neon_vcvtq_high_bf16_f32: {
12693 SmallVector<int, 16> ConcatMask(8);
12694 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
12695 SmallVector<int, 16> LoMask(4);
12696 std::iota(LoMask.begin(), LoMask.end(), 0);
12697 llvm::Type *V4F32 = FixedVectorType::get(Builder.getFloatTy(), 4);
12698 llvm::Type *V4BF16 = FixedVectorType::get(Builder.getBFloatTy(), 4);
12699 llvm::Type *V8BF16 = FixedVectorType::get(Builder.getBFloatTy(), 8);
12700 llvm::Value *Inactive = Builder.CreateShuffleVector(
12701 Builder.CreateBitCast(Ops[0], V8BF16), LoMask);
12702 llvm::Value *Trunc =
12703 Builder.CreateFPTrunc(Builder.CreateBitCast(Ops[1], V4F32), V4BF16);
12704 return Builder.CreateShuffleVector(Inactive, Trunc, ConcatMask);
12705 }
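// Note: the "high" bf16 conversion keeps the low four lanes of the existing <8 x bfloat>
// operand (extracted by the LoMask shuffle) and concatenates the four newly truncated
// lanes into the high half.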
12706
12707 case clang::AArch64::BI_InterlockedAdd:
12708 case clang::AArch64::BI_InterlockedAdd64: {
12709 Address DestAddr = CheckAtomicAlignment(*this, E);
12710 Value *Val = EmitScalarExpr(E->getArg(1));
12711 AtomicRMWInst *RMWI =
12712 Builder.CreateAtomicRMW(AtomicRMWInst::Add, DestAddr, Val,
12713 llvm::AtomicOrdering::SequentiallyConsistent);
12714 return Builder.CreateAdd(RMWI, Val);
12715 }
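// Note: atomicrmw add returns the value held in memory *before* the operation, while
// _InterlockedAdd is defined to return the new value, so the addend is added back to
// the RMW result.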
12716 }
12717
12718 llvm::FixedVectorType *VTy = GetNeonType(this, Type);
12719 llvm::Type *Ty = VTy;
12720 if (!Ty)
12721 return nullptr;
12722
12723 // Not all intrinsics handled by the common case work for AArch64 yet, so only
12724 // defer to common code if it's been added to our special map.
12725 Builtin = findARMVectorIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID,
12726 AArch64SIMDIntrinsicsProvenSorted);
12727
12728 if (Builtin)
12729 return EmitCommonNeonBuiltinExpr(
12730 Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
12731 Builtin->NameHint, Builtin->TypeModifier, E, Ops,
12732 /*never use addresses*/ Address::invalid(), Address::invalid(), Arch);
12733
12734 if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops, Arch))
12735 return V;
12736
12737 unsigned Int;
12738 switch (BuiltinID) {
12739 default: return nullptr;
12740 case NEON::BI__builtin_neon_vbsl_v:
12741 case NEON::BI__builtin_neon_vbslq_v: {
12742 llvm::Type *BitTy = llvm::VectorType::getInteger(VTy);
12743 Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl");
12744 Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl");
12745 Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl");
12746
12747 Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl");
12748 Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl");
12749 Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl");
12750 return Builder.CreateBitCast(Ops[0], Ty);
12751 }
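// Note: vbsl(mask, t, f) selects bits from t where the corresponding mask bit is set and
// from f where it is clear, i.e. (mask & t) | (~mask & f), which is the and/and-not/or
// sequence emitted above on the integer-bitcast operands.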
12752 case NEON::BI__builtin_neon_vfma_lane_v:
12753 case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types
12754 // The ARM builtins (and instructions) have the addend as the first
12755 // operand, but the 'fma' intrinsics have it last. Swap it around here.
12756 Value *Addend = Ops[0];
12757 Value *Multiplicand = Ops[1];
12758 Value *LaneSource = Ops[2];
12759 Ops[0] = Multiplicand;
12760 Ops[1] = LaneSource;
12761 Ops[2] = Addend;
12762
12763 // Now adjust things to handle the lane access.
12764 auto *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v
12765 ? llvm::FixedVectorType::get(VTy->getElementType(),
12766 VTy->getNumElements() / 2)
12767 : VTy;
12768 llvm::Constant *cst = cast<Constant>(Ops[3]);
12769 Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(), cst);
12770 Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy);
12771 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane");
12772
12773 Ops.pop_back();
12774 Int = Builder.getIsFPConstrained() ? Intrinsic::experimental_constrained_fma
12775 : Intrinsic::fma;
12776 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla");
12777 }
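// Note: e.g. vfma_lane_f32(a, b, v, lane) is a + b * v[lane]; the splat shuffle above
// broadcasts the selected lane of v, and the addend ends up as the last llvm.fma operand.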
12778 case NEON::BI__builtin_neon_vfma_laneq_v: {
12779 auto *VTy = cast<llvm::FixedVectorType>(Ty);
12780 // v1f64 fma should be mapped to Neon scalar f64 fma
12781 if (VTy && VTy->getElementType() == DoubleTy) {
12782 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
12783 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
12784 llvm::FixedVectorType *VTy =
12785 GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, true));
12786 Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
12787 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
12788 Value *Result;
12789 Result = emitCallMaybeConstrainedFPBuiltin(
12790 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma,
12791 DoubleTy, {Ops[1], Ops[2], Ops[0]});
12792 return Builder.CreateBitCast(Result, Ty);
12793 }
12794 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
12795 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
12796
12797 auto *STy = llvm::FixedVectorType::get(VTy->getElementType(),
12798 VTy->getNumElements() * 2);
12799 Ops[2] = Builder.CreateBitCast(Ops[2], STy);
12800 Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(),
12801 cast<ConstantInt>(Ops[3]));
12802 Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane");
12803
12804 return emitCallMaybeConstrainedFPBuiltin(
12805 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
12806 {Ops[2], Ops[1], Ops[0]});
12807 }
12808 case NEON::BI__builtin_neon_vfmaq_laneq_v: {
12809 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
12810 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
12811
12812 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
12813 Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
12814 return emitCallMaybeConstrainedFPBuiltin(
12815 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
12816 {Ops[2], Ops[1], Ops[0]});
12817 }
12818 case NEON::BI__builtin_neon_vfmah_lane_f16:
12819 case NEON::BI__builtin_neon_vfmas_lane_f32:
12820 case NEON::BI__builtin_neon_vfmah_laneq_f16:
12821 case NEON::BI__builtin_neon_vfmas_laneq_f32:
12822 case NEON::BI__builtin_neon_vfmad_lane_f64:
12823 case NEON::BI__builtin_neon_vfmad_laneq_f64: {
12824 Ops.push_back(EmitScalarExpr(E->getArg(3)));
12825 llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
12826 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
12827 return emitCallMaybeConstrainedFPBuiltin(
12828 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
12829 {Ops[1], Ops[2], Ops[0]});
12830 }
12831 case NEON::BI__builtin_neon_vmull_v:
12832 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12833 Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull;
12834 if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull;
12835 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
12836 case NEON::BI__builtin_neon_vmax_v:
12837 case NEON::BI__builtin_neon_vmaxq_v:
12838 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12839 Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;
12840 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax;
12841 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
12842 case NEON::BI__builtin_neon_vmaxh_f16: {
12843 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12844 Int = Intrinsic::aarch64_neon_fmax;
12845 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmax");
12846 }
12847 case NEON::BI__builtin_neon_vmin_v:
12848 case NEON::BI__builtin_neon_vminq_v:
12849 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12850 Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;
12851 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin;
12852 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
12853 case NEON::BI__builtin_neon_vminh_f16: {
12854 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12855 Int = Intrinsic::aarch64_neon_fmin;
12856 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmin");
12857 }
12858 case NEON::BI__builtin_neon_vabd_v:
12859 case NEON::BI__builtin_neon_vabdq_v:
12860 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12861 Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd;
12862 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd;
12863 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd");
12864 case NEON::BI__builtin_neon_vpadal_v:
12865 case NEON::BI__builtin_neon_vpadalq_v: {
12866 unsigned ArgElts = VTy->getNumElements();
12867 llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType());
12868 unsigned BitWidth = EltTy->getBitWidth();
12869 auto *ArgTy = llvm::FixedVectorType::get(
12870 llvm::IntegerType::get(getLLVMContext(), BitWidth / 2), 2 * ArgElts);
12871 llvm::Type* Tys[2] = { VTy, ArgTy };
12872 Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp;
12873 llvm::SmallVector<llvm::Value*, 1> TmpOps;
12874 TmpOps.push_back(Ops[1]);
12875 Function *F = CGM.getIntrinsic(Int, Tys);
12876 llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal");
12877 llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType());
12878 return Builder.CreateAdd(tmp, addend);
12879 }
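// Note: vpadal(a, b) is lowered as a + (s|u)addlp(b): the pairwise add-long intrinsic
// widens and sums adjacent element pairs of b, and the accumulator is added with an
// ordinary vector add.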
12880 case NEON::BI__builtin_neon_vpmin_v:
12881 case NEON::BI__builtin_neon_vpminq_v:
12882 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12883 Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp;
12884 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp;
12885 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
12886 case NEON::BI__builtin_neon_vpmax_v:
12887 case NEON::BI__builtin_neon_vpmaxq_v:
12888 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12889 Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp;
12890 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp;
12891 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
12892 case NEON::BI__builtin_neon_vminnm_v:
12893 case NEON::BI__builtin_neon_vminnmq_v:
12894 Int = Intrinsic::aarch64_neon_fminnm;
12895 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm");
12896 case NEON::BI__builtin_neon_vminnmh_f16:
12897 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12898 Int = Intrinsic::aarch64_neon_fminnm;
12899 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vminnm");
12900 case NEON::BI__builtin_neon_vmaxnm_v:
12901 case NEON::BI__builtin_neon_vmaxnmq_v:
12902 Int = Intrinsic::aarch64_neon_fmaxnm;
12903 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm");
12904 case NEON::BI__builtin_neon_vmaxnmh_f16:
12905 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12906 Int = Intrinsic::aarch64_neon_fmaxnm;
12907 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmaxnm");
12908 case NEON::BI__builtin_neon_vrecpss_f32: {
12909 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12910 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy),
12911 Ops, "vrecps");
12912 }
12913 case NEON::BI__builtin_neon_vrecpsd_f64:
12914 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12915 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy),
12916 Ops, "vrecps");
12917 case NEON::BI__builtin_neon_vrecpsh_f16:
12918 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12919 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, HalfTy),
12920 Ops, "vrecps");
12921 case NEON::BI__builtin_neon_vqshrun_n_v:
12922 Int = Intrinsic::aarch64_neon_sqshrun;
12923 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n");
12924 case NEON::BI__builtin_neon_vqrshrun_n_v:
12925 Int = Intrinsic::aarch64_neon_sqrshrun;
12926 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n");
12927 case NEON::BI__builtin_neon_vqshrn_n_v:
12928 Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn;
12929 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n");
12930 case NEON::BI__builtin_neon_vrshrn_n_v:
12931 Int = Intrinsic::aarch64_neon_rshrn;
12932 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n");
12933 case NEON::BI__builtin_neon_vqrshrn_n_v:
12934 Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn;
12935 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n");
12936 case NEON::BI__builtin_neon_vrndah_f16: {
12937 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12938 Int = Builder.getIsFPConstrained()
12939 ? Intrinsic::experimental_constrained_round
12940 : Intrinsic::round;
12941 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrnda");
12942 }
12943 case NEON::BI__builtin_neon_vrnda_v:
12944 case NEON::BI__builtin_neon_vrndaq_v: {
12945 Int = Builder.getIsFPConstrained()
12946 ? Intrinsic::experimental_constrained_round
12947 : Intrinsic::round;
12948 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda");
12949 }
12950 case NEON::BI__builtin_neon_vrndih_f16: {
12951 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12952 Int = Builder.getIsFPConstrained()
12953 ? Intrinsic::experimental_constrained_nearbyint
12954 : Intrinsic::nearbyint;
12955 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndi");
12956 }
12957 case NEON::BI__builtin_neon_vrndmh_f16: {
12958 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12959 Int = Builder.getIsFPConstrained()
12960 ? Intrinsic::experimental_constrained_floor
12961 : Intrinsic::floor;
12962 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndm");
12963 }
12964 case NEON::BI__builtin_neon_vrndm_v:
12965 case NEON::BI__builtin_neon_vrndmq_v: {
12966 Int = Builder.getIsFPConstrained()
12967 ? Intrinsic::experimental_constrained_floor
12968 : Intrinsic::floor;
12969 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm");
12970 }
12971 case NEON::BI__builtin_neon_vrndnh_f16: {
12972 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12973 Int = Builder.getIsFPConstrained()
12974 ? Intrinsic::experimental_constrained_roundeven
12975 : Intrinsic::roundeven;
12976 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndn");
12977 }
12978 case NEON::BI__builtin_neon_vrndn_v:
12979 case NEON::BI__builtin_neon_vrndnq_v: {
12980 Int = Builder.getIsFPConstrained()
12981 ? Intrinsic::experimental_constrained_roundeven
12982 : Intrinsic::roundeven;
12983 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn");
12984 }
12985 case NEON::BI__builtin_neon_vrndns_f32: {
12986 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12987 Int = Builder.getIsFPConstrained()
12988 ? Intrinsic::experimental_constrained_roundeven
12989 : Intrinsic::roundeven;
12990 return EmitNeonCall(CGM.getIntrinsic(Int, FloatTy), Ops, "vrndn");
12991 }
12992 case NEON::BI__builtin_neon_vrndph_f16: {
12993 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12994 Int = Builder.getIsFPConstrained()
12995 ? Intrinsic::experimental_constrained_ceil
12996 : Intrinsic::ceil;
12997 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndp");
12998 }
12999 case NEON::BI__builtin_neon_vrndp_v:
13000 case NEON::BI__builtin_neon_vrndpq_v: {
13001 Int = Builder.getIsFPConstrained()
13002 ? Intrinsic::experimental_constrained_ceil
13003 : Intrinsic::ceil;
13004 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp");
13005 }
13006 case NEON::BI__builtin_neon_vrndxh_f16: {
13007 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13008 Int = Builder.getIsFPConstrained()
13009 ? Intrinsic::experimental_constrained_rint
13010 : Intrinsic::rint;
13011 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndx");
13012 }
13013 case NEON::BI__builtin_neon_vrndx_v:
13014 case NEON::BI__builtin_neon_vrndxq_v: {
13015 Int = Builder.getIsFPConstrained()
13016 ? Intrinsic::experimental_constrained_rint
13017 : Intrinsic::rint;
13018 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx");
13019 }
13020 case NEON::BI__builtin_neon_vrndh_f16: {
13021 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13022 Int = Builder.getIsFPConstrained()
13023 ? Intrinsic::experimental_constrained_trunc
13024 : Intrinsic::trunc;
13025 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndz");
13026 }
13027 case NEON::BI__builtin_neon_vrnd32x_f32:
13028 case NEON::BI__builtin_neon_vrnd32xq_f32:
13029 case NEON::BI__builtin_neon_vrnd32x_f64:
13030 case NEON::BI__builtin_neon_vrnd32xq_f64: {
13031 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13032 Int = Intrinsic::aarch64_neon_frint32x;
13033 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32x");
13034 }
13035 case NEON::BI__builtin_neon_vrnd32z_f32:
13036 case NEON::BI__builtin_neon_vrnd32zq_f32:
13037 case NEON::BI__builtin_neon_vrnd32z_f64:
13038 case NEON::BI__builtin_neon_vrnd32zq_f64: {
13039 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13040 Int = Intrinsic::aarch64_neon_frint32z;
13041 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32z");
13042 }
13043 case NEON::BI__builtin_neon_vrnd64x_f32:
13044 case NEON::BI__builtin_neon_vrnd64xq_f32:
13045 case NEON::BI__builtin_neon_vrnd64x_f64:
13046 case NEON::BI__builtin_neon_vrnd64xq_f64: {
13047 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13048 Int = Intrinsic::aarch64_neon_frint64x;
13049 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64x");
13050 }
13051 case NEON::BI__builtin_neon_vrnd64z_f32:
13052 case NEON::BI__builtin_neon_vrnd64zq_f32:
13053 case NEON::BI__builtin_neon_vrnd64z_f64:
13054 case NEON::BI__builtin_neon_vrnd64zq_f64: {
13055 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13056 Int = Intrinsic::aarch64_neon_frint64z;
13057 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64z");
13058 }
13059 case NEON::BI__builtin_neon_vrnd_v:
13060 case NEON::BI__builtin_neon_vrndq_v: {
13061 Int = Builder.getIsFPConstrained()
13062 ? Intrinsic::experimental_constrained_trunc
13063 : Intrinsic::trunc;
13064 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz");
13065 }
13066 case NEON::BI__builtin_neon_vcvt_f64_v:
13067 case NEON::BI__builtin_neon_vcvtq_f64_v:
13068 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
13069 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
13070 return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
13071 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
13072 case NEON::BI__builtin_neon_vcvt_f64_f32: {
13073 assert(Type.getEltType() == NeonTypeFlags::Float64 && quad &&
13074 "unexpected vcvt_f64_f32 builtin");
13075 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false);
13076 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
13077
13078 return Builder.CreateFPExt(Ops[0], Ty, "vcvt");
13079 }
13080 case NEON::BI__builtin_neon_vcvt_f32_f64: {
13081 assert(Type.getEltType() == NeonTypeFlags::Float32 &&
13082 "unexpected vcvt_f32_f64 builtin");
13083 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true);
13084 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
13085
13086 return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt");
13087 }
13088 case NEON::BI__builtin_neon_vcvt_s32_v:
13089 case NEON::BI__builtin_neon_vcvt_u32_v:
13090 case NEON::BI__builtin_neon_vcvt_s64_v:
13091 case NEON::BI__builtin_neon_vcvt_u64_v:
13092 case NEON::BI__builtin_neon_vcvt_s16_f16:
13093 case NEON::BI__builtin_neon_vcvt_u16_f16:
13094 case NEON::BI__builtin_neon_vcvtq_s32_v:
13095 case NEON::BI__builtin_neon_vcvtq_u32_v:
13096 case NEON::BI__builtin_neon_vcvtq_s64_v:
13097 case NEON::BI__builtin_neon_vcvtq_u64_v:
13098 case NEON::BI__builtin_neon_vcvtq_s16_f16:
13099 case NEON::BI__builtin_neon_vcvtq_u16_f16: {
13100 Int =
13101 usgn ? Intrinsic::aarch64_neon_fcvtzu : Intrinsic::aarch64_neon_fcvtzs;
13102 llvm::Type *Tys[2] = {Ty, GetFloatNeonType(this, Type)};
13103 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtz");
13104 }
13105 case NEON::BI__builtin_neon_vcvta_s16_f16:
13106 case NEON::BI__builtin_neon_vcvta_u16_f16:
13107 case NEON::BI__builtin_neon_vcvta_s32_v:
13108 case NEON::BI__builtin_neon_vcvtaq_s16_f16:
13109 case NEON::BI__builtin_neon_vcvtaq_s32_v:
13110 case NEON::BI__builtin_neon_vcvta_u32_v:
13111 case NEON::BI__builtin_neon_vcvtaq_u16_f16:
13112 case NEON::BI__builtin_neon_vcvtaq_u32_v:
13113 case NEON::BI__builtin_neon_vcvta_s64_v:
13114 case NEON::BI__builtin_neon_vcvtaq_s64_v:
13115 case NEON::BI__builtin_neon_vcvta_u64_v:
13116 case NEON::BI__builtin_neon_vcvtaq_u64_v: {
13117 Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas;
13118 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
13119 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta");
13120 }
13121 case NEON::BI__builtin_neon_vcvtm_s16_f16:
13122 case NEON::BI__builtin_neon_vcvtm_s32_v:
13123 case NEON::BI__builtin_neon_vcvtmq_s16_f16:
13124 case NEON::BI__builtin_neon_vcvtmq_s32_v:
13125 case NEON::BI__builtin_neon_vcvtm_u16_f16:
13126 case NEON::BI__builtin_neon_vcvtm_u32_v:
13127 case NEON::BI__builtin_neon_vcvtmq_u16_f16:
13128 case NEON::BI__builtin_neon_vcvtmq_u32_v:
13129 case NEON::BI__builtin_neon_vcvtm_s64_v:
13130 case NEON::BI__builtin_neon_vcvtmq_s64_v:
13131 case NEON::BI__builtin_neon_vcvtm_u64_v:
13132 case NEON::BI__builtin_neon_vcvtmq_u64_v: {
13133 Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms;
13134 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
13135 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm");
13136 }
13137 case NEON::BI__builtin_neon_vcvtn_s16_f16:
13138 case NEON::BI__builtin_neon_vcvtn_s32_v:
13139 case NEON::BI__builtin_neon_vcvtnq_s16_f16:
13140 case NEON::BI__builtin_neon_vcvtnq_s32_v:
13141 case NEON::BI__builtin_neon_vcvtn_u16_f16:
13142 case NEON::BI__builtin_neon_vcvtn_u32_v:
13143 case NEON::BI__builtin_neon_vcvtnq_u16_f16:
13144 case NEON::BI__builtin_neon_vcvtnq_u32_v:
13145 case NEON::BI__builtin_neon_vcvtn_s64_v:
13146 case NEON::BI__builtin_neon_vcvtnq_s64_v:
13147 case NEON::BI__builtin_neon_vcvtn_u64_v:
13148 case NEON::BI__builtin_neon_vcvtnq_u64_v: {
13149 Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns;
13150 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
13151 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn");
13152 }
13153 case NEON::BI__builtin_neon_vcvtp_s16_f16:
13154 case NEON::BI__builtin_neon_vcvtp_s32_v:
13155 case NEON::BI__builtin_neon_vcvtpq_s16_f16:
13156 case NEON::BI__builtin_neon_vcvtpq_s32_v:
13157 case NEON::BI__builtin_neon_vcvtp_u16_f16:
13158 case NEON::BI__builtin_neon_vcvtp_u32_v:
13159 case NEON::BI__builtin_neon_vcvtpq_u16_f16:
13160 case NEON::BI__builtin_neon_vcvtpq_u32_v:
13161 case NEON::BI__builtin_neon_vcvtp_s64_v:
13162 case NEON::BI__builtin_neon_vcvtpq_s64_v:
13163 case NEON::BI__builtin_neon_vcvtp_u64_v:
13164 case NEON::BI__builtin_neon_vcvtpq_u64_v: {
13165 Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps;
13166 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
13167 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp");
13168 }
13169 case NEON::BI__builtin_neon_vmulx_v:
13170 case NEON::BI__builtin_neon_vmulxq_v: {
13171 Int = Intrinsic::aarch64_neon_fmulx;
13172 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
13173 }
13174 case NEON::BI__builtin_neon_vmulxh_lane_f16:
13175 case NEON::BI__builtin_neon_vmulxh_laneq_f16: {
13176 // vmulx_lane should be mapped to Neon scalar mulx after
13177 // extracting the scalar element
13178 Ops.push_back(EmitScalarExpr(E->getArg(2)));
13179 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
13180 Ops.pop_back();
13181 Int = Intrinsic::aarch64_neon_fmulx;
13182 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmulx");
13183 }
13184 case NEON::BI__builtin_neon_vmul_lane_v:
13185 case NEON::BI__builtin_neon_vmul_laneq_v: {
13186 // v1f64 vmul_lane should be mapped to Neon scalar mul lane
13187 bool Quad = false;
13188 if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v)
13189 Quad = true;
13190 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
13191 llvm::FixedVectorType *VTy =
13192 GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, Quad));
13193 Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
13194 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
13195 Value *Result = Builder.CreateFMul(Ops[0], Ops[1]);
13196 return Builder.CreateBitCast(Result, Ty);
13197 }
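// Note: a <1 x double> multiply-by-lane degenerates to a scalar fmul: the selected lane
// is extracted from the (1- or 2-element) source vector, multiplied with the bitcast
// scalar, and the product is bitcast back to the v1f64 result type.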
13198 case NEON::BI__builtin_neon_vnegd_s64:
13199 return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd");
13200 case NEON::BI__builtin_neon_vnegh_f16:
13201 return Builder.CreateFNeg(EmitScalarExpr(E->getArg(0)), "vnegh");
13202 case NEON::BI__builtin_neon_vpmaxnm_v:
13203 case NEON::BI__builtin_neon_vpmaxnmq_v: {
13204 Int = Intrinsic::aarch64_neon_fmaxnmp;
13205 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm");
13206 }
13207 case NEON::BI__builtin_neon_vpminnm_v:
13208 case NEON::BI__builtin_neon_vpminnmq_v: {
13209 Int = Intrinsic::aarch64_neon_fminnmp;
13210 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm");
13211 }
13212 case NEON::BI__builtin_neon_vsqrth_f16: {
13213 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13214 Int = Builder.getIsFPConstrained()
13215 ? Intrinsic::experimental_constrained_sqrt
13216 : Intrinsic::sqrt;
13217 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vsqrt");
13218 }
13219 case NEON::BI__builtin_neon_vsqrt_v:
13220 case NEON::BI__builtin_neon_vsqrtq_v: {
13221 Int = Builder.getIsFPConstrained()
13222 ? Intrinsic::experimental_constrained_sqrt
13223 : Intrinsic::sqrt;
13224 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
13225 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt");
13226 }
13227 case NEON::BI__builtin_neon_vrbit_v:
13228 case NEON::BI__builtin_neon_vrbitq_v: {
13229 Int = Intrinsic::bitreverse;
13230 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit");
13231 }
13232 case NEON::BI__builtin_neon_vaddv_u8:
13233 // FIXME: These are handled by the AArch64 scalar code.
13234 usgn = true;
13235 [[fallthrough]];
13236 case NEON::BI__builtin_neon_vaddv_s8: {
13237 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
13238 Ty = Int32Ty;
13239 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
13240 llvm::Type *Tys[2] = { Ty, VTy };
13241 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13242 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
13243 return Builder.CreateTrunc(Ops[0], Int8Ty);
13244 }
13245 case NEON::BI__builtin_neon_vaddv_u16:
13246 usgn = true;
13247 [[fallthrough]];
13248 case NEON::BI__builtin_neon_vaddv_s16: {
13249 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
13250 Ty = Int32Ty;
13251 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
13252 llvm::Type *Tys[2] = { Ty, VTy };
13253 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13254 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
13255 return Builder.CreateTrunc(Ops[0], Int16Ty);
13256 }
13257 case NEON::BI__builtin_neon_vaddvq_u8:
13258 usgn = true;
13259 [[fallthrough]];
13260 case NEON::BI__builtin_neon_vaddvq_s8: {
13261 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
13262 Ty = Int32Ty;
13263 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
13264 llvm::Type *Tys[2] = { Ty, VTy };
13265 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13266 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
13267 return Builder.CreateTrunc(Ops[0], Int8Ty);
13268 }
13269 case NEON::BI__builtin_neon_vaddvq_u16:
13270 usgn = true;
13271 [[fallthrough]];
13272 case NEON::BI__builtin_neon_vaddvq_s16: {
13273 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
13274 Ty = Int32Ty;
13275 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
13276 llvm::Type *Tys[2] = { Ty, VTy };
13277 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13278 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
13279 return Builder.CreateTrunc(Ops[0], Int16Ty);
13280 }
13281 case NEON::BI__builtin_neon_vmaxv_u8: {
13282 Int = Intrinsic::aarch64_neon_umaxv;
13283 Ty = Int32Ty;
13284 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
13285 llvm::Type *Tys[2] = { Ty, VTy };
13286 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13287 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
13288 return Builder.CreateTrunc(Ops[0], Int8Ty);
13289 }
13290 case NEON::BI__builtin_neon_vmaxv_u16: {
13291 Int = Intrinsic::aarch64_neon_umaxv;
13292 Ty = Int32Ty;
13293 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
13294 llvm::Type *Tys[2] = { Ty, VTy };
13295 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13296 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
13297 return Builder.CreateTrunc(Ops[0], Int16Ty);
13298 }
13299 case NEON::BI__builtin_neon_vmaxvq_u8: {
13300 Int = Intrinsic::aarch64_neon_umaxv;
13301 Ty = Int32Ty;
13302 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
13303 llvm::Type *Tys[2] = { Ty, VTy };
13304 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13305 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
13306 return Builder.CreateTrunc(Ops[0], Int8Ty);
13307 }
13308 case NEON::BI__builtin_neon_vmaxvq_u16: {
13309 Int = Intrinsic::aarch64_neon_umaxv;
13310 Ty = Int32Ty;
13311 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
13312 llvm::Type *Tys[2] = { Ty, VTy };
13313 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13314 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
13315 return Builder.CreateTrunc(Ops[0], Int16Ty);
13316 }
13317 case NEON::BI__builtin_neon_vmaxv_s8: {
13318 Int = Intrinsic::aarch64_neon_smaxv;
13319 Ty = Int32Ty;
13320 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
13321 llvm::Type *Tys[2] = { Ty, VTy };
13322 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13323 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
13324 return Builder.CreateTrunc(Ops[0], Int8Ty);
13325 }
13326 case NEON::BI__builtin_neon_vmaxv_s16: {
13327 Int = Intrinsic::aarch64_neon_smaxv;
13328 Ty = Int32Ty;
13329 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
13330 llvm::Type *Tys[2] = { Ty, VTy };
13331 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13332 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
13333 return Builder.CreateTrunc(Ops[0], Int16Ty);
13334 }
13335 case NEON::BI__builtin_neon_vmaxvq_s8: {
13336 Int = Intrinsic::aarch64_neon_smaxv;
13337 Ty = Int32Ty;
13338 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
13339 llvm::Type *Tys[2] = { Ty, VTy };
13340 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13341 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
13342 return Builder.CreateTrunc(Ops[0], Int8Ty);
13343 }
13344 case NEON::BI__builtin_neon_vmaxvq_s16: {
13345 Int = Intrinsic::aarch64_neon_smaxv;
13346 Ty = Int32Ty;
13347 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
13348 llvm::Type *Tys[2] = { Ty, VTy };
13349 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13350 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
13351 return Builder.CreateTrunc(Ops[0], Int16Ty);
13352 }
13353 case NEON::BI__builtin_neon_vmaxv_f16: {
13354 Int = Intrinsic::aarch64_neon_fmaxv;
13355 Ty = HalfTy;
13356 VTy = llvm::FixedVectorType::get(HalfTy, 4);
13357 llvm::Type *Tys[2] = { Ty, VTy };
13358 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13359 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
13360 return Builder.CreateTrunc(Ops[0], HalfTy);
13361 }
13362 case NEON::BI__builtin_neon_vmaxvq_f16: {
13363 Int = Intrinsic::aarch64_neon_fmaxv;
13364 Ty = HalfTy;
13365 VTy = llvm::FixedVectorType::get(HalfTy, 8);
13366 llvm::Type *Tys[2] = { Ty, VTy };
13367 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13368 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
13369 return Builder.CreateTrunc(Ops[0], HalfTy);
13370 }
13371 case NEON::BI__builtin_neon_vminv_u8: {
13372 Int = Intrinsic::aarch64_neon_uminv;
13373 Ty = Int32Ty;
13374 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
13375 llvm::Type *Tys[2] = { Ty, VTy };
13376 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13377 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13378 return Builder.CreateTrunc(Ops[0], Int8Ty);
13379 }
13380 case NEON::BI__builtin_neon_vminv_u16: {
13381 Int = Intrinsic::aarch64_neon_uminv;
13382 Ty = Int32Ty;
13383 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
13384 llvm::Type *Tys[2] = { Ty, VTy };
13385 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13386 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13387 return Builder.CreateTrunc(Ops[0], Int16Ty);
13388 }
13389 case NEON::BI__builtin_neon_vminvq_u8: {
13390 Int = Intrinsic::aarch64_neon_uminv;
13391 Ty = Int32Ty;
13392 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
13393 llvm::Type *Tys[2] = { Ty, VTy };
13394 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13395 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13396 return Builder.CreateTrunc(Ops[0], Int8Ty);
13397 }
13398 case NEON::BI__builtin_neon_vminvq_u16: {
13399 Int = Intrinsic::aarch64_neon_uminv;
13400 Ty = Int32Ty;
13401 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
13402 llvm::Type *Tys[2] = { Ty, VTy };
13403 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13404 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13405 return Builder.CreateTrunc(Ops[0], Int16Ty);
13406 }
13407 case NEON::BI__builtin_neon_vminv_s8: {
13408 Int = Intrinsic::aarch64_neon_sminv;
13409 Ty = Int32Ty;
13410 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
13411 llvm::Type *Tys[2] = { Ty, VTy };
13412 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13413 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13414 return Builder.CreateTrunc(Ops[0], Int8Ty);
13415 }
13416 case NEON::BI__builtin_neon_vminv_s16: {
13417 Int = Intrinsic::aarch64_neon_sminv;
13418 Ty = Int32Ty;
13419 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
13420 llvm::Type *Tys[2] = { Ty, VTy };
13421 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13422 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13423 return Builder.CreateTrunc(Ops[0], Int16Ty);
13424 }
13425 case NEON::BI__builtin_neon_vminvq_s8: {
13426 Int = Intrinsic::aarch64_neon_sminv;
13427 Ty = Int32Ty;
13428 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
13429 llvm::Type *Tys[2] = { Ty, VTy };
13430 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13431 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13432 return Builder.CreateTrunc(Ops[0], Int8Ty);
13433 }
13434 case NEON::BI__builtin_neon_vminvq_s16: {
13435 Int = Intrinsic::aarch64_neon_sminv;
13436 Ty = Int32Ty;
13437 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
13438 llvm::Type *Tys[2] = { Ty, VTy };
13439 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13440 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13441 return Builder.CreateTrunc(Ops[0], Int16Ty);
13442 }
13443 case NEON::BI__builtin_neon_vminv_f16: {
13444 Int = Intrinsic::aarch64_neon_fminv;
13445 Ty = HalfTy;
13446 VTy = llvm::FixedVectorType::get(HalfTy, 4);
13447 llvm::Type *Tys[2] = { Ty, VTy };
13448 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13449 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13450 return Builder.CreateTrunc(Ops[0], HalfTy);
13451 }
13452 case NEON::BI__builtin_neon_vminvq_f16: {
13453 Int = Intrinsic::aarch64_neon_fminv;
13454 Ty = HalfTy;
13455 VTy = llvm::FixedVectorType::get(HalfTy, 8);
13456 llvm::Type *Tys[2] = { Ty, VTy };
13457 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13458 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13459 return Builder.CreateTrunc(Ops[0], HalfTy);
13460 }
13461 case NEON::BI__builtin_neon_vmaxnmv_f16: {
13462 Int = Intrinsic::aarch64_neon_fmaxnmv;
13463 Ty = HalfTy;
13464 VTy = llvm::FixedVectorType::get(HalfTy, 4);
13465 llvm::Type *Tys[2] = { Ty, VTy };
13466 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13467 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
13468 return Builder.CreateTrunc(Ops[0], HalfTy);
13469 }
13470 case NEON::BI__builtin_neon_vmaxnmvq_f16: {
13471 Int = Intrinsic::aarch64_neon_fmaxnmv;
13472 Ty = HalfTy;
13473 VTy = llvm::FixedVectorType::get(HalfTy, 8);
13474 llvm::Type *Tys[2] = { Ty, VTy };
13475 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13476 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
13477 return Builder.CreateTrunc(Ops[0], HalfTy);
13478 }
13479 case NEON::BI__builtin_neon_vminnmv_f16: {
13480 Int = Intrinsic::aarch64_neon_fminnmv;
13481 Ty = HalfTy;
13482 VTy = llvm::FixedVectorType::get(HalfTy, 4);
13483 llvm::Type *Tys[2] = { Ty, VTy };
13484 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13485 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
13486 return Builder.CreateTrunc(Ops[0], HalfTy);
13487 }
13488 case NEON::BI__builtin_neon_vminnmvq_f16: {
13489 Int = Intrinsic::aarch64_neon_fminnmv;
13490 Ty = HalfTy;
13491 VTy = llvm::FixedVectorType::get(HalfTy, 8);
13492 llvm::Type *Tys[2] = { Ty, VTy };
13493 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13494 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
13495 return Builder.CreateTrunc(Ops[0], HalfTy);
13496 }
13497 case NEON::BI__builtin_neon_vmul_n_f64: {
13498 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
13499 Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy);
13500 return Builder.CreateFMul(Ops[0], RHS);
13501 }
13502 case NEON::BI__builtin_neon_vaddlv_u8: {
13503 Int = Intrinsic::aarch64_neon_uaddlv;
13504 Ty = Int32Ty;
13505 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
13506 llvm::Type *Tys[2] = { Ty, VTy };
13507 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13508 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13509 return Builder.CreateTrunc(Ops[0], Int16Ty);
13510 }
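// Note: the (s|u)addlv intrinsics always produce an i32; for the 8-bit-element forms the
// C intrinsic returns a 16-bit sum, so the result is truncated to i16.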
13511 case NEON::BI__builtin_neon_vaddlv_u16: {
13512 Int = Intrinsic::aarch64_neon_uaddlv;
13513 Ty = Int32Ty;
13514 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
13515 llvm::Type *Tys[2] = { Ty, VTy };
13516 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13517 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13518 }
13519 case NEON::BI__builtin_neon_vaddlvq_u8: {
13520 Int = Intrinsic::aarch64_neon_uaddlv;
13521 Ty = Int32Ty;
13522 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
13523 llvm::Type *Tys[2] = { Ty, VTy };
13524 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13525 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13526 return Builder.CreateTrunc(Ops[0], Int16Ty);
13527 }
13528 case NEON::BI__builtin_neon_vaddlvq_u16: {
13529 Int = Intrinsic::aarch64_neon_uaddlv;
13530 Ty = Int32Ty;
13531 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
13532 llvm::Type *Tys[2] = { Ty, VTy };
13533 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13534 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13535 }
13536 case NEON::BI__builtin_neon_vaddlv_s8: {
13537 Int = Intrinsic::aarch64_neon_saddlv;
13538 Ty = Int32Ty;
13539 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
13540 llvm::Type *Tys[2] = { Ty, VTy };
13541 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13542 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13543 return Builder.CreateTrunc(Ops[0], Int16Ty);
13544 }
13545 case NEON::BI__builtin_neon_vaddlv_s16: {
13546 Int = Intrinsic::aarch64_neon_saddlv;
13547 Ty = Int32Ty;
13548 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
13549 llvm::Type *Tys[2] = { Ty, VTy };
13550 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13551 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13552 }
13553 case NEON::BI__builtin_neon_vaddlvq_s8: {
13554 Int = Intrinsic::aarch64_neon_saddlv;
13555 Ty = Int32Ty;
13556 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
13557 llvm::Type *Tys[2] = { Ty, VTy };
13558 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13559 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13560 return Builder.CreateTrunc(Ops[0], Int16Ty);
13561 }
13562 case NEON::BI__builtin_neon_vaddlvq_s16: {
13563 Int = Intrinsic::aarch64_neon_saddlv;
13564 Ty = Int32Ty;
13565 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
13566 llvm::Type *Tys[2] = { Ty, VTy };
13567 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13568 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13569 }
13570 case NEON::BI__builtin_neon_vsri_n_v:
13571 case NEON::BI__builtin_neon_vsriq_n_v: {
13572 Int = Intrinsic::aarch64_neon_vsri;
13573 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
13574 return EmitNeonCall(Intrin, Ops, "vsri_n");
13575 }
13576 case NEON::BI__builtin_neon_vsli_n_v:
13577 case NEON::BI__builtin_neon_vsliq_n_v: {
13578 Int = Intrinsic::aarch64_neon_vsli;
13579 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
13580 return EmitNeonCall(Intrin, Ops, "vsli_n");
13581 }
13582 case NEON::BI__builtin_neon_vsra_n_v:
13583 case NEON::BI__builtin_neon_vsraq_n_v:
13584 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
13585 Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
13586 return Builder.CreateAdd(Ops[0], Ops[1]);
13587 case NEON::BI__builtin_neon_vrsra_n_v:
13588 case NEON::BI__builtin_neon_vrsraq_n_v: {
13589 Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl;
13590 SmallVector<llvm::Value*, 2> TmpOps;
13591 TmpOps.push_back(Ops[1]);
13592 TmpOps.push_back(Ops[2]);
13593 Function* F = CGM.getIntrinsic(Int, Ty);
13594 llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true);
13595 Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
13596 return Builder.CreateAdd(Ops[0], tmp);
13597 }
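// Note: as in the scalar case above, the rounding right shift is emitted as the rounding
// left shift intrinsic with a negated shift amount (the trailing EmitNeonCall arguments
// mark operand 1 as a right-shift amount), and the shifted value is then added to the
// accumulator.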
13598 case NEON::BI__builtin_neon_vld1_v:
13599 case NEON::BI__builtin_neon_vld1q_v: {
13600 return Builder.CreateAlignedLoad(VTy, Ops[0], PtrOp0.getAlignment());
13601 }
13602 case NEON::BI__builtin_neon_vst1_v:
13603 case NEON::BI__builtin_neon_vst1q_v:
13604 Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
13605 return Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
13606 case NEON::BI__builtin_neon_vld1_lane_v:
13607 case NEON::BI__builtin_neon_vld1q_lane_v: {
13608 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13609 Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
13610 PtrOp0.getAlignment());
13611 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane");
13612 }
13613 case NEON::BI__builtin_neon_vldap1_lane_s64:
13614 case NEON::BI__builtin_neon_vldap1q_lane_s64: {
13615 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13616 llvm::LoadInst *LI = Builder.CreateAlignedLoad(
13617 VTy->getElementType(), Ops[0], PtrOp0.getAlignment());
13618 LI->setAtomic(llvm::AtomicOrdering::Acquire);
13619 Ops[0] = LI;
13620 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vldap1_lane");
13621 }
13622 case NEON::BI__builtin_neon_vld1_dup_v:
13623 case NEON::BI__builtin_neon_vld1q_dup_v: {
13624 Value *V = PoisonValue::get(Ty);
13625 Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
13626 PtrOp0.getAlignment());
13627 llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
13628 Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI);
13629 return EmitNeonSplat(Ops[0], CI);
13630 }
13631 case NEON::BI__builtin_neon_vst1_lane_v:
13632 case NEON::BI__builtin_neon_vst1q_lane_v:
13633 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13634 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
13635 return Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
13636 case NEON::BI__builtin_neon_vstl1_lane_s64:
13637 case NEON::BI__builtin_neon_vstl1q_lane_s64: {
13638 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13639 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
13640 llvm::StoreInst *SI =
13641 Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
13642 SI->setAtomic(llvm::AtomicOrdering::Release);
13643 return SI;
13644 }
13645 case NEON::BI__builtin_neon_vld2_v:
13646 case NEON::BI__builtin_neon_vld2q_v: {
13647 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13648 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys);
13649 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
13650 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13651 }
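// Note: aarch64.neon.ld2 returns a struct of two vectors; because the builtin hands back
// its result through the pointer in Ops[0], the struct is simply stored there.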
13652 case NEON::BI__builtin_neon_vld3_v:
13653 case NEON::BI__builtin_neon_vld3q_v: {
13654 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13655 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys);
13656 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
13657 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13658 }
13659 case NEON::BI__builtin_neon_vld4_v:
13660 case NEON::BI__builtin_neon_vld4q_v: {
13661 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13662 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys);
13663 Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
13664 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13665 }
13666 case NEON::BI__builtin_neon_vld2_dup_v:
13667 case NEON::BI__builtin_neon_vld2q_dup_v: {
13668 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13669 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys);
13670 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
13671 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13672 }
13673 case NEON::BI__builtin_neon_vld3_dup_v:
13674 case NEON::BI__builtin_neon_vld3q_dup_v: {
13675 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13676 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys);
13677 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
13678 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13679 }
13680 case NEON::BI__builtin_neon_vld4_dup_v:
13681 case NEON::BI__builtin_neon_vld4q_dup_v: {
13682 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13683 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys);
13684 Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
13685 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13686 }
13687 case NEON::BI__builtin_neon_vld2_lane_v:
13688 case NEON::BI__builtin_neon_vld2q_lane_v: {
13689 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
13690 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys);
13691 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
13692 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13693 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13694 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
13695 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld2_lane");
13696 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13697 }
13698 case NEON::BI__builtin_neon_vld3_lane_v:
13699 case NEON::BI__builtin_neon_vld3q_lane_v: {
13700 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
13701 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys);
13702 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
13703 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13704 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13705 Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
13706 Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
13707 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld3_lane");
13708 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13709 }
13710 case NEON::BI__builtin_neon_vld4_lane_v:
13711 case NEON::BI__builtin_neon_vld4q_lane_v: {
13712 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
13713 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys);
13714 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
13715 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13716 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13717 Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
13718 Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
13719 Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty);
13720 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld4_lane");
13721 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13722 }
13723 case NEON::BI__builtin_neon_vst2_v:
13724 case NEON::BI__builtin_neon_vst2q_v: {
13725 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13726 llvm::Type *Tys[2] = { VTy, Ops[2]->getType() };
13727 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys),
13728 Ops, "");
13729 }
13730 case NEON::BI__builtin_neon_vst2_lane_v:
13731 case NEON::BI__builtin_neon_vst2q_lane_v: {
13732 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13733 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
13734 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
13735 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys),
13736 Ops, "");
13737 }
13738 case NEON::BI__builtin_neon_vst3_v:
13739 case NEON::BI__builtin_neon_vst3q_v: {
13740 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13741 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
13742 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys),
13743 Ops, "");
13744 }
13745 case NEON::BI__builtin_neon_vst3_lane_v:
13746 case NEON::BI__builtin_neon_vst3q_lane_v: {
13747 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13748 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
13749 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
13750 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys),
13751 Ops, "");
13752 }
13753 case NEON::BI__builtin_neon_vst4_v:
13754 case NEON::BI__builtin_neon_vst4q_v: {
13755 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13756 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
13757 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys),
13758 Ops, "");
13759 }
13760 case NEON::BI__builtin_neon_vst4_lane_v:
13761 case NEON::BI__builtin_neon_vst4q_lane_v: {
13762 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13763 Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
13764 llvm::Type *Tys[2] = { VTy, Ops[5]->getType() };
13765 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys),
13766 Ops, "");
13767 }
13768 case NEON::BI__builtin_neon_vtrn_v:
13769 case NEON::BI__builtin_neon_vtrnq_v: {
13770 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13771 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13772 Value *SV = nullptr;
13773
13774 for (unsigned vi = 0; vi != 2; ++vi) {
13775 SmallVector<int, 16> Indices;
13776 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
13777 Indices.push_back(i+vi);
13778 Indices.push_back(i+e+vi);
13779 }
13780 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
13781 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
13782 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
13783 }
13784 return SV;
13785 }
13786 case NEON::BI__builtin_neon_vuzp_v:
13787 case NEON::BI__builtin_neon_vuzpq_v: {
13788 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13789 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13790 Value *SV = nullptr;
13791
13792 for (unsigned vi = 0; vi != 2; ++vi) {
13793 SmallVector<int, 16> Indices;
13794 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
13795 Indices.push_back(2*i+vi);
13796
13797 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
13798 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
13799 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
13800 }
13801 return SV;
13802 }
13803 case NEON::BI__builtin_neon_vzip_v:
13804 case NEON::BI__builtin_neon_vzipq_v: {
13805 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13806 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13807 Value *SV = nullptr;
13808
13809 for (unsigned vi = 0; vi != 2; ++vi) {
13810 SmallVector<int, 16> Indices;
13811 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
13812 Indices.push_back((i + vi*e) >> 1);
13813 Indices.push_back(((i + vi*e) >> 1)+e);
13814 }
13815 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
13816 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
13817 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
13818 }
13819 return SV;
13820 }
13821 case NEON::BI__builtin_neon_vqtbl1q_v: {
13822 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty),
13823 Ops, "vtbl1");
13824 }
13825 case NEON::BI__builtin_neon_vqtbl2q_v: {
13826 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty),
13827 Ops, "vtbl2");
13828 }
13829 case NEON::BI__builtin_neon_vqtbl3q_v: {
13830 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty),
13831 Ops, "vtbl3");
13832 }
13833 case NEON::BI__builtin_neon_vqtbl4q_v: {
13834 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty),
13835 Ops, "vtbl4");
13836 }
13837 case NEON::BI__builtin_neon_vqtbx1q_v: {
13838 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty),
13839 Ops, "vtbx1");
13840 }
13841 case NEON::BI__builtin_neon_vqtbx2q_v: {
13842 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty),
13843 Ops, "vtbx2");
13844 }
13845 case NEON::BI__builtin_neon_vqtbx3q_v: {
13846 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty),
13847 Ops, "vtbx3");
13848 }
13849 case NEON::BI__builtin_neon_vqtbx4q_v: {
13850 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty),
13851 Ops, "vtbx4");
13852 }
13853 case NEON::BI__builtin_neon_vsqadd_v:
13854 case NEON::BI__builtin_neon_vsqaddq_v: {
13855 Int = Intrinsic::aarch64_neon_usqadd;
13856 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd");
13857 }
13858 case NEON::BI__builtin_neon_vuqadd_v:
13859 case NEON::BI__builtin_neon_vuqaddq_v: {
13860 Int = Intrinsic::aarch64_neon_suqadd;
13861 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
13862 }
13863
13864 case NEON::BI__builtin_neon_vluti2_laneq_bf16:
13865 case NEON::BI__builtin_neon_vluti2_laneq_f16:
13866 case NEON::BI__builtin_neon_vluti2_laneq_p16:
13867 case NEON::BI__builtin_neon_vluti2_laneq_p8:
13868 case NEON::BI__builtin_neon_vluti2_laneq_s16:
13869 case NEON::BI__builtin_neon_vluti2_laneq_s8:
13870 case NEON::BI__builtin_neon_vluti2_laneq_u16:
13871 case NEON::BI__builtin_neon_vluti2_laneq_u8: {
13872 Int = Intrinsic::aarch64_neon_vluti2_laneq;
13873 llvm::Type *Tys[2];
13874 Tys[0] = Ty;
13875 Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
13876 /*isQuad*/ false));
13877 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_laneq");
13878 }
13879 case NEON::BI__builtin_neon_vluti2q_laneq_bf16:
13880 case NEON::BI__builtin_neon_vluti2q_laneq_f16:
13881 case NEON::BI__builtin_neon_vluti2q_laneq_p16:
13882 case NEON::BI__builtin_neon_vluti2q_laneq_p8:
13883 case NEON::BI__builtin_neon_vluti2q_laneq_s16:
13884 case NEON::BI__builtin_neon_vluti2q_laneq_s8:
13885 case NEON::BI__builtin_neon_vluti2q_laneq_u16:
13886 case NEON::BI__builtin_neon_vluti2q_laneq_u8: {
13887 Int = Intrinsic::aarch64_neon_vluti2_laneq;
13888 llvm::Type *Tys[2];
13889 Tys[0] = Ty;
13890 Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
13891 /*isQuad*/ true));
13892 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_laneq");
13893 }
13894 case NEON::BI__builtin_neon_vluti2_lane_bf16:
13895 case NEON::BI__builtin_neon_vluti2_lane_f16:
13896 case NEON::BI__builtin_neon_vluti2_lane_p16:
13897 case NEON::BI__builtin_neon_vluti2_lane_p8:
13898 case NEON::BI__builtin_neon_vluti2_lane_s16:
13899 case NEON::BI__builtin_neon_vluti2_lane_s8:
13900 case NEON::BI__builtin_neon_vluti2_lane_u16:
13901 case NEON::BI__builtin_neon_vluti2_lane_u8: {
13902 Int = Intrinsic::aarch64_neon_vluti2_lane;
13903 llvm::Type *Tys[2];
13904 Tys[0] = Ty;
13905 Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
13906 /*isQuad*/ false));
13907 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_lane");
13908 }
13909 case NEON::BI__builtin_neon_vluti2q_lane_bf16:
13910 case NEON::BI__builtin_neon_vluti2q_lane_f16:
13911 case NEON::BI__builtin_neon_vluti2q_lane_p16:
13912 case NEON::BI__builtin_neon_vluti2q_lane_p8:
13913 case NEON::BI__builtin_neon_vluti2q_lane_s16:
13914 case NEON::BI__builtin_neon_vluti2q_lane_s8:
13915 case NEON::BI__builtin_neon_vluti2q_lane_u16:
13916 case NEON::BI__builtin_neon_vluti2q_lane_u8: {
13917 Int = Intrinsic::aarch64_neon_vluti2_lane;
13918 llvm::Type *Tys[2];
13919 Tys[0] = Ty;
13920 Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
13921 /*isQuad*/ true));
13922 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_lane");
13923 }
13924 case NEON::BI__builtin_neon_vluti4q_lane_p8:
13925 case NEON::BI__builtin_neon_vluti4q_lane_s8:
13926 case NEON::BI__builtin_neon_vluti4q_lane_u8: {
13927 Int = Intrinsic::aarch64_neon_vluti4q_lane;
13928 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_lane");
13929 }
13930 case NEON::BI__builtin_neon_vluti4q_laneq_p8:
13931 case NEON::BI__builtin_neon_vluti4q_laneq_s8:
13932 case NEON::BI__builtin_neon_vluti4q_laneq_u8: {
13933 Int = Intrinsic::aarch64_neon_vluti4q_laneq;
13934 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_laneq");
13935 }
13936 case NEON::BI__builtin_neon_vluti4q_lane_bf16_x2:
13937 case NEON::BI__builtin_neon_vluti4q_lane_f16_x2:
13938 case NEON::BI__builtin_neon_vluti4q_lane_p16_x2:
13939 case NEON::BI__builtin_neon_vluti4q_lane_s16_x2:
13940 case NEON::BI__builtin_neon_vluti4q_lane_u16_x2: {
13941 Int = Intrinsic::aarch64_neon_vluti4q_lane_x2;
13942 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_lane_x2");
13943 }
13944 case NEON::BI__builtin_neon_vluti4q_laneq_bf16_x2:
13945 case NEON::BI__builtin_neon_vluti4q_laneq_f16_x2:
13946 case NEON::BI__builtin_neon_vluti4q_laneq_p16_x2:
13947 case NEON::BI__builtin_neon_vluti4q_laneq_s16_x2:
13948 case NEON::BI__builtin_neon_vluti4q_laneq_u16_x2: {
13949 Int = Intrinsic::aarch64_neon_vluti4q_laneq_x2;
13950 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_laneq_x2");
13951 }
13952
13953 case NEON::BI__builtin_neon_vamin_f16:
13954 case NEON::BI__builtin_neon_vaminq_f16:
13955 case NEON::BI__builtin_neon_vamin_f32:
13956 case NEON::BI__builtin_neon_vaminq_f32:
13957 case NEON::BI__builtin_neon_vaminq_f64: {
13958 Int = Intrinsic::aarch64_neon_famin;
13959 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "famin");
13960 }
13961 case NEON::BI__builtin_neon_vamax_f16:
13962 case NEON::BI__builtin_neon_vamaxq_f16:
13963 case NEON::BI__builtin_neon_vamax_f32:
13964 case NEON::BI__builtin_neon_vamaxq_f32:
13965 case NEON::BI__builtin_neon_vamaxq_f64: {
13966 Int = Intrinsic::aarch64_neon_famax;
13967 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "famax");
13968 }
13969 case NEON::BI__builtin_neon_vscale_f16:
13970 case NEON::BI__builtin_neon_vscaleq_f16:
13971 case NEON::BI__builtin_neon_vscale_f32:
13972 case NEON::BI__builtin_neon_vscaleq_f32:
13973 case NEON::BI__builtin_neon_vscaleq_f64: {
13974 Int = Intrinsic::aarch64_neon_fp8_fscale;
13975 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fscale");
13976 }
13977 }
13978}
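
// Editorial sketch (not part of the original CGBuiltin.cpp source): the
// vtrn/vuzp/vzip cases in the switch above each emit two shufflevector+store
// pairs, and the interesting part is the index pattern they build. The
// standalone helpers below (names are illustrative only) model those index
// computations in plain C++, assuming two NumElts-element inputs that are
// conceptually concatenated, so indices >= NumElts select from the second
// vector.
static void exampleTrnIndices(unsigned NumElts, unsigned vi, int *Out) {
  // vtrn: pair up the even (vi == 0) or odd (vi == 1) lanes of both inputs.
  unsigned N = 0;
  for (unsigned i = 0; i != NumElts; i += 2) {
    Out[N++] = i + vi;           // lane from the first vector
    Out[N++] = i + NumElts + vi; // matching lane from the second vector
  }
}
static void exampleUzpIndices(unsigned NumElts, unsigned vi, int *Out) {
  // vuzp: gather the even (vi == 0) or odd (vi == 1) lanes across both inputs.
  for (unsigned i = 0; i != NumElts; ++i)
    Out[i] = 2 * i + vi;
}
static void exampleZipIndices(unsigned NumElts, unsigned vi, int *Out) {
  // vzip: interleave the low (vi == 0) or high (vi == 1) halves of both inputs.
  unsigned N = 0;
  for (unsigned i = 0; i != NumElts; i += 2) {
    Out[N++] = (i + vi * NumElts) >> 1;
    Out[N++] = ((i + vi * NumElts) >> 1) + NumElts;
  }
}
// For NumElts == 4 and vi == 0 this gives vtrn -> {0,4,2,6},
// vuzp -> {0,2,4,6} and vzip -> {0,4,1,5}, matching the code above.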
13979
13980Value *CodeGenFunction::EmitBPFBuiltinExpr(unsigned BuiltinID,
13981 const CallExpr *E) {
13982 assert((BuiltinID == BPF::BI__builtin_preserve_field_info ||
13983 BuiltinID == BPF::BI__builtin_btf_type_id ||
13984 BuiltinID == BPF::BI__builtin_preserve_type_info ||
13985 BuiltinID == BPF::BI__builtin_preserve_enum_value) &&
13986 "unexpected BPF builtin");
13987
13988 // A sequence number, injected into IR builtin functions, to
13989 // prevent CSE, since the only difference between otherwise identical
13990 // calls may be just the debuginfo metadata.
13991 static uint32_t BuiltinSeqNum;
13992
13993 switch (BuiltinID) {
13994 default:
13995 llvm_unreachable("Unexpected BPF builtin");
13996 case BPF::BI__builtin_preserve_field_info: {
13997 const Expr *Arg = E->getArg(0);
13998 bool IsBitField = Arg->IgnoreParens()->getObjectKind() == OK_BitField;
13999
14000 if (!getDebugInfo()) {
14001 CGM.Error(E->getExprLoc(),
14002 "using __builtin_preserve_field_info() without -g");
14003 return IsBitField ? EmitLValue(Arg).getRawBitFieldPointer(*this)
14004 : EmitLValue(Arg).emitRawPointer(*this);
14005 }
14006
14007 // Enable underlying preserve_*_access_index() generation.
14008 bool OldIsInPreservedAIRegion = IsInPreservedAIRegion;
14009 IsInPreservedAIRegion = true;
14010 Value *FieldAddr = IsBitField ? EmitLValue(Arg).getRawBitFieldPointer(*this)
14011 : EmitLValue(Arg).emitRawPointer(*this);
14012 IsInPreservedAIRegion = OldIsInPreservedAIRegion;
14013
14014 ConstantInt *C = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
14015 Value *InfoKind = ConstantInt::get(Int64Ty, C->getSExtValue());
14016
14017 // Build the IR for the preserve_field_info intrinsic.
14018 llvm::Function *FnGetFieldInfo = llvm::Intrinsic::getOrInsertDeclaration(
14019 &CGM.getModule(), llvm::Intrinsic::bpf_preserve_field_info,
14020 {FieldAddr->getType()});
14021 return Builder.CreateCall(FnGetFieldInfo, {FieldAddr, InfoKind});
14022 }
14023 case BPF::BI__builtin_btf_type_id:
14024 case BPF::BI__builtin_preserve_type_info: {
14025 if (!getDebugInfo()) {
14026 CGM.Error(E->getExprLoc(), "using builtin function without -g");
14027 return nullptr;
14028 }
14029
14030 const Expr *Arg0 = E->getArg(0);
14031 llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
14032 Arg0->getType(), Arg0->getExprLoc());
14033
14034 ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
14035 Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue());
14036 Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++);
14037
14038 llvm::Function *FnDecl;
14039 if (BuiltinID == BPF::BI__builtin_btf_type_id)
14040 FnDecl = llvm::Intrinsic::getOrInsertDeclaration(
14041 &CGM.getModule(), llvm::Intrinsic::bpf_btf_type_id, {});
14042 else
14043 FnDecl = llvm::Intrinsic::getOrInsertDeclaration(
14044 &CGM.getModule(), llvm::Intrinsic::bpf_preserve_type_info, {});
14045 CallInst *Fn = Builder.CreateCall(FnDecl, {SeqNumVal, FlagValue});
14046 Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
14047 return Fn;
14048 }
14049 case BPF::BI__builtin_preserve_enum_value: {
14050 if (!getDebugInfo()) {
14051 CGM.Error(E->getExprLoc(), "using builtin function without -g");
14052 return nullptr;
14053 }
14054
14055 const Expr *Arg0 = E->getArg(0);
14056 llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
14057 Arg0->getType(), Arg0->getExprLoc());
14058
14059 // Find enumerator
14060 const auto *UO = cast<UnaryOperator>(Arg0->IgnoreParens());
14061 const auto *CE = cast<CStyleCastExpr>(UO->getSubExpr());
14062 const auto *DR = cast<DeclRefExpr>(CE->getSubExpr());
14063 const auto *Enumerator = cast<EnumConstantDecl>(DR->getDecl());
14064
14065 auto InitVal = Enumerator->getInitVal();
14066 std::string InitValStr;
14067 if (InitVal.isNegative() || InitVal > uint64_t(INT64_MAX))
14068 InitValStr = std::to_string(InitVal.getSExtValue());
14069 else
14070 InitValStr = std::to_string(InitVal.getZExtValue());
14071 std::string EnumStr = Enumerator->getNameAsString() + ":" + InitValStr;
14072 Value *EnumStrVal = Builder.CreateGlobalString(EnumStr);
14073
14074 ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
14075 Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue());
14076 Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++);
14077
14078 llvm::Function *IntrinsicFn = llvm::Intrinsic::getOrInsertDeclaration(
14079 &CGM.getModule(), llvm::Intrinsic::bpf_preserve_enum_value, {});
14080 CallInst *Fn =
14081 Builder.CreateCall(IntrinsicFn, {SeqNumVal, EnumStrVal, FlagValue});
14082 Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
14083 return Fn;
14084 }
14085 }
14086}
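
// Editorial sketch (not part of the original CGBuiltin.cpp source): the BPF
// builtins handled above are only usable when compiling with debug info
// (-g), since their results are resolved through BTF relocations. A minimal,
// hypothetical CO-RE style use of __builtin_preserve_field_info is shown
// below; the struct, the field and the kind value 0 are illustrative
// assumptions, not definitions from this file. Build with:
// clang --target=bpf -g -O2.
struct example_task { // illustrative stand-in for a kernel type
  int pid;
};
static unsigned examplePreserveFieldInfo(struct example_task *t) {
  // The second argument is an integer constant selecting which kind of
  // field information to preserve; 0 is used here purely as a placeholder.
  return __builtin_preserve_field_info(t->pid, 0);
}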
14087
14088llvm::Value *CodeGenFunction::
14089BuildVector(ArrayRef<llvm::Value*> Ops) {
14090 assert((Ops.size() & (Ops.size() - 1)) == 0 &&
14091 "Not a power-of-two sized vector!");
14092 bool AllConstants = true;
14093 for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
14094 AllConstants &= isa<Constant>(Ops[i]);
14095
14096 // If this is a constant vector, create a ConstantVector.
14097 if (AllConstants) {
14098 SmallVector<llvm::Constant*, 16> CstOps;
14099 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
14100 CstOps.push_back(cast<Constant>(Ops[i]));
14101 return llvm::ConstantVector::get(CstOps);
14102 }
14103
14104 // Otherwise, insertelement the values to build the vector.
14105 Value *Result = llvm::PoisonValue::get(
14106 llvm::FixedVectorType::get(Ops[0]->getType(), Ops.size()));
14107
14108 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
14109 Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt64(i));
14110
14111 return Result;
14112}
14113
14114// Convert the mask from an integer type to a vector of i1.
14115static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask,
14116 unsigned NumElts) {
14117
14118 auto *MaskTy = llvm::FixedVectorType::get(
14119 CGF.Builder.getInt1Ty(),
14120 cast<IntegerType>(Mask->getType())->getBitWidth());
14121 Value *MaskVec = CGF.Builder.CreateBitCast(Mask, MaskTy);
14122
14123 // If we have less than 8 elements, then the starting mask was an i8 and
14124 // we need to extract down to the right number of elements.
14125 if (NumElts < 8) {
14126 int Indices[4];
14127 for (unsigned i = 0; i != NumElts; ++i)
14128 Indices[i] = i;
14129 MaskVec = CGF.Builder.CreateShuffleVector(
14130 MaskVec, MaskVec, ArrayRef(Indices, NumElts), "extract");
14131 }
14132 return MaskVec;
14133}
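
// Editorial sketch (not part of the original CGBuiltin.cpp source):
// getMaskVecValue bitcasts an iN mask to <N x i1> and, for masks wider than
// the vector, extracts just the low lanes. The net effect is that bit i of
// the integer mask controls vector lane i, as the standalone model below
// (an illustrative helper, not used by the code above) makes explicit.
static void exampleMaskToLanes(unsigned long long Mask, unsigned NumElts,
                               bool *Lanes) {
  for (unsigned i = 0; i != NumElts; ++i)
    Lanes[i] = (Mask >> i) & 1;
}
// e.g. an i8 mask of 0b00000101 with NumElts == 4 yields lanes
// {true, false, true, false}.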
14134
14135static Value *EmitX86MaskedStore(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
14136 Align Alignment) {
14137 Value *Ptr = Ops[0];
14138
14139 Value *MaskVec = getMaskVecValue(
14140 CGF, Ops[2],
14141 cast<llvm::FixedVectorType>(Ops[1]->getType())->getNumElements());
14142
14143 return CGF.Builder.CreateMaskedStore(Ops[1], Ptr, Alignment, MaskVec);
14144}
14145
14146static Value *EmitX86MaskedLoad(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
14147 Align Alignment) {
14148 llvm::Type *Ty = Ops[1]->getType();
14149 Value *Ptr = Ops[0];
14150
14151 Value *MaskVec = getMaskVecValue(
14152 CGF, Ops[2], cast<llvm::FixedVectorType>(Ty)->getNumElements());
14153
14154 return CGF.Builder.CreateMaskedLoad(Ty, Ptr, Alignment, MaskVec, Ops[1]);
14155}
14156
14157static Value *EmitX86ExpandLoad(CodeGenFunction &CGF,
14158 ArrayRef<Value *> Ops) {
14159 auto *ResultTy = cast<llvm::VectorType>(Ops[1]->getType());
14160 Value *Ptr = Ops[0];
14161
14162 Value *MaskVec = getMaskVecValue(
14163 CGF, Ops[2], cast<FixedVectorType>(ResultTy)->getNumElements());
14164
14165 llvm::Function *F = CGF.CGM.getIntrinsic(Intrinsic::masked_expandload,
14166 ResultTy);
14167 return CGF.Builder.CreateCall(F, { Ptr, MaskVec, Ops[1] });
14168}
14169
14170static Value *EmitX86CompressExpand(CodeGenFunction &CGF,
14171 ArrayRef<Value *> Ops,
14172 bool IsCompress) {
14173 auto *ResultTy = cast<llvm::FixedVectorType>(Ops[1]->getType());
14174
14175 Value *MaskVec = getMaskVecValue(CGF, Ops[2], ResultTy->getNumElements());
14176
14177 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
14178 : Intrinsic::x86_avx512_mask_expand;
14179 llvm::Function *F = CGF.CGM.getIntrinsic(IID, ResultTy);
14180 return CGF.Builder.CreateCall(F, { Ops[0], Ops[1], MaskVec });
14181}
14182
14183static Value *EmitX86CompressStore(CodeGenFunction &CGF,
14184 ArrayRef<Value *> Ops) {
14185 auto *ResultTy = cast<llvm::FixedVectorType>(Ops[1]->getType());
14186 Value *Ptr = Ops[0];
14187
14188 Value *MaskVec = getMaskVecValue(CGF, Ops[2], ResultTy->getNumElements());
14189
14190 llvm::Function *F = CGF.CGM.getIntrinsic(Intrinsic::masked_compressstore,
14191 ResultTy);
14192 return CGF.Builder.CreateCall(F, { Ops[1], Ptr, MaskVec });
14193}
14194
14195static Value *EmitX86MaskLogic(CodeGenFunction &CGF, Instruction::BinaryOps Opc,
14196 ArrayRef<Value *> Ops,
14197 bool InvertLHS = false) {
14198 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
14199 Value *LHS = getMaskVecValue(CGF, Ops[0], NumElts);
14200 Value *RHS = getMaskVecValue(CGF, Ops[1], NumElts);
14201
14202 if (InvertLHS)
14203 LHS = CGF.Builder.CreateNot(LHS);
14204
14205 return CGF.Builder.CreateBitCast(CGF.Builder.CreateBinOp(Opc, LHS, RHS),
14206 Ops[0]->getType());
14207}
14208
14209static Value *EmitX86FunnelShift(CodeGenFunction &CGF, Value *Op0, Value *Op1,
14210 Value *Amt, bool IsRight) {
14211 llvm::Type *Ty = Op0->getType();
14212
14213 // The amount may be a scalar immediate, in which case create a splat vector.
14214 // Funnel shift amounts are treated modulo the element width, and the types
14215 // are all power-of-2 sized, so we only care about the lowest log2 bits anyway.
14216 if (Amt->getType() != Ty) {
14217 unsigned NumElts = cast<llvm::FixedVectorType>(Ty)->getNumElements();
14218 Amt = CGF.Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
14219 Amt = CGF.Builder.CreateVectorSplat(NumElts, Amt);
14220 }
14221
14222 unsigned IID = IsRight ? Intrinsic::fshr : Intrinsic::fshl;
14223 Function *F = CGF.CGM.getIntrinsic(IID, Ty);
14224 return CGF.Builder.CreateCall(F, {Op0, Op1, Amt});
14225}
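
// Editorial sketch (not part of the original CGBuiltin.cpp source): the
// llvm.fshl/llvm.fshr intrinsics used above conceptually concatenate the two
// operands into a double-width value, shift it by the amount modulo the
// element width, and return one half. The illustrative helper below models a
// single 8-bit lane; the vector forms apply this per element.
static unsigned char exampleFunnelShift8(unsigned char Hi, unsigned char Lo,
                                         unsigned Amt, bool IsRight) {
  Amt &= 7;                                   // amounts are taken modulo 8
  unsigned Concat = (unsigned(Hi) << 8) | Lo; // 16-bit "funnel"
  unsigned Shifted = IsRight ? (Concat >> Amt) : (Concat << Amt);
  // fshl returns the high half of the shifted funnel, fshr the low half.
  return IsRight ? (unsigned char)Shifted : (unsigned char)(Shifted >> 8);
}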
14226
14227static Value *EmitX86vpcom(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
14228 bool IsSigned) {
14229 Value *Op0 = Ops[0];
14230 Value *Op1 = Ops[1];
14231 llvm::Type *Ty = Op0->getType();
14232 uint64_t Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
14233
14234 CmpInst::Predicate Pred;
14235 switch (Imm) {
14236 case 0x0:
14237 Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
14238 break;
14239 case 0x1:
14240 Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
14241 break;
14242 case 0x2:
14243 Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
14244 break;
14245 case 0x3:
14246 Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
14247 break;
14248 case 0x4:
14249 Pred = ICmpInst::ICMP_EQ;
14250 break;
14251 case 0x5:
14252 Pred = ICmpInst::ICMP_NE;
14253 break;
14254 case 0x6:
14255 return llvm::Constant::getNullValue(Ty); // FALSE
14256 case 0x7:
14257 return llvm::Constant::getAllOnesValue(Ty); // TRUE
14258 default:
14259 llvm_unreachable("Unexpected XOP vpcom/vpcomu predicate");
14260 }
14261
14262 Value *Cmp = CGF.Builder.CreateICmp(Pred, Op0, Op1);
14263 Value *Res = CGF.Builder.CreateSExt(Cmp, Ty);
14264 return Res;
14265}
14266
14267static Value *EmitX86Select(CodeGenFunction &CGF,
14268 Value *Mask, Value *Op0, Value *Op1) {
14269
14270 // If the mask is all ones just return first argument.
14271 if (const auto *C = dyn_cast<Constant>(Mask))
14272 if (C->isAllOnesValue())
14273 return Op0;
14274
14275 Mask = getMaskVecValue(
14276 CGF, Mask, cast<llvm::FixedVectorType>(Op0->getType())->getNumElements());
14277
14278 return CGF.Builder.CreateSelect(Mask, Op0, Op1);
14279}
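
// Editorial sketch (not part of the original CGBuiltin.cpp source):
// EmitX86Select implements the usual AVX-512 merge-masking pattern: lane i
// of the result is the freshly computed lane when bit i of the mask is set,
// and the pass-through lane otherwise. Illustrative per-lane model:
static void exampleMaskedSelect(unsigned long long Mask, const int *IfSet,
                                const int *IfClear, int *Out,
                                unsigned NumElts) {
  for (unsigned i = 0; i != NumElts; ++i)
    Out[i] = ((Mask >> i) & 1) ? IfSet[i] : IfClear[i];
}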
14280
14281static Value *EmitX86ScalarSelect(CodeGenFunction &CGF,
14282 Value *Mask, Value *Op0, Value *Op1) {
14283 // If the mask is all ones just return first argument.
14284 if (const auto *C = dyn_cast<Constant>(Mask))
14285 if (C->isAllOnesValue())
14286 return Op0;
14287
14288 auto *MaskTy = llvm::FixedVectorType::get(
14289 CGF.Builder.getInt1Ty(), Mask->getType()->getIntegerBitWidth());
14290 Mask = CGF.Builder.CreateBitCast(Mask, MaskTy);
14291 Mask = CGF.Builder.CreateExtractElement(Mask, (uint64_t)0);
14292 return CGF.Builder.CreateSelect(Mask, Op0, Op1);
14293}
14294
14295static Value *EmitX86MaskedCompareResult(CodeGenFunction &CGF, Value *Cmp,
14296 unsigned NumElts, Value *MaskIn) {
14297 if (MaskIn) {
14298 const auto *C = dyn_cast<Constant>(MaskIn);
14299 if (!C || !C->isAllOnesValue())
14300 Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, MaskIn, NumElts));
14301 }
14302
14303 if (NumElts < 8) {
14304 int Indices[8];
14305 for (unsigned i = 0; i != NumElts; ++i)
14306 Indices[i] = i;
14307 for (unsigned i = NumElts; i != 8; ++i)
14308 Indices[i] = i % NumElts + NumElts;
14309 Cmp = CGF.Builder.CreateShuffleVector(
14310 Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices);
14311 }
14312
14313 return CGF.Builder.CreateBitCast(Cmp,
14314 IntegerType::get(CGF.getLLVMContext(),
14315 std::max(NumElts, 8U)));
14316}
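
// Editorial sketch (not part of the original CGBuiltin.cpp source): when a
// compare produces fewer than 8 i1 lanes, the code above widens the result to
// 8 lanes before bitcasting, and the extra lanes are zero because they are
// sourced from a null vector. The illustrative helper below shows the
// resulting integer mask: lane i of the compare becomes bit i of the value.
static unsigned long long examplePackCompareMask(const bool *CmpLanes,
                                                 unsigned NumElts) {
  unsigned Width = NumElts < 8 ? 8 : NumElts;
  unsigned long long Mask = 0;
  for (unsigned i = 0; i != Width; ++i)
    if (i < NumElts && CmpLanes[i])
      Mask |= 1ULL << i;
  return Mask; // the bitcast of the <Width x i1> vector to an iWidth value
}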
14317
14318static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC,
14319 bool Signed, ArrayRef<Value *> Ops) {
14320 assert((Ops.size() == 2 || Ops.size() == 4) &&
14321 "Unexpected number of arguments");
14322 unsigned NumElts =
14323 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
14324 Value *Cmp;
14325
14326 if (CC == 3) {
14327 Cmp = Constant::getNullValue(
14328 llvm::FixedVectorType::get(CGF.Builder.getInt1Ty(), NumElts));
14329 } else if (CC == 7) {
14330 Cmp = Constant::getAllOnesValue(
14331 llvm::FixedVectorType::get(CGF.Builder.getInt1Ty(), NumElts));
14332 } else {
14333 ICmpInst::Predicate Pred;
14334 switch (CC) {
14335 default: llvm_unreachable("Unknown condition code");
14336 case 0: Pred = ICmpInst::ICMP_EQ; break;
14337 case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
14338 case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
14339 case 4: Pred = ICmpInst::ICMP_NE; break;
14340 case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
14341 case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
14342 }
14343 Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);
14344 }
14345
14346 Value *MaskIn = nullptr;
14347 if (Ops.size() == 4)
14348 MaskIn = Ops[3];
14349
14350 return EmitX86MaskedCompareResult(CGF, Cmp, NumElts, MaskIn);
14351}
14352
14353static Value *EmitX86ConvertToMask(CodeGenFunction &CGF, Value *In) {
14354 Value *Zero = Constant::getNullValue(In->getType());
14355 return EmitX86MaskedCompare(CGF, 1, true, { In, Zero });
14356}
14357
14358static Value *EmitX86ConvertIntToFp(CodeGenFunction &CGF, const CallExpr *E,
14359 ArrayRef<Value *> Ops, bool IsSigned) {
14360 unsigned Rnd = cast<llvm::ConstantInt>(Ops[3])->getZExtValue();
14361 llvm::Type *Ty = Ops[1]->getType();
14362
14363 Value *Res;
14364 if (Rnd != 4) {
14365 Intrinsic::ID IID = IsSigned ? Intrinsic::x86_avx512_sitofp_round
14366 : Intrinsic::x86_avx512_uitofp_round;
14367 Function *F = CGF.CGM.getIntrinsic(IID, { Ty, Ops[0]->getType() });
14368 Res = CGF.Builder.CreateCall(F, { Ops[0], Ops[3] });
14369 } else {
14370 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
14371 Res = IsSigned ? CGF.Builder.CreateSIToFP(Ops[0], Ty)
14372 : CGF.Builder.CreateUIToFP(Ops[0], Ty);
14373 }
14374
14375 return EmitX86Select(CGF, Ops[2], Res, Ops[1]);
14376}
14377
14378// Lowers X86 FMA intrinsics to IR.
14379static Value *EmitX86FMAExpr(CodeGenFunction &CGF, const CallExpr *E,
14380 ArrayRef<Value *> Ops, unsigned BuiltinID,
14381 bool IsAddSub) {
14382
14383 bool Subtract = false;
14384 Intrinsic::ID IID = Intrinsic::not_intrinsic;
14385 switch (BuiltinID) {
14386 default: break;
14387 case clang::X86::BI__builtin_ia32_vfmsubph512_mask3:
14388 Subtract = true;
14389 [[fallthrough]];
14390 case clang::X86::BI__builtin_ia32_vfmaddph512_mask:
14391 case clang::X86::BI__builtin_ia32_vfmaddph512_maskz:
14392 case clang::X86::BI__builtin_ia32_vfmaddph512_mask3:
14393 IID = llvm::Intrinsic::x86_avx512fp16_vfmadd_ph_512;
14394 break;
14395 case clang::X86::BI__builtin_ia32_vfmsubaddph512_mask3:
14396 Subtract = true;
14397 [[fallthrough]];
14398 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask:
14399 case clang::X86::BI__builtin_ia32_vfmaddsubph512_maskz:
14400 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask3:
14401 IID = llvm::Intrinsic::x86_avx512fp16_vfmaddsub_ph_512;
14402 break;
14403 case clang::X86::BI__builtin_ia32_vfmsubps512_mask3:
14404 Subtract = true;
14405 [[fallthrough]];
14406 case clang::X86::BI__builtin_ia32_vfmaddps512_mask:
14407 case clang::X86::BI__builtin_ia32_vfmaddps512_maskz:
14408 case clang::X86::BI__builtin_ia32_vfmaddps512_mask3:
14409 IID = llvm::Intrinsic::x86_avx512_vfmadd_ps_512; break;
14410 case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3:
14411 Subtract = true;
14412 [[fallthrough]];
14413 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask:
14414 case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz:
14415 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3:
14416 IID = llvm::Intrinsic::x86_avx512_vfmadd_pd_512; break;
14417 case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3:
14418 Subtract = true;
14419 [[fallthrough]];
14420 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask:
14421 case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz:
14422 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3:
14423 IID = llvm::Intrinsic::x86_avx512_vfmaddsub_ps_512;
14424 break;
14425 case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
14426 Subtract = true;
14427 [[fallthrough]];
14428 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask:
14429 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
14430 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
14431 IID = llvm::Intrinsic::x86_avx512_vfmaddsub_pd_512;
14432 break;
14433 case clang::X86::BI__builtin_ia32_vfmsubph256_round_mask3:
14434 Subtract = true;
14435 [[fallthrough]];
14436 case clang::X86::BI__builtin_ia32_vfmaddph256_round_mask:
14437 case clang::X86::BI__builtin_ia32_vfmaddph256_round_maskz:
14438 case clang::X86::BI__builtin_ia32_vfmaddph256_round_mask3:
14439 IID = llvm::Intrinsic::x86_avx10_vfmaddph256;
14440 break;
14441 case clang::X86::BI__builtin_ia32_vfmsubaddph256_round_mask3:
14442 Subtract = true;
14443 [[fallthrough]];
14444 case clang::X86::BI__builtin_ia32_vfmaddsubph256_round_mask:
14445 case clang::X86::BI__builtin_ia32_vfmaddsubph256_round_maskz:
14446 case clang::X86::BI__builtin_ia32_vfmaddsubph256_round_mask3:
14447 IID = llvm::Intrinsic::x86_avx10_vfmaddsubph256;
14448 break;
14449 case clang::X86::BI__builtin_ia32_vfmsubps256_round_mask3:
14450 Subtract = true;
14451 [[fallthrough]];
14452 case clang::X86::BI__builtin_ia32_vfmaddps256_round_mask:
14453 case clang::X86::BI__builtin_ia32_vfmaddps256_round_maskz:
14454 case clang::X86::BI__builtin_ia32_vfmaddps256_round_mask3:
14455 IID = llvm::Intrinsic::x86_avx10_vfmaddps256;
14456 break;
14457 case clang::X86::BI__builtin_ia32_vfmsubpd256_round_mask3:
14458 Subtract = true;
14459 [[fallthrough]];
14460 case clang::X86::BI__builtin_ia32_vfmaddpd256_round_mask:
14461 case clang::X86::BI__builtin_ia32_vfmaddpd256_round_maskz:
14462 case clang::X86::BI__builtin_ia32_vfmaddpd256_round_mask3:
14463 IID = llvm::Intrinsic::x86_avx10_vfmaddpd256;
14464 break;
14465 case clang::X86::BI__builtin_ia32_vfmsubaddps256_round_mask3:
14466 Subtract = true;
14467 [[fallthrough]];
14468 case clang::X86::BI__builtin_ia32_vfmaddsubps256_round_mask:
14469 case clang::X86::BI__builtin_ia32_vfmaddsubps256_round_maskz:
14470 case clang::X86::BI__builtin_ia32_vfmaddsubps256_round_mask3:
14471 IID = llvm::Intrinsic::x86_avx10_vfmaddsubps256;
14472 break;
14473 case clang::X86::BI__builtin_ia32_vfmsubaddpd256_round_mask3:
14474 Subtract = true;
14475 [[fallthrough]];
14476 case clang::X86::BI__builtin_ia32_vfmaddsubpd256_round_mask:
14477 case clang::X86::BI__builtin_ia32_vfmaddsubpd256_round_maskz:
14478 case clang::X86::BI__builtin_ia32_vfmaddsubpd256_round_mask3:
14479 IID = llvm::Intrinsic::x86_avx10_vfmaddsubpd256;
14480 break;
14481 }
14482
14483 Value *A = Ops[0];
14484 Value *B = Ops[1];
14485 Value *C = Ops[2];
14486
14487 if (Subtract)
14488 C = CGF.Builder.CreateFNeg(C);
14489
14490 Value *Res;
14491
14492 // Use the generic fma only for _MM_FROUND_CUR_DIRECTION/4 (no explicit rounding) and non-addsub forms; otherwise call the target-specific intrinsic.
14493 if (IID != Intrinsic::not_intrinsic &&
14494 (cast<llvm::ConstantInt>(Ops.back())->getZExtValue() != (uint64_t)4 ||
14495 IsAddSub)) {
14496 Function *Intr = CGF.CGM.getIntrinsic(IID);
14497 Res = CGF.Builder.CreateCall(Intr, {A, B, C, Ops.back() });
14498 } else {
14499 llvm::Type *Ty = A->getType();
14500 Function *FMA;
14501 if (CGF.Builder.getIsFPConstrained()) {
14502 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
14503 FMA = CGF.CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, Ty);
14504 Res = CGF.Builder.CreateConstrainedFPCall(FMA, {A, B, C});
14505 } else {
14506 FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ty);
14507 Res = CGF.Builder.CreateCall(FMA, {A, B, C});
14508 }
14509 }
14510
14511 // Handle any required masking.
14512 Value *MaskFalseVal = nullptr;
14513 switch (BuiltinID) {
14514 case clang::X86::BI__builtin_ia32_vfmaddph512_mask:
14515 case clang::X86::BI__builtin_ia32_vfmaddps512_mask:
14516 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask:
14517 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask:
14518 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask:
14519 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask:
14520 case clang::X86::BI__builtin_ia32_vfmaddph256_round_mask:
14521 case clang::X86::BI__builtin_ia32_vfmaddps256_round_mask:
14522 case clang::X86::BI__builtin_ia32_vfmaddpd256_round_mask:
14523 case clang::X86::BI__builtin_ia32_vfmaddsubph256_round_mask:
14524 case clang::X86::BI__builtin_ia32_vfmaddsubps256_round_mask:
14525 case clang::X86::BI__builtin_ia32_vfmaddsubpd256_round_mask:
14526 MaskFalseVal = Ops[0];
14527 break;
14528 case clang::X86::BI__builtin_ia32_vfmaddph512_maskz:
14529 case clang::X86::BI__builtin_ia32_vfmaddps512_maskz:
14530 case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz:
14531 case clang::X86::BI__builtin_ia32_vfmaddsubph512_maskz:
14532 case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz:
14533 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
14534 case clang::X86::BI__builtin_ia32_vfmaddph256_round_maskz:
14535 case clang::X86::BI__builtin_ia32_vfmaddps256_round_maskz:
14536 case clang::X86::BI__builtin_ia32_vfmaddpd256_round_maskz:
14537 case clang::X86::BI__builtin_ia32_vfmaddsubph256_round_maskz:
14538 case clang::X86::BI__builtin_ia32_vfmaddsubps256_round_maskz:
14539 case clang::X86::BI__builtin_ia32_vfmaddsubpd256_round_maskz:
14540 MaskFalseVal = Constant::getNullValue(Ops[0]->getType());
14541 break;
14542 case clang::X86::BI__builtin_ia32_vfmsubph512_mask3:
14543 case clang::X86::BI__builtin_ia32_vfmaddph512_mask3:
14544 case clang::X86::BI__builtin_ia32_vfmsubps512_mask3:
14545 case clang::X86::BI__builtin_ia32_vfmaddps512_mask3:
14546 case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3:
14547 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3:
14548 case clang::X86::BI__builtin_ia32_vfmsubaddph512_mask3:
14549 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask3:
14550 case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3:
14551 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3:
14552 case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
14553 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
14554 case clang::X86::BI__builtin_ia32_vfmsubph256_round_mask3:
14555 case clang::X86::BI__builtin_ia32_vfmaddph256_round_mask3:
14556 case clang::X86::BI__builtin_ia32_vfmsubps256_round_mask3:
14557 case clang::X86::BI__builtin_ia32_vfmaddps256_round_mask3:
14558 case clang::X86::BI__builtin_ia32_vfmsubpd256_round_mask3:
14559 case clang::X86::BI__builtin_ia32_vfmaddpd256_round_mask3:
14560 case clang::X86::BI__builtin_ia32_vfmsubaddph256_round_mask3:
14561 case clang::X86::BI__builtin_ia32_vfmaddsubph256_round_mask3:
14562 case clang::X86::BI__builtin_ia32_vfmsubaddps256_round_mask3:
14563 case clang::X86::BI__builtin_ia32_vfmaddsubps256_round_mask3:
14564 case clang::X86::BI__builtin_ia32_vfmsubaddpd256_round_mask3:
14565 case clang::X86::BI__builtin_ia32_vfmaddsubpd256_round_mask3:
14566 MaskFalseVal = Ops[2];
14567 break;
14568 }
14569
14570 if (MaskFalseVal)
14571 return EmitX86Select(CGF, Ops[3], Res, MaskFalseVal);
14572
14573 return Res;
14574}
14575
14576static Value *EmitScalarFMAExpr(CodeGenFunction &CGF, const CallExpr *E,
14577 MutableArrayRef<Value *> Ops, Value *Upper,
14578 bool ZeroMask = false, unsigned PTIdx = 0,
14579 bool NegAcc = false) {
14580 unsigned Rnd = 4;
14581 if (Ops.size() > 4)
14582 Rnd = cast<llvm::ConstantInt>(Ops[4])->getZExtValue();
14583
14584 if (NegAcc)
14585 Ops[2] = CGF.Builder.CreateFNeg(Ops[2]);
14586
14587 Ops[0] = CGF.Builder.CreateExtractElement(Ops[0], (uint64_t)0);
14588 Ops[1] = CGF.Builder.CreateExtractElement(Ops[1], (uint64_t)0);
14589 Ops[2] = CGF.Builder.CreateExtractElement(Ops[2], (uint64_t)0);
14590 Value *Res;
14591 if (Rnd != 4) {
14592 Intrinsic::ID IID;
14593
14594 switch (Ops[0]->getType()->getPrimitiveSizeInBits()) {
14595 case 16:
14596 IID = Intrinsic::x86_avx512fp16_vfmadd_f16;
14597 break;
14598 case 32:
14599 IID = Intrinsic::x86_avx512_vfmadd_f32;
14600 break;
14601 case 64:
14602 IID = Intrinsic::x86_avx512_vfmadd_f64;
14603 break;
14604 default:
14605 llvm_unreachable("Unexpected size");
14606 }
14607 Res = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID),
14608 {Ops[0], Ops[1], Ops[2], Ops[4]});
14609 } else if (CGF.Builder.getIsFPConstrained()) {
14610 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
14611 Function *FMA = CGF.CGM.getIntrinsic(
14612 Intrinsic::experimental_constrained_fma, Ops[0]->getType());
14613 Res = CGF.Builder.CreateConstrainedFPCall(FMA, Ops.slice(0, 3));
14614 } else {
14615 Function *FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ops[0]->getType());
14616 Res = CGF.Builder.CreateCall(FMA, Ops.slice(0, 3));
14617 }
14618 // If we have more than 3 arguments, we need to do masking.
14619 if (Ops.size() > 3) {
14620 Value *PassThru = ZeroMask ? Constant::getNullValue(Res->getType())
14621 : Ops[PTIdx];
14622
14623 // If we negated the accumulator and it is the PassThru value, we need to
14624 // bypass the negate. Conveniently, Upper should be the same thing in this
14625 // case.
14626 if (NegAcc && PTIdx == 2)
14627 PassThru = CGF.Builder.CreateExtractElement(Upper, (uint64_t)0);
14628
14629 Res = EmitX86ScalarSelect(CGF, Ops[3], Res, PassThru);
14630 }
14631 return CGF.Builder.CreateInsertElement(Upper, Res, (uint64_t)0);
14632}
14633
14634static Value *EmitX86Muldq(CodeGenFunction &CGF, bool IsSigned,
14635 ArrayRef<Value *> Ops) {
14636 llvm::Type *Ty = Ops[0]->getType();
14637 // Arguments have a vXi32 type so cast to vXi64.
14638 Ty = llvm::FixedVectorType::get(CGF.Int64Ty,
14639 Ty->getPrimitiveSizeInBits() / 64);
14640 Value *LHS = CGF.Builder.CreateBitCast(Ops[0], Ty);
14641 Value *RHS = CGF.Builder.CreateBitCast(Ops[1], Ty);
14642
14643 if (IsSigned) {
14644 // Shift left then arithmetic shift right.
14645 Constant *ShiftAmt = ConstantInt::get(Ty, 32);
14646 LHS = CGF.Builder.CreateShl(LHS, ShiftAmt);
14647 LHS = CGF.Builder.CreateAShr(LHS, ShiftAmt);
14648 RHS = CGF.Builder.CreateShl(RHS, ShiftAmt);
14649 RHS = CGF.Builder.CreateAShr(RHS, ShiftAmt);
14650 } else {
14651 // Clear the upper bits.
14652 Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
14653 LHS = CGF.Builder.CreateAnd(LHS, Mask);
14654 RHS = CGF.Builder.CreateAnd(RHS, Mask);
14655 }
14656
14657 return CGF.Builder.CreateMul(LHS, RHS);
14658}
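
// Editorial sketch (not part of the original CGBuiltin.cpp source): the
// pmuldq/pmuludq-style builtins handled above multiply only the low 32 bits
// of each 64-bit lane, which the code recreates with shift pairs (signed) or
// masking (unsigned) before a full 64-bit multiply. Illustrative per-lane
// model returning the 64-bit lane bit pattern:
static long long exampleSignExtend32(unsigned long long V) {
  long long Lo = (long long)(V & 0xffffffffULL);
  return (Lo & 0x80000000LL) ? Lo - 0x100000000LL : Lo;
}
static unsigned long long exampleMulLow32(unsigned long long A,
                                          unsigned long long B,
                                          bool IsSigned) {
  if (IsSigned) // pmuldq: sign-extend the low 32 bits of each lane
    return (unsigned long long)(exampleSignExtend32(A) *
                                exampleSignExtend32(B));
  // pmuludq: zero-extend the low 32 bits of each lane
  return (A & 0xffffffffULL) * (B & 0xffffffffULL);
}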
14659
14660// Emit a masked pternlog intrinsic. This only exists because the header has to
14661// use a macro and we aren't able to pass the input argument to a pternlog
14662// builtin and a select builtin without evaluating it twice.
14663static Value *EmitX86Ternlog(CodeGenFunction &CGF, bool ZeroMask,
14664 ArrayRef<Value *> Ops) {
14665 llvm::Type *Ty = Ops[0]->getType();
14666
14667 unsigned VecWidth = Ty->getPrimitiveSizeInBits();
14668 unsigned EltWidth = Ty->getScalarSizeInBits();
14669 Intrinsic::ID IID;
14670 if (VecWidth == 128 && EltWidth == 32)
14671 IID = Intrinsic::x86_avx512_pternlog_d_128;
14672 else if (VecWidth == 256 && EltWidth == 32)
14673 IID = Intrinsic::x86_avx512_pternlog_d_256;
14674 else if (VecWidth == 512 && EltWidth == 32)
14675 IID = Intrinsic::x86_avx512_pternlog_d_512;
14676 else if (VecWidth == 128 && EltWidth == 64)
14677 IID = Intrinsic::x86_avx512_pternlog_q_128;
14678 else if (VecWidth == 256 && EltWidth == 64)
14679 IID = Intrinsic::x86_avx512_pternlog_q_256;
14680 else if (VecWidth == 512 && EltWidth == 64)
14681 IID = Intrinsic::x86_avx512_pternlog_q_512;
14682 else
14683 llvm_unreachable("Unexpected intrinsic");
14684
14685 Value *Ternlog = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID),
14686 Ops.drop_back());
14687 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty) : Ops[0];
14688 return EmitX86Select(CGF, Ops[4], Ternlog, PassThru);
14689}
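
// Editorial sketch (not part of the original CGBuiltin.cpp source): vpternlog
// evaluates an arbitrary three-input boolean function per bit, where the bit
// triple (a, b, c) indexes into the 8-bit immediate as imm8[a*4 + b*2 + c].
// Illustrative bitwise model of that truth-table lookup:
static unsigned long long exampleTernlog(unsigned long long A,
                                         unsigned long long B,
                                         unsigned long long C, unsigned Imm8) {
  unsigned long long R = 0;
  for (unsigned Bit = 0; Bit != 64; ++Bit) {
    unsigned Idx = (unsigned)(((A >> Bit) & 1) << 2 | ((B >> Bit) & 1) << 1 |
                              ((C >> Bit) & 1));
    R |= (unsigned long long)((Imm8 >> Idx) & 1) << Bit;
  }
  return R;
}
// e.g. Imm8 == 0xE8 yields the majority function (A & B) | (A & C) | (B & C).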
14690
14691static Value *EmitX86SExtMask(CodeGenFunction &CGF, Value *Op,
14692 llvm::Type *DstTy) {
14693 unsigned NumberOfElements =
14694 cast<llvm::FixedVectorType>(DstTy)->getNumElements();
14695 Value *Mask = getMaskVecValue(CGF, Op, NumberOfElements);
14696 return CGF.Builder.CreateSExt(Mask, DstTy, "vpmovm2");
14697}
14698
14699Value *CodeGenFunction::EmitX86CpuIs(const CallExpr *E) {
14700 const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
14701 StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
14702 return EmitX86CpuIs(CPUStr);
14703}
14704
14705// Convert F16 halfs to floats.
14706static Value *EmitX86CvtF16ToFloatExpr(CodeGenFunction &CGF,
14707 ArrayRef<Value *> Ops,
14708 llvm::Type *DstTy) {
14709 assert((Ops.size() == 1 || Ops.size() == 3 || Ops.size() == 4) &&
14710 "Unknown cvtph2ps intrinsic");
14711
14712 // If the SAE intrinsic doesn't use default rounding then we can't upgrade.
14713 if (Ops.size() == 4 && cast<llvm::ConstantInt>(Ops[3])->getZExtValue() != 4) {
14714 Function *F =
14715 CGF.CGM.getIntrinsic(Intrinsic::x86_avx512_mask_vcvtph2ps_512);
14716 return CGF.Builder.CreateCall(F, {Ops[0], Ops[1], Ops[2], Ops[3]});
14717 }
14718
14719 unsigned NumDstElts = cast<llvm::FixedVectorType>(DstTy)->getNumElements();
14720 Value *Src = Ops[0];
14721
14722 // Extract the subvector.
14723 if (NumDstElts !=
14724 cast<llvm::FixedVectorType>(Src->getType())->getNumElements()) {
14725 assert(NumDstElts == 4 && "Unexpected vector size");
14726 Src = CGF.Builder.CreateShuffleVector(Src, ArrayRef<int>{0, 1, 2, 3});
14727 }
14728
14729 // Bitcast from vXi16 to vXf16.
14730 auto *HalfTy = llvm::FixedVectorType::get(
14731 llvm::Type::getHalfTy(CGF.getLLVMContext()), NumDstElts);
14732 Src = CGF.Builder.CreateBitCast(Src, HalfTy);
14733
14734 // Perform the fp-extension.
14735 Value *Res = CGF.Builder.CreateFPExt(Src, DstTy, "cvtph2ps");
14736
14737 if (Ops.size() >= 3)
14738 Res = EmitX86Select(CGF, Ops[2], Res, Ops[1]);
14739 return Res;
14740}
14741
14742Value *CodeGenFunction::EmitX86CpuIs(StringRef CPUStr) {
14743
14744 llvm::Type *Int32Ty = Builder.getInt32Ty();
14745
14746 // Matching the struct layout from the compiler-rt/libgcc structure that is
14747 // filled in:
14748 // unsigned int __cpu_vendor;
14749 // unsigned int __cpu_type;
14750 // unsigned int __cpu_subtype;
14751 // unsigned int __cpu_features[1];
14752 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
14753 llvm::ArrayType::get(Int32Ty, 1));
14754
14755 // Grab the global __cpu_model.
14756 llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
14757 cast<llvm::GlobalValue>(CpuModel)->setDSOLocal(true);
14758
14759 // Calculate the index needed to access the correct field based on the
14760 // range. Also adjust the expected value.
14761 unsigned Index;
14762 unsigned Value;
14763 std::tie(Index, Value) = StringSwitch<std::pair<unsigned, unsigned>>(CPUStr)
14764#define X86_VENDOR(ENUM, STRING) \
14765 .Case(STRING, {0u, static_cast<unsigned>(llvm::X86::ENUM)})
14766#define X86_CPU_TYPE_ALIAS(ENUM, ALIAS) \
14767 .Case(ALIAS, {1u, static_cast<unsigned>(llvm::X86::ENUM)})
14768#define X86_CPU_TYPE(ENUM, STR) \
14769 .Case(STR, {1u, static_cast<unsigned>(llvm::X86::ENUM)})
14770#define X86_CPU_SUBTYPE_ALIAS(ENUM, ALIAS) \
14771 .Case(ALIAS, {2u, static_cast<unsigned>(llvm::X86::ENUM)})
14772#define X86_CPU_SUBTYPE(ENUM, STR) \
14773 .Case(STR, {2u, static_cast<unsigned>(llvm::X86::ENUM)})
14774#include "llvm/TargetParser/X86TargetParser.def"
14775 .Default({0, 0});
14776 assert(Value != 0 && "Invalid CPUStr passed to CpuIs");
14777
14778 // Grab the appropriate field from __cpu_model.
14779 llvm::Value *Idxs[] = {ConstantInt::get(Int32Ty, 0),
14780 ConstantInt::get(Int32Ty, Index)};
14781 llvm::Value *CpuValue = Builder.CreateInBoundsGEP(STy, CpuModel, Idxs);
14782 CpuValue = Builder.CreateAlignedLoad(Int32Ty, CpuValue,
14783 CharUnits::fromQuantity(4));
14784
14785 // Check the value of the field against the requested value.
14786 return Builder.CreateICmpEQ(CpuValue,
14787 llvm::ConstantInt::get(Int32Ty, Value));
14788}
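
// Editorial sketch (not part of the original CGBuiltin.cpp source): the check
// above is reached from __builtin_cpu_is, whose string argument selects a
// (field index, constant) pair from X86TargetParser.def; the generated code
// then compares that one __cpu_model field against the constant. Illustrative
// user-level usage (the strings below are examples, not values defined here):
static int exampleCpuDispatch(void) {
  if (__builtin_cpu_is("amd"))         // compares the __cpu_vendor field
    return 1;
  if (__builtin_cpu_is("sandybridge")) // compares the cpu type/subtype fields
    return 2;
  return 0;
}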
14789
14790Value *CodeGenFunction::EmitX86CpuSupports(const CallExpr *E) {
14791 const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts();
14792 StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString();
14793 if (!getContext().getTargetInfo().validateCpuSupports(FeatureStr))
14794 return Builder.getFalse();
14795 return EmitX86CpuSupports(FeatureStr);
14796}
14797
14798Value *CodeGenFunction::EmitX86CpuSupports(ArrayRef<StringRef> FeatureStrs) {
14799 return EmitX86CpuSupports(llvm::X86::getCpuSupportsMask(FeatureStrs));
14800}
14801
14802llvm::Value *
14803CodeGenFunction::EmitX86CpuSupports(std::array<uint32_t, 4> FeatureMask) {
14804 Value *Result = Builder.getTrue();
14805 if (FeatureMask[0] != 0) {
14806 // Matching the struct layout from the compiler-rt/libgcc structure that is
14807 // filled in:
14808 // unsigned int __cpu_vendor;
14809 // unsigned int __cpu_type;
14810 // unsigned int __cpu_subtype;
14811 // unsigned int __cpu_features[1];
14812 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
14813 llvm::ArrayType::get(Int32Ty, 1));
14814
14815 // Grab the global __cpu_model.
14816 llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
14817 cast<llvm::GlobalValue>(CpuModel)->setDSOLocal(true);
14818
14819 // Grab the first (0th) element from the field __cpu_features off of the
14820 // global in the struct STy.
14821 Value *Idxs[] = {Builder.getInt32(0), Builder.getInt32(3),
14822 Builder.getInt32(0)};
14823 Value *CpuFeatures = Builder.CreateInBoundsGEP(STy, CpuModel, Idxs);
14824 Value *Features = Builder.CreateAlignedLoad(Int32Ty, CpuFeatures,
14825 CharUnits::fromQuantity(4));
14826
14827 // Check the value of the bit corresponding to the feature requested.
14828 Value *Mask = Builder.getInt32(FeatureMask[0]);
14829 Value *Bitset = Builder.CreateAnd(Features, Mask);
14830 Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
14831 Result = Builder.CreateAnd(Result, Cmp);
14832 }
14833
14834 llvm::Type *ATy = llvm::ArrayType::get(Int32Ty, 3);
14835 llvm::Constant *CpuFeatures2 =
14836 CGM.CreateRuntimeVariable(ATy, "__cpu_features2");
14837 cast<llvm::GlobalValue>(CpuFeatures2)->setDSOLocal(true);
14838 for (int i = 1; i != 4; ++i) {
14839 const uint32_t M = FeatureMask[i];
14840 if (!M)
14841 continue;
14842 Value *Idxs[] = {Builder.getInt32(0), Builder.getInt32(i - 1)};
14843 Value *Features = Builder.CreateAlignedLoad(
14844 Int32Ty, Builder.CreateInBoundsGEP(ATy, CpuFeatures2, Idxs),
14845 CharUnits::fromQuantity(4));
14846 // Check the value of the bit corresponding to the feature requested.
14847 Value *Mask = Builder.getInt32(M);
14848 Value *Bitset = Builder.CreateAnd(Features, Mask);
14849 Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
14850 Result = Builder.CreateAnd(Result, Cmp);
14851 }
14852
14853 return Result;
14854}
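
// Editorial sketch (not part of the original CGBuiltin.cpp source): the
// feature mask above is spread over four 32-bit words, word 0 living in
// __cpu_model.__cpu_features[0] and words 1-3 in __cpu_features2; a feature
// set is reported as supported only if every requested bit is present.
// Illustrative standalone model of that test:
static bool exampleCpuSupportsMask(const unsigned Words[4],
                                   const unsigned Mask[4]) {
  for (int i = 0; i != 4; ++i)
    if ((Words[i] & Mask[i]) != Mask[i])
      return false;
  return true;
}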
14855
14856Value *CodeGenFunction::EmitAArch64CpuInit() {
14857 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
14858 llvm::FunctionCallee Func =
14859 CGM.CreateRuntimeFunction(FTy, "__init_cpu_features_resolver");
14860 cast<llvm::GlobalValue>(Func.getCallee())->setDSOLocal(true);
14861 cast<llvm::GlobalValue>(Func.getCallee())
14862 ->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
14863 return Builder.CreateCall(Func);
14864}
14865
14866Value *CodeGenFunction::EmitRISCVCpuInit() {
14867 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, {VoidPtrTy}, false);
14868 llvm::FunctionCallee Func =
14869 CGM.CreateRuntimeFunction(FTy, "__init_riscv_feature_bits");
14870 auto *CalleeGV = cast<llvm::GlobalValue>(Func.getCallee());
14871 CalleeGV->setDSOLocal(true);
14872 CalleeGV->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
14873 return Builder.CreateCall(Func, {llvm::ConstantPointerNull::get(VoidPtrTy)});
14874}
14875
14876Value *CodeGenFunction::EmitX86CpuInit() {
14877 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy,
14878 /*Variadic*/ false);
14879 llvm::FunctionCallee Func =
14880 CGM.CreateRuntimeFunction(FTy, "__cpu_indicator_init");
14881 cast<llvm::GlobalValue>(Func.getCallee())->setDSOLocal(true);
14882 cast<llvm::GlobalValue>(Func.getCallee())
14883 ->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
14884 return Builder.CreateCall(Func);
14885}
14886
14887Value *CodeGenFunction::EmitAArch64CpuSupports(const CallExpr *E) {
14888 const Expr *ArgExpr = E->getArg(0)->IgnoreParenCasts();
14889 StringRef ArgStr = cast<StringLiteral>(ArgExpr)->getString();
14890 llvm::SmallVector<StringRef, 8> Features;
14891 ArgStr.split(Features, "+");
14892 for (auto &Feature : Features) {
14893 Feature = Feature.trim();
14894 if (!llvm::AArch64::parseFMVExtension(Feature))
14895 return Builder.getFalse();
14896 if (Feature != "default")
14897 Features.push_back(Feature);
14898 }
14899 return EmitAArch64CpuSupports(Features);
14900}
14901
14902llvm::Value *
14903CodeGenFunction::EmitAArch64CpuSupports(ArrayRef<StringRef> FeaturesStrs) {
14904 uint64_t FeaturesMask = llvm::AArch64::getCpuSupportsMask(FeaturesStrs);
14905 Value *Result = Builder.getTrue();
14906 if (FeaturesMask != 0) {
14907 // Get features from structure in runtime library
14908 // struct {
14909 // unsigned long long features;
14910 // } __aarch64_cpu_features;
14911 llvm::Type *STy = llvm::StructType::get(Int64Ty);
14912 llvm::Constant *AArch64CPUFeatures =
14913 CGM.CreateRuntimeVariable(STy, "__aarch64_cpu_features");
14914 cast<llvm::GlobalValue>(AArch64CPUFeatures)->setDSOLocal(true);
14915 llvm::Value *CpuFeatures = Builder.CreateGEP(
14916 STy, AArch64CPUFeatures,
14917 {ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, 0)});
14918 Value *Features = Builder.CreateAlignedLoad(Int64Ty, CpuFeatures,
14919 CharUnits::fromQuantity(8));
14920 Value *Mask = Builder.getInt64(FeaturesMask);
14921 Value *Bitset = Builder.CreateAnd(Features, Mask);
14922 Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
14923 Result = Builder.CreateAnd(Result, Cmp);
14924 }
14925 return Result;
14926}
14927
14928Value *CodeGenFunction::EmitRISCVCpuSupports(const CallExpr *E) {
14929
14930 const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts();
14931 StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString();
14932 if (!getContext().getTargetInfo().validateCpuSupports(FeatureStr))
14933 return Builder.getFalse();
14934
14935 return EmitRISCVCpuSupports(ArrayRef<StringRef>(FeatureStr));
14936}
14937
14938static Value *loadRISCVFeatureBits(unsigned Index, CGBuilderTy &Builder,
14939 CodeGenModule &CGM) {
14940 llvm::Type *Int32Ty = Builder.getInt32Ty();
14941 llvm::Type *Int64Ty = Builder.getInt64Ty();
14942 llvm::ArrayType *ArrayOfInt64Ty =
14943 llvm::ArrayType::get(Int64Ty, llvm::RISCVISAInfo::FeatureBitSize);
14944 llvm::Type *StructTy = llvm::StructType::get(Int32Ty, ArrayOfInt64Ty);
14945 llvm::Constant *RISCVFeaturesBits =
14946 CGM.CreateRuntimeVariable(StructTy, "__riscv_feature_bits");
14947 cast<llvm::GlobalValue>(RISCVFeaturesBits)->setDSOLocal(true);
14948 Value *IndexVal = llvm::ConstantInt::get(Int32Ty, Index);
14949 llvm::Value *GEPIndices[] = {Builder.getInt32(0), Builder.getInt32(1),
14950 IndexVal};
14951 Value *Ptr =
14952 Builder.CreateInBoundsGEP(StructTy, RISCVFeaturesBits, GEPIndices);
14953 Value *FeaturesBit =
14954 Builder.CreateAlignedLoad(Int64Ty, Ptr, CharUnits::fromQuantity(8));
14955 return FeaturesBit;
14956}
14957
14958Value *CodeGenFunction::EmitRISCVCpuSupports(ArrayRef<StringRef> FeaturesStrs) {
14959 const unsigned RISCVFeatureLength = llvm::RISCVISAInfo::FeatureBitSize;
14960 uint64_t RequireBitMasks[RISCVFeatureLength] = {0};
14961
14962 for (auto Feat : FeaturesStrs) {
14963 auto [GroupID, BitPos] = RISCVISAInfo::getRISCVFeaturesBitsInfo(Feat);
14964
14965 // If there is no BitPos for this feature, the whole check evaluates to
14966 // false. A warning is also reported to the user during compilation.
14967 if (BitPos == -1)
14968 return Builder.getFalse();
14969
14970 RequireBitMasks[GroupID] |= (1ULL << BitPos);
14971 }
14972
14973 Value *Result = nullptr;
14974 for (unsigned Idx = 0; Idx < RISCVFeatureLength; Idx++) {
14975 if (RequireBitMasks[Idx] == 0)
14976 continue;
14977
14978 Value *Mask = Builder.getInt64(RequireBitMasks[Idx]);
14979 Value *Bitset =
14980 Builder.CreateAnd(loadRISCVFeatureBits(Idx, Builder, CGM), Mask);
14981 Value *CmpV = Builder.CreateICmpEQ(Bitset, Mask);
14982 Result = (!Result) ? CmpV : Builder.CreateAnd(Result, CmpV);
14983 }
14984
14985 assert(Result && "Should have value here.");
14986
14987 return Result;
14988}
14989
14990Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
14991 const CallExpr *E) {
14992 if (BuiltinID == Builtin::BI__builtin_cpu_is)
14993 return EmitX86CpuIs(E);
14994 if (BuiltinID == Builtin::BI__builtin_cpu_supports)
14995 return EmitX86CpuSupports(E);
14996 if (BuiltinID == Builtin::BI__builtin_cpu_init)
14997 return EmitX86CpuInit();
14998
14999 // Handle MSVC intrinsics before argument evaluation to prevent double
15000 // evaluation.
15001 if (std::optional<MSVCIntrin> MsvcIntId = translateX86ToMsvcIntrin(BuiltinID))
15002 return EmitMSVCBuiltinExpr(*MsvcIntId, E);
15003
15004 SmallVector<Value*, 4> Ops;
15005 bool IsMaskFCmp = false;
15006 bool IsConjFMA = false;
15007
15008 // Find out if any arguments are required to be integer constant expressions.
15009 unsigned ICEArguments = 0;
15010 ASTContext::GetBuiltinTypeError Error;
15011 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
15012 assert(Error == ASTContext::GE_None && "Should not codegen an error");
15013
15014 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
15015 Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
15016 }
15017
15018 // These exist so that the builtin that takes an immediate can be bounds
15019 // checked by clang to avoid passing bad immediates to the backend. Since
15020 // AVX has a larger immediate than SSE we would need separate builtins to
15021 // do the different bounds checking. Rather than create a clang-specific
15022 // SSE-only builtin, this implements eight separate builtins to match the
15023 // gcc implementation.
15024 auto getCmpIntrinsicCall = [this, &Ops](Intrinsic::ID ID, unsigned Imm) {
15025 Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm));
15026 llvm::Function *F = CGM.getIntrinsic(ID);
15027 return Builder.CreateCall(F, Ops);
15028 };
15029
15030 // For the vector forms of FP comparisons, translate the builtins directly to
15031 // IR.
15032 // TODO: The builtins could be removed if the SSE header files used vector
15033 // extension comparisons directly (vector ordered/unordered may need
15034 // additional support via __builtin_isnan()).
15035 auto getVectorFCmpIR = [this, &Ops, E](CmpInst::Predicate Pred,
15036 bool IsSignaling) {
15037 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
15038 Value *Cmp;
15039 if (IsSignaling)
15040 Cmp = Builder.CreateFCmpS(Pred, Ops[0], Ops[1]);
15041 else
15042 Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
15043 llvm::VectorType *FPVecTy = cast<llvm::VectorType>(Ops[0]->getType());
15044 llvm::VectorType *IntVecTy = llvm::VectorType::getInteger(FPVecTy);
15045 Value *Sext = Builder.CreateSExt(Cmp, IntVecTy);
15046 return Builder.CreateBitCast(Sext, FPVecTy);
15047 };
15048
15049 switch (BuiltinID) {
15050 default: return nullptr;
15051 case X86::BI_mm_prefetch: {
15052 Value *Address = Ops[0];
15053 ConstantInt *C = cast<ConstantInt>(Ops[1]);
15054 Value *RW = ConstantInt::get(Int32Ty, (C->getZExtValue() >> 2) & 0x1);
15055 Value *Locality = ConstantInt::get(Int32Ty, C->getZExtValue() & 0x3);
15056 Value *Data = ConstantInt::get(Int32Ty, 1);
15057 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
15058 return Builder.CreateCall(F, {Address, RW, Locality, Data});
15059 }
15060 case X86::BI_mm_clflush: {
15061 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_clflush),
15062 Ops[0]);
15063 }
15064 case X86::BI_mm_lfence: {
15065 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_lfence));
15066 }
15067 case X86::BI_mm_mfence: {
15068 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_mfence));
15069 }
15070 case X86::BI_mm_sfence: {
15071 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_sfence));
15072 }
15073 case X86::BI_mm_pause: {
15074 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_pause));
15075 }
15076 case X86::BI__rdtsc: {
15077 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtsc));
15078 }
15079 case X86::BI__builtin_ia32_rdtscp: {
15080 Value *Call = Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtscp));
15081 Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1),
15082 Ops[0]);
15083 return Builder.CreateExtractValue(Call, 0);
15084 }
15085 case X86::BI__builtin_ia32_lzcnt_u16:
15086 case X86::BI__builtin_ia32_lzcnt_u32:
15087 case X86::BI__builtin_ia32_lzcnt_u64: {
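    // lzcnt is defined for a zero input (it returns the operand width), so the
    // second operand of llvm.ctlz is false ("zero is not poison"); the
    // tzcnt/cttz case below does the same for llvm.cttz.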
15088 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
15089 return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
15090 }
15091 case X86::BI__builtin_ia32_tzcnt_u16:
15092 case X86::BI__builtin_ia32_tzcnt_u32:
15093 case X86::BI__builtin_ia32_tzcnt_u64: {
15094 Function *F = CGM.getIntrinsic(Intrinsic::cttz, Ops[0]->getType());
15095 return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
15096 }
15097 case X86::BI__builtin_ia32_undef128:
15098 case X86::BI__builtin_ia32_undef256:
15099 case X86::BI__builtin_ia32_undef512:
15100 // The x86 definition of "undef" is not the same as the LLVM definition
15101 // (PR32176). We leave optimizing away an unnecessary zero constant to the
15102 // IR optimizer and backend.
15103 // TODO: If we had a "freeze" IR instruction to generate a fixed undef
15104 // value, we should use that here instead of a zero.
15105 return llvm::Constant::getNullValue(ConvertType(E->getType()));
15106 case X86::BI__builtin_ia32_vec_ext_v4hi:
15107 case X86::BI__builtin_ia32_vec_ext_v16qi:
15108 case X86::BI__builtin_ia32_vec_ext_v8hi:
15109 case X86::BI__builtin_ia32_vec_ext_v4si:
15110 case X86::BI__builtin_ia32_vec_ext_v4sf:
15111 case X86::BI__builtin_ia32_vec_ext_v2di:
15112 case X86::BI__builtin_ia32_vec_ext_v32qi:
15113 case X86::BI__builtin_ia32_vec_ext_v16hi:
15114 case X86::BI__builtin_ia32_vec_ext_v8si:
15115 case X86::BI__builtin_ia32_vec_ext_v4di: {
15116 unsigned NumElts =
15117 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15118 uint64_t Index = cast<ConstantInt>(Ops[1])->getZExtValue();
15119 Index &= NumElts - 1;
15120 // These builtins exist so we can ensure the index is an ICE and in range.
15121 // Otherwise we could just do this in the header file.
15122 return Builder.CreateExtractElement(Ops[0], Index);
15123 }
15124 case X86::BI__builtin_ia32_vec_set_v4hi:
15125 case X86::BI__builtin_ia32_vec_set_v16qi:
15126 case X86::BI__builtin_ia32_vec_set_v8hi:
15127 case X86::BI__builtin_ia32_vec_set_v4si:
15128 case X86::BI__builtin_ia32_vec_set_v2di:
15129 case X86::BI__builtin_ia32_vec_set_v32qi:
15130 case X86::BI__builtin_ia32_vec_set_v16hi:
15131 case X86::BI__builtin_ia32_vec_set_v8si:
15132 case X86::BI__builtin_ia32_vec_set_v4di: {
15133 unsigned NumElts =
15134 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15135 unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue();
15136 Index &= NumElts - 1;
15137 // These builtins exist so we can ensure the index is an ICE and in range.
15138 // Otherwise we could just do this in the header file.
15139 return Builder.CreateInsertElement(Ops[0], Ops[1], Index);
15140 }
15141 case X86::BI_mm_setcsr:
15142 case X86::BI__builtin_ia32_ldmxcsr: {
15143 RawAddress Tmp = CreateMemTemp(E->getArg(0)->getType());
15144 Builder.CreateStore(Ops[0], Tmp);
15145 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr),
15146 Tmp.getPointer());
15147 }
15148 case X86::BI_mm_getcsr:
15149  case X86::BI__builtin_ia32_stmxcsr: {
15150    RawAddress Tmp = CreateMemTemp(E->getType());
15151    Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr),
15152 Tmp.getPointer());
15153 return Builder.CreateLoad(Tmp, "stmxcsr");
15154 }
15155 case X86::BI__builtin_ia32_xsave:
15156 case X86::BI__builtin_ia32_xsave64:
15157 case X86::BI__builtin_ia32_xrstor:
15158 case X86::BI__builtin_ia32_xrstor64:
15159 case X86::BI__builtin_ia32_xsaveopt:
15160 case X86::BI__builtin_ia32_xsaveopt64:
15161 case X86::BI__builtin_ia32_xrstors:
15162 case X86::BI__builtin_ia32_xrstors64:
15163 case X86::BI__builtin_ia32_xsavec:
15164 case X86::BI__builtin_ia32_xsavec64:
15165 case X86::BI__builtin_ia32_xsaves:
15166 case X86::BI__builtin_ia32_xsaves64:
15167 case X86::BI__builtin_ia32_xsetbv:
15168 case X86::BI_xsetbv: {
15169 Intrinsic::ID ID;
15170#define INTRINSIC_X86_XSAVE_ID(NAME) \
15171 case X86::BI__builtin_ia32_##NAME: \
15172 ID = Intrinsic::x86_##NAME; \
15173 break
15174 switch (BuiltinID) {
15175    default: llvm_unreachable("Unsupported intrinsic!");
15176    INTRINSIC_X86_XSAVE_ID(xsave);
15177    INTRINSIC_X86_XSAVE_ID(xsave64);
15178 INTRINSIC_X86_XSAVE_ID(xrstor);
15179 INTRINSIC_X86_XSAVE_ID(xrstor64);
15180 INTRINSIC_X86_XSAVE_ID(xsaveopt);
15181 INTRINSIC_X86_XSAVE_ID(xsaveopt64);
15182 INTRINSIC_X86_XSAVE_ID(xrstors);
15183 INTRINSIC_X86_XSAVE_ID(xrstors64);
15184 INTRINSIC_X86_XSAVE_ID(xsavec);
15185 INTRINSIC_X86_XSAVE_ID(xsavec64);
15186 INTRINSIC_X86_XSAVE_ID(xsaves);
15187 INTRINSIC_X86_XSAVE_ID(xsaves64);
15188 INTRINSIC_X86_XSAVE_ID(xsetbv);
15189 case X86::BI_xsetbv:
15190 ID = Intrinsic::x86_xsetbv;
15191 break;
15192 }
15193#undef INTRINSIC_X86_XSAVE_ID
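    // The XSAVE-family LLVM intrinsics take the 64-bit feature mask as two i32
    // operands, so split Ops[1] into its high and low halves (high replaces
    // Ops[1], low is appended) before emitting the call.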
15194 Value *Mhi = Builder.CreateTrunc(
15195 Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, 32)), Int32Ty);
15196 Value *Mlo = Builder.CreateTrunc(Ops[1], Int32Ty);
15197 Ops[1] = Mhi;
15198 Ops.push_back(Mlo);
15199 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
15200 }
15201 case X86::BI__builtin_ia32_xgetbv:
15202 case X86::BI_xgetbv:
15203 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_xgetbv), Ops);
15204 case X86::BI__builtin_ia32_storedqudi128_mask:
15205 case X86::BI__builtin_ia32_storedqusi128_mask:
15206 case X86::BI__builtin_ia32_storedquhi128_mask:
15207 case X86::BI__builtin_ia32_storedquqi128_mask:
15208 case X86::BI__builtin_ia32_storeupd128_mask:
15209 case X86::BI__builtin_ia32_storeups128_mask:
15210 case X86::BI__builtin_ia32_storedqudi256_mask:
15211 case X86::BI__builtin_ia32_storedqusi256_mask:
15212 case X86::BI__builtin_ia32_storedquhi256_mask:
15213 case X86::BI__builtin_ia32_storedquqi256_mask:
15214 case X86::BI__builtin_ia32_storeupd256_mask:
15215 case X86::BI__builtin_ia32_storeups256_mask:
15216 case X86::BI__builtin_ia32_storedqudi512_mask:
15217 case X86::BI__builtin_ia32_storedqusi512_mask:
15218 case X86::BI__builtin_ia32_storedquhi512_mask:
15219 case X86::BI__builtin_ia32_storedquqi512_mask:
15220 case X86::BI__builtin_ia32_storeupd512_mask:
15221 case X86::BI__builtin_ia32_storeups512_mask:
15222 return EmitX86MaskedStore(*this, Ops, Align(1));
15223
15224 case X86::BI__builtin_ia32_storesbf16128_mask:
15225 case X86::BI__builtin_ia32_storesh128_mask:
15226 case X86::BI__builtin_ia32_storess128_mask:
15227 case X86::BI__builtin_ia32_storesd128_mask:
15228 return EmitX86MaskedStore(*this, Ops, Align(1));
15229
15230 case X86::BI__builtin_ia32_cvtmask2b128:
15231 case X86::BI__builtin_ia32_cvtmask2b256:
15232 case X86::BI__builtin_ia32_cvtmask2b512:
15233 case X86::BI__builtin_ia32_cvtmask2w128:
15234 case X86::BI__builtin_ia32_cvtmask2w256:
15235 case X86::BI__builtin_ia32_cvtmask2w512:
15236 case X86::BI__builtin_ia32_cvtmask2d128:
15237 case X86::BI__builtin_ia32_cvtmask2d256:
15238 case X86::BI__builtin_ia32_cvtmask2d512:
15239 case X86::BI__builtin_ia32_cvtmask2q128:
15240 case X86::BI__builtin_ia32_cvtmask2q256:
15241 case X86::BI__builtin_ia32_cvtmask2q512:
15242 return EmitX86SExtMask(*this, Ops[0], ConvertType(E->getType()));
15243
15244 case X86::BI__builtin_ia32_cvtb2mask128:
15245 case X86::BI__builtin_ia32_cvtb2mask256:
15246 case X86::BI__builtin_ia32_cvtb2mask512:
15247 case X86::BI__builtin_ia32_cvtw2mask128:
15248 case X86::BI__builtin_ia32_cvtw2mask256:
15249 case X86::BI__builtin_ia32_cvtw2mask512:
15250 case X86::BI__builtin_ia32_cvtd2mask128:
15251 case X86::BI__builtin_ia32_cvtd2mask256:
15252 case X86::BI__builtin_ia32_cvtd2mask512:
15253 case X86::BI__builtin_ia32_cvtq2mask128:
15254 case X86::BI__builtin_ia32_cvtq2mask256:
15255 case X86::BI__builtin_ia32_cvtq2mask512:
15256 return EmitX86ConvertToMask(*this, Ops[0]);
15257
15258 case X86::BI__builtin_ia32_cvtdq2ps512_mask:
15259 case X86::BI__builtin_ia32_cvtqq2ps512_mask:
15260 case X86::BI__builtin_ia32_cvtqq2pd512_mask:
15261 case X86::BI__builtin_ia32_vcvtw2ph512_mask:
15262 case X86::BI__builtin_ia32_vcvtdq2ph512_mask:
15263 case X86::BI__builtin_ia32_vcvtqq2ph512_mask:
15264 case X86::BI__builtin_ia32_vcvtdq2ph256_round_mask:
15265 case X86::BI__builtin_ia32_vcvtdq2ps256_round_mask:
15266 case X86::BI__builtin_ia32_vcvtqq2pd256_round_mask:
15267 case X86::BI__builtin_ia32_vcvtqq2ph256_round_mask:
15268 case X86::BI__builtin_ia32_vcvtqq2ps256_round_mask:
15269 case X86::BI__builtin_ia32_vcvtw2ph256_round_mask:
15270 return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ true);
15271 case X86::BI__builtin_ia32_cvtudq2ps512_mask:
15272 case X86::BI__builtin_ia32_cvtuqq2ps512_mask:
15273 case X86::BI__builtin_ia32_cvtuqq2pd512_mask:
15274 case X86::BI__builtin_ia32_vcvtuw2ph512_mask:
15275 case X86::BI__builtin_ia32_vcvtudq2ph512_mask:
15276 case X86::BI__builtin_ia32_vcvtuqq2ph512_mask:
15277 case X86::BI__builtin_ia32_vcvtudq2ph256_round_mask:
15278 case X86::BI__builtin_ia32_vcvtudq2ps256_round_mask:
15279 case X86::BI__builtin_ia32_vcvtuqq2pd256_round_mask:
15280 case X86::BI__builtin_ia32_vcvtuqq2ph256_round_mask:
15281 case X86::BI__builtin_ia32_vcvtuqq2ps256_round_mask:
15282 case X86::BI__builtin_ia32_vcvtuw2ph256_round_mask:
15283 return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ false);
15284
15285 case X86::BI__builtin_ia32_vfmaddss3:
15286 case X86::BI__builtin_ia32_vfmaddsd3:
15287 case X86::BI__builtin_ia32_vfmaddsh3_mask:
15288 case X86::BI__builtin_ia32_vfmaddss3_mask:
15289 case X86::BI__builtin_ia32_vfmaddsd3_mask:
15290 return EmitScalarFMAExpr(*this, E, Ops, Ops[0]);
15291 case X86::BI__builtin_ia32_vfmaddss:
15292 case X86::BI__builtin_ia32_vfmaddsd:
15293 return EmitScalarFMAExpr(*this, E, Ops,
15294 Constant::getNullValue(Ops[0]->getType()));
15295 case X86::BI__builtin_ia32_vfmaddsh3_maskz:
15296 case X86::BI__builtin_ia32_vfmaddss3_maskz:
15297 case X86::BI__builtin_ia32_vfmaddsd3_maskz:
15298 return EmitScalarFMAExpr(*this, E, Ops, Ops[0], /*ZeroMask*/ true);
15299 case X86::BI__builtin_ia32_vfmaddsh3_mask3:
15300 case X86::BI__builtin_ia32_vfmaddss3_mask3:
15301 case X86::BI__builtin_ia32_vfmaddsd3_mask3:
15302 return EmitScalarFMAExpr(*this, E, Ops, Ops[2], /*ZeroMask*/ false, 2);
15303 case X86::BI__builtin_ia32_vfmsubsh3_mask3:
15304 case X86::BI__builtin_ia32_vfmsubss3_mask3:
15305 case X86::BI__builtin_ia32_vfmsubsd3_mask3:
15306 return EmitScalarFMAExpr(*this, E, Ops, Ops[2], /*ZeroMask*/ false, 2,
15307 /*NegAcc*/ true);
15308 case X86::BI__builtin_ia32_vfmaddph:
15309 case X86::BI__builtin_ia32_vfmaddps:
15310 case X86::BI__builtin_ia32_vfmaddpd:
15311 case X86::BI__builtin_ia32_vfmaddph256:
15312 case X86::BI__builtin_ia32_vfmaddps256:
15313 case X86::BI__builtin_ia32_vfmaddpd256:
15314 case X86::BI__builtin_ia32_vfmaddph512_mask:
15315 case X86::BI__builtin_ia32_vfmaddph512_maskz:
15316 case X86::BI__builtin_ia32_vfmaddph512_mask3:
15317 case X86::BI__builtin_ia32_vfmaddnepbh128:
15318 case X86::BI__builtin_ia32_vfmaddnepbh256:
15319 case X86::BI__builtin_ia32_vfmaddnepbh512:
15320 case X86::BI__builtin_ia32_vfmaddps512_mask:
15321 case X86::BI__builtin_ia32_vfmaddps512_maskz:
15322 case X86::BI__builtin_ia32_vfmaddps512_mask3:
15323 case X86::BI__builtin_ia32_vfmsubps512_mask3:
15324 case X86::BI__builtin_ia32_vfmaddpd512_mask:
15325 case X86::BI__builtin_ia32_vfmaddpd512_maskz:
15326 case X86::BI__builtin_ia32_vfmaddpd512_mask3:
15327 case X86::BI__builtin_ia32_vfmsubpd512_mask3:
15328 case X86::BI__builtin_ia32_vfmsubph512_mask3:
15329 case X86::BI__builtin_ia32_vfmaddph256_round_mask:
15330 case X86::BI__builtin_ia32_vfmaddph256_round_maskz:
15331 case X86::BI__builtin_ia32_vfmaddph256_round_mask3:
15332 case X86::BI__builtin_ia32_vfmaddps256_round_mask:
15333 case X86::BI__builtin_ia32_vfmaddps256_round_maskz:
15334 case X86::BI__builtin_ia32_vfmaddps256_round_mask3:
15335 case X86::BI__builtin_ia32_vfmsubps256_round_mask3:
15336 case X86::BI__builtin_ia32_vfmaddpd256_round_mask:
15337 case X86::BI__builtin_ia32_vfmaddpd256_round_maskz:
15338 case X86::BI__builtin_ia32_vfmaddpd256_round_mask3:
15339 case X86::BI__builtin_ia32_vfmsubpd256_round_mask3:
15340 case X86::BI__builtin_ia32_vfmsubph256_round_mask3:
15341 return EmitX86FMAExpr(*this, E, Ops, BuiltinID, /*IsAddSub*/ false);
15342 case X86::BI__builtin_ia32_vfmaddsubph512_mask:
15343 case X86::BI__builtin_ia32_vfmaddsubph512_maskz:
15344 case X86::BI__builtin_ia32_vfmaddsubph512_mask3:
15345 case X86::BI__builtin_ia32_vfmsubaddph512_mask3:
15346 case X86::BI__builtin_ia32_vfmaddsubps512_mask:
15347 case X86::BI__builtin_ia32_vfmaddsubps512_maskz:
15348 case X86::BI__builtin_ia32_vfmaddsubps512_mask3:
15349 case X86::BI__builtin_ia32_vfmsubaddps512_mask3:
15350 case X86::BI__builtin_ia32_vfmaddsubpd512_mask:
15351 case X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
15352 case X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
15353 case X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
15354 case X86::BI__builtin_ia32_vfmaddsubph256_round_mask:
15355 case X86::BI__builtin_ia32_vfmaddsubph256_round_maskz:
15356 case X86::BI__builtin_ia32_vfmaddsubph256_round_mask3:
15357 case X86::BI__builtin_ia32_vfmsubaddph256_round_mask3:
15358 case X86::BI__builtin_ia32_vfmaddsubps256_round_mask:
15359 case X86::BI__builtin_ia32_vfmaddsubps256_round_maskz:
15360 case X86::BI__builtin_ia32_vfmaddsubps256_round_mask3:
15361 case X86::BI__builtin_ia32_vfmsubaddps256_round_mask3:
15362 case X86::BI__builtin_ia32_vfmaddsubpd256_round_mask:
15363 case X86::BI__builtin_ia32_vfmaddsubpd256_round_maskz:
15364 case X86::BI__builtin_ia32_vfmaddsubpd256_round_mask3:
15365 case X86::BI__builtin_ia32_vfmsubaddpd256_round_mask3:
15366 return EmitX86FMAExpr(*this, E, Ops, BuiltinID, /*IsAddSub*/ true);
15367
15368 case X86::BI__builtin_ia32_movdqa32store128_mask:
15369 case X86::BI__builtin_ia32_movdqa64store128_mask:
15370 case X86::BI__builtin_ia32_storeaps128_mask:
15371 case X86::BI__builtin_ia32_storeapd128_mask:
15372 case X86::BI__builtin_ia32_movdqa32store256_mask:
15373 case X86::BI__builtin_ia32_movdqa64store256_mask:
15374 case X86::BI__builtin_ia32_storeaps256_mask:
15375 case X86::BI__builtin_ia32_storeapd256_mask:
15376 case X86::BI__builtin_ia32_movdqa32store512_mask:
15377 case X86::BI__builtin_ia32_movdqa64store512_mask:
15378 case X86::BI__builtin_ia32_storeaps512_mask:
15379 case X86::BI__builtin_ia32_storeapd512_mask:
15380 return EmitX86MaskedStore(
15381 *this, Ops,
15382 getContext().getTypeAlignInChars(E->getArg(1)->getType()).getAsAlign());
15383
15384 case X86::BI__builtin_ia32_loadups128_mask:
15385 case X86::BI__builtin_ia32_loadups256_mask:
15386 case X86::BI__builtin_ia32_loadups512_mask:
15387 case X86::BI__builtin_ia32_loadupd128_mask:
15388 case X86::BI__builtin_ia32_loadupd256_mask:
15389 case X86::BI__builtin_ia32_loadupd512_mask:
15390 case X86::BI__builtin_ia32_loaddquqi128_mask:
15391 case X86::BI__builtin_ia32_loaddquqi256_mask:
15392 case X86::BI__builtin_ia32_loaddquqi512_mask:
15393 case X86::BI__builtin_ia32_loaddquhi128_mask:
15394 case X86::BI__builtin_ia32_loaddquhi256_mask:
15395 case X86::BI__builtin_ia32_loaddquhi512_mask:
15396 case X86::BI__builtin_ia32_loaddqusi128_mask:
15397 case X86::BI__builtin_ia32_loaddqusi256_mask:
15398 case X86::BI__builtin_ia32_loaddqusi512_mask:
15399 case X86::BI__builtin_ia32_loaddqudi128_mask:
15400 case X86::BI__builtin_ia32_loaddqudi256_mask:
15401 case X86::BI__builtin_ia32_loaddqudi512_mask:
15402 return EmitX86MaskedLoad(*this, Ops, Align(1));
15403
15404 case X86::BI__builtin_ia32_loadsbf16128_mask:
15405 case X86::BI__builtin_ia32_loadsh128_mask:
15406 case X86::BI__builtin_ia32_loadss128_mask:
15407 case X86::BI__builtin_ia32_loadsd128_mask:
15408 return EmitX86MaskedLoad(*this, Ops, Align(1));
15409
15410 case X86::BI__builtin_ia32_loadaps128_mask:
15411 case X86::BI__builtin_ia32_loadaps256_mask:
15412 case X86::BI__builtin_ia32_loadaps512_mask:
15413 case X86::BI__builtin_ia32_loadapd128_mask:
15414 case X86::BI__builtin_ia32_loadapd256_mask:
15415 case X86::BI__builtin_ia32_loadapd512_mask:
15416 case X86::BI__builtin_ia32_movdqa32load128_mask:
15417 case X86::BI__builtin_ia32_movdqa32load256_mask:
15418 case X86::BI__builtin_ia32_movdqa32load512_mask:
15419 case X86::BI__builtin_ia32_movdqa64load128_mask:
15420 case X86::BI__builtin_ia32_movdqa64load256_mask:
15421 case X86::BI__builtin_ia32_movdqa64load512_mask:
15422 return EmitX86MaskedLoad(
15423 *this, Ops,
15424 getContext().getTypeAlignInChars(E->getArg(1)->getType()).getAsAlign());
15425
15426 case X86::BI__builtin_ia32_expandloaddf128_mask:
15427 case X86::BI__builtin_ia32_expandloaddf256_mask:
15428 case X86::BI__builtin_ia32_expandloaddf512_mask:
15429 case X86::BI__builtin_ia32_expandloadsf128_mask:
15430 case X86::BI__builtin_ia32_expandloadsf256_mask:
15431 case X86::BI__builtin_ia32_expandloadsf512_mask:
15432 case X86::BI__builtin_ia32_expandloaddi128_mask:
15433 case X86::BI__builtin_ia32_expandloaddi256_mask:
15434 case X86::BI__builtin_ia32_expandloaddi512_mask:
15435 case X86::BI__builtin_ia32_expandloadsi128_mask:
15436 case X86::BI__builtin_ia32_expandloadsi256_mask:
15437 case X86::BI__builtin_ia32_expandloadsi512_mask:
15438 case X86::BI__builtin_ia32_expandloadhi128_mask:
15439 case X86::BI__builtin_ia32_expandloadhi256_mask:
15440 case X86::BI__builtin_ia32_expandloadhi512_mask:
15441 case X86::BI__builtin_ia32_expandloadqi128_mask:
15442 case X86::BI__builtin_ia32_expandloadqi256_mask:
15443 case X86::BI__builtin_ia32_expandloadqi512_mask:
15444 return EmitX86ExpandLoad(*this, Ops);
15445
15446 case X86::BI__builtin_ia32_compressstoredf128_mask:
15447 case X86::BI__builtin_ia32_compressstoredf256_mask:
15448 case X86::BI__builtin_ia32_compressstoredf512_mask:
15449 case X86::BI__builtin_ia32_compressstoresf128_mask:
15450 case X86::BI__builtin_ia32_compressstoresf256_mask:
15451 case X86::BI__builtin_ia32_compressstoresf512_mask:
15452 case X86::BI__builtin_ia32_compressstoredi128_mask:
15453 case X86::BI__builtin_ia32_compressstoredi256_mask:
15454 case X86::BI__builtin_ia32_compressstoredi512_mask:
15455 case X86::BI__builtin_ia32_compressstoresi128_mask:
15456 case X86::BI__builtin_ia32_compressstoresi256_mask:
15457 case X86::BI__builtin_ia32_compressstoresi512_mask:
15458 case X86::BI__builtin_ia32_compressstorehi128_mask:
15459 case X86::BI__builtin_ia32_compressstorehi256_mask:
15460 case X86::BI__builtin_ia32_compressstorehi512_mask:
15461 case X86::BI__builtin_ia32_compressstoreqi128_mask:
15462 case X86::BI__builtin_ia32_compressstoreqi256_mask:
15463 case X86::BI__builtin_ia32_compressstoreqi512_mask:
15464 return EmitX86CompressStore(*this, Ops);
15465
15466 case X86::BI__builtin_ia32_expanddf128_mask:
15467 case X86::BI__builtin_ia32_expanddf256_mask:
15468 case X86::BI__builtin_ia32_expanddf512_mask:
15469 case X86::BI__builtin_ia32_expandsf128_mask:
15470 case X86::BI__builtin_ia32_expandsf256_mask:
15471 case X86::BI__builtin_ia32_expandsf512_mask:
15472 case X86::BI__builtin_ia32_expanddi128_mask:
15473 case X86::BI__builtin_ia32_expanddi256_mask:
15474 case X86::BI__builtin_ia32_expanddi512_mask:
15475 case X86::BI__builtin_ia32_expandsi128_mask:
15476 case X86::BI__builtin_ia32_expandsi256_mask:
15477 case X86::BI__builtin_ia32_expandsi512_mask:
15478 case X86::BI__builtin_ia32_expandhi128_mask:
15479 case X86::BI__builtin_ia32_expandhi256_mask:
15480 case X86::BI__builtin_ia32_expandhi512_mask:
15481 case X86::BI__builtin_ia32_expandqi128_mask:
15482 case X86::BI__builtin_ia32_expandqi256_mask:
15483 case X86::BI__builtin_ia32_expandqi512_mask:
15484 return EmitX86CompressExpand(*this, Ops, /*IsCompress*/false);
15485
15486 case X86::BI__builtin_ia32_compressdf128_mask:
15487 case X86::BI__builtin_ia32_compressdf256_mask:
15488 case X86::BI__builtin_ia32_compressdf512_mask:
15489 case X86::BI__builtin_ia32_compresssf128_mask:
15490 case X86::BI__builtin_ia32_compresssf256_mask:
15491 case X86::BI__builtin_ia32_compresssf512_mask:
15492 case X86::BI__builtin_ia32_compressdi128_mask:
15493 case X86::BI__builtin_ia32_compressdi256_mask:
15494 case X86::BI__builtin_ia32_compressdi512_mask:
15495 case X86::BI__builtin_ia32_compresssi128_mask:
15496 case X86::BI__builtin_ia32_compresssi256_mask:
15497 case X86::BI__builtin_ia32_compresssi512_mask:
15498 case X86::BI__builtin_ia32_compresshi128_mask:
15499 case X86::BI__builtin_ia32_compresshi256_mask:
15500 case X86::BI__builtin_ia32_compresshi512_mask:
15501 case X86::BI__builtin_ia32_compressqi128_mask:
15502 case X86::BI__builtin_ia32_compressqi256_mask:
15503 case X86::BI__builtin_ia32_compressqi512_mask:
15504 return EmitX86CompressExpand(*this, Ops, /*IsCompress*/true);
15505
15506 case X86::BI__builtin_ia32_gather3div2df:
15507 case X86::BI__builtin_ia32_gather3div2di:
15508 case X86::BI__builtin_ia32_gather3div4df:
15509 case X86::BI__builtin_ia32_gather3div4di:
15510 case X86::BI__builtin_ia32_gather3div4sf:
15511 case X86::BI__builtin_ia32_gather3div4si:
15512 case X86::BI__builtin_ia32_gather3div8sf:
15513 case X86::BI__builtin_ia32_gather3div8si:
15514 case X86::BI__builtin_ia32_gather3siv2df:
15515 case X86::BI__builtin_ia32_gather3siv2di:
15516 case X86::BI__builtin_ia32_gather3siv4df:
15517 case X86::BI__builtin_ia32_gather3siv4di:
15518 case X86::BI__builtin_ia32_gather3siv4sf:
15519 case X86::BI__builtin_ia32_gather3siv4si:
15520 case X86::BI__builtin_ia32_gather3siv8sf:
15521 case X86::BI__builtin_ia32_gather3siv8si:
15522 case X86::BI__builtin_ia32_gathersiv8df:
15523 case X86::BI__builtin_ia32_gathersiv16sf:
15524 case X86::BI__builtin_ia32_gatherdiv8df:
15525 case X86::BI__builtin_ia32_gatherdiv16sf:
15526 case X86::BI__builtin_ia32_gathersiv8di:
15527 case X86::BI__builtin_ia32_gathersiv16si:
15528 case X86::BI__builtin_ia32_gatherdiv8di:
15529 case X86::BI__builtin_ia32_gatherdiv16si: {
15530 Intrinsic::ID IID;
15531 switch (BuiltinID) {
15532 default: llvm_unreachable("Unexpected builtin");
15533 case X86::BI__builtin_ia32_gather3div2df:
15534 IID = Intrinsic::x86_avx512_mask_gather3div2_df;
15535 break;
15536 case X86::BI__builtin_ia32_gather3div2di:
15537 IID = Intrinsic::x86_avx512_mask_gather3div2_di;
15538 break;
15539 case X86::BI__builtin_ia32_gather3div4df:
15540 IID = Intrinsic::x86_avx512_mask_gather3div4_df;
15541 break;
15542 case X86::BI__builtin_ia32_gather3div4di:
15543 IID = Intrinsic::x86_avx512_mask_gather3div4_di;
15544 break;
15545 case X86::BI__builtin_ia32_gather3div4sf:
15546 IID = Intrinsic::x86_avx512_mask_gather3div4_sf;
15547 break;
15548 case X86::BI__builtin_ia32_gather3div4si:
15549 IID = Intrinsic::x86_avx512_mask_gather3div4_si;
15550 break;
15551 case X86::BI__builtin_ia32_gather3div8sf:
15552 IID = Intrinsic::x86_avx512_mask_gather3div8_sf;
15553 break;
15554 case X86::BI__builtin_ia32_gather3div8si:
15555 IID = Intrinsic::x86_avx512_mask_gather3div8_si;
15556 break;
15557 case X86::BI__builtin_ia32_gather3siv2df:
15558 IID = Intrinsic::x86_avx512_mask_gather3siv2_df;
15559 break;
15560 case X86::BI__builtin_ia32_gather3siv2di:
15561 IID = Intrinsic::x86_avx512_mask_gather3siv2_di;
15562 break;
15563 case X86::BI__builtin_ia32_gather3siv4df:
15564 IID = Intrinsic::x86_avx512_mask_gather3siv4_df;
15565 break;
15566 case X86::BI__builtin_ia32_gather3siv4di:
15567 IID = Intrinsic::x86_avx512_mask_gather3siv4_di;
15568 break;
15569 case X86::BI__builtin_ia32_gather3siv4sf:
15570 IID = Intrinsic::x86_avx512_mask_gather3siv4_sf;
15571 break;
15572 case X86::BI__builtin_ia32_gather3siv4si:
15573 IID = Intrinsic::x86_avx512_mask_gather3siv4_si;
15574 break;
15575 case X86::BI__builtin_ia32_gather3siv8sf:
15576 IID = Intrinsic::x86_avx512_mask_gather3siv8_sf;
15577 break;
15578 case X86::BI__builtin_ia32_gather3siv8si:
15579 IID = Intrinsic::x86_avx512_mask_gather3siv8_si;
15580 break;
15581 case X86::BI__builtin_ia32_gathersiv8df:
15582 IID = Intrinsic::x86_avx512_mask_gather_dpd_512;
15583 break;
15584 case X86::BI__builtin_ia32_gathersiv16sf:
15585 IID = Intrinsic::x86_avx512_mask_gather_dps_512;
15586 break;
15587 case X86::BI__builtin_ia32_gatherdiv8df:
15588 IID = Intrinsic::x86_avx512_mask_gather_qpd_512;
15589 break;
15590 case X86::BI__builtin_ia32_gatherdiv16sf:
15591 IID = Intrinsic::x86_avx512_mask_gather_qps_512;
15592 break;
15593 case X86::BI__builtin_ia32_gathersiv8di:
15594 IID = Intrinsic::x86_avx512_mask_gather_dpq_512;
15595 break;
15596 case X86::BI__builtin_ia32_gathersiv16si:
15597 IID = Intrinsic::x86_avx512_mask_gather_dpi_512;
15598 break;
15599 case X86::BI__builtin_ia32_gatherdiv8di:
15600 IID = Intrinsic::x86_avx512_mask_gather_qpq_512;
15601 break;
15602 case X86::BI__builtin_ia32_gatherdiv16si:
15603 IID = Intrinsic::x86_avx512_mask_gather_qpi_512;
15604 break;
15605 }
15606
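    // The mask width is limited by the narrower of the pass-through vector
    // (Ops[0]) and the index vector (Ops[2]); convert the integer mask operand
    // to an <N x i1> vector of that width before calling the intrinsic.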
15607 unsigned MinElts = std::min(
15608 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements(),
15609 cast<llvm::FixedVectorType>(Ops[2]->getType())->getNumElements());
15610 Ops[3] = getMaskVecValue(*this, Ops[3], MinElts);
15611 Function *Intr = CGM.getIntrinsic(IID);
15612 return Builder.CreateCall(Intr, Ops);
15613 }
15614
15615 case X86::BI__builtin_ia32_scattersiv8df:
15616 case X86::BI__builtin_ia32_scattersiv16sf:
15617 case X86::BI__builtin_ia32_scatterdiv8df:
15618 case X86::BI__builtin_ia32_scatterdiv16sf:
15619 case X86::BI__builtin_ia32_scattersiv8di:
15620 case X86::BI__builtin_ia32_scattersiv16si:
15621 case X86::BI__builtin_ia32_scatterdiv8di:
15622 case X86::BI__builtin_ia32_scatterdiv16si:
15623 case X86::BI__builtin_ia32_scatterdiv2df:
15624 case X86::BI__builtin_ia32_scatterdiv2di:
15625 case X86::BI__builtin_ia32_scatterdiv4df:
15626 case X86::BI__builtin_ia32_scatterdiv4di:
15627 case X86::BI__builtin_ia32_scatterdiv4sf:
15628 case X86::BI__builtin_ia32_scatterdiv4si:
15629 case X86::BI__builtin_ia32_scatterdiv8sf:
15630 case X86::BI__builtin_ia32_scatterdiv8si:
15631 case X86::BI__builtin_ia32_scattersiv2df:
15632 case X86::BI__builtin_ia32_scattersiv2di:
15633 case X86::BI__builtin_ia32_scattersiv4df:
15634 case X86::BI__builtin_ia32_scattersiv4di:
15635 case X86::BI__builtin_ia32_scattersiv4sf:
15636 case X86::BI__builtin_ia32_scattersiv4si:
15637 case X86::BI__builtin_ia32_scattersiv8sf:
15638 case X86::BI__builtin_ia32_scattersiv8si: {
15639 Intrinsic::ID IID;
15640 switch (BuiltinID) {
15641 default: llvm_unreachable("Unexpected builtin");
15642 case X86::BI__builtin_ia32_scattersiv8df:
15643 IID = Intrinsic::x86_avx512_mask_scatter_dpd_512;
15644 break;
15645 case X86::BI__builtin_ia32_scattersiv16sf:
15646 IID = Intrinsic::x86_avx512_mask_scatter_dps_512;
15647 break;
15648 case X86::BI__builtin_ia32_scatterdiv8df:
15649 IID = Intrinsic::x86_avx512_mask_scatter_qpd_512;
15650 break;
15651 case X86::BI__builtin_ia32_scatterdiv16sf:
15652 IID = Intrinsic::x86_avx512_mask_scatter_qps_512;
15653 break;
15654 case X86::BI__builtin_ia32_scattersiv8di:
15655 IID = Intrinsic::x86_avx512_mask_scatter_dpq_512;
15656 break;
15657 case X86::BI__builtin_ia32_scattersiv16si:
15658 IID = Intrinsic::x86_avx512_mask_scatter_dpi_512;
15659 break;
15660 case X86::BI__builtin_ia32_scatterdiv8di:
15661 IID = Intrinsic::x86_avx512_mask_scatter_qpq_512;
15662 break;
15663 case X86::BI__builtin_ia32_scatterdiv16si:
15664 IID = Intrinsic::x86_avx512_mask_scatter_qpi_512;
15665 break;
15666 case X86::BI__builtin_ia32_scatterdiv2df:
15667 IID = Intrinsic::x86_avx512_mask_scatterdiv2_df;
15668 break;
15669 case X86::BI__builtin_ia32_scatterdiv2di:
15670 IID = Intrinsic::x86_avx512_mask_scatterdiv2_di;
15671 break;
15672 case X86::BI__builtin_ia32_scatterdiv4df:
15673 IID = Intrinsic::x86_avx512_mask_scatterdiv4_df;
15674 break;
15675 case X86::BI__builtin_ia32_scatterdiv4di:
15676 IID = Intrinsic::x86_avx512_mask_scatterdiv4_di;
15677 break;
15678 case X86::BI__builtin_ia32_scatterdiv4sf:
15679 IID = Intrinsic::x86_avx512_mask_scatterdiv4_sf;
15680 break;
15681 case X86::BI__builtin_ia32_scatterdiv4si:
15682 IID = Intrinsic::x86_avx512_mask_scatterdiv4_si;
15683 break;
15684 case X86::BI__builtin_ia32_scatterdiv8sf:
15685 IID = Intrinsic::x86_avx512_mask_scatterdiv8_sf;
15686 break;
15687 case X86::BI__builtin_ia32_scatterdiv8si:
15688 IID = Intrinsic::x86_avx512_mask_scatterdiv8_si;
15689 break;
15690 case X86::BI__builtin_ia32_scattersiv2df:
15691 IID = Intrinsic::x86_avx512_mask_scattersiv2_df;
15692 break;
15693 case X86::BI__builtin_ia32_scattersiv2di:
15694 IID = Intrinsic::x86_avx512_mask_scattersiv2_di;
15695 break;
15696 case X86::BI__builtin_ia32_scattersiv4df:
15697 IID = Intrinsic::x86_avx512_mask_scattersiv4_df;
15698 break;
15699 case X86::BI__builtin_ia32_scattersiv4di:
15700 IID = Intrinsic::x86_avx512_mask_scattersiv4_di;
15701 break;
15702 case X86::BI__builtin_ia32_scattersiv4sf:
15703 IID = Intrinsic::x86_avx512_mask_scattersiv4_sf;
15704 break;
15705 case X86::BI__builtin_ia32_scattersiv4si:
15706 IID = Intrinsic::x86_avx512_mask_scattersiv4_si;
15707 break;
15708 case X86::BI__builtin_ia32_scattersiv8sf:
15709 IID = Intrinsic::x86_avx512_mask_scattersiv8_sf;
15710 break;
15711 case X86::BI__builtin_ia32_scattersiv8si:
15712 IID = Intrinsic::x86_avx512_mask_scattersiv8_si;
15713 break;
15714 }
15715
15716 unsigned MinElts = std::min(
15717 cast<llvm::FixedVectorType>(Ops[2]->getType())->getNumElements(),
15718 cast<llvm::FixedVectorType>(Ops[3]->getType())->getNumElements());
15719 Ops[1] = getMaskVecValue(*this, Ops[1], MinElts);
15720 Function *Intr = CGM.getIntrinsic(IID);
15721 return Builder.CreateCall(Intr, Ops);
15722 }
15723
15724 case X86::BI__builtin_ia32_vextractf128_pd256:
15725 case X86::BI__builtin_ia32_vextractf128_ps256:
15726 case X86::BI__builtin_ia32_vextractf128_si256:
15727 case X86::BI__builtin_ia32_extract128i256:
15728 case X86::BI__builtin_ia32_extractf64x4_mask:
15729 case X86::BI__builtin_ia32_extractf32x4_mask:
15730 case X86::BI__builtin_ia32_extracti64x4_mask:
15731 case X86::BI__builtin_ia32_extracti32x4_mask:
15732 case X86::BI__builtin_ia32_extractf32x8_mask:
15733 case X86::BI__builtin_ia32_extracti32x8_mask:
15734 case X86::BI__builtin_ia32_extractf32x4_256_mask:
15735 case X86::BI__builtin_ia32_extracti32x4_256_mask:
15736 case X86::BI__builtin_ia32_extractf64x2_256_mask:
15737 case X86::BI__builtin_ia32_extracti64x2_256_mask:
15738 case X86::BI__builtin_ia32_extractf64x2_512_mask:
15739 case X86::BI__builtin_ia32_extracti64x2_512_mask: {
15740 auto *DstTy = cast<llvm::FixedVectorType>(ConvertType(E->getType()));
15741 unsigned NumElts = DstTy->getNumElements();
15742 unsigned SrcNumElts =
15743 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15744 unsigned SubVectors = SrcNumElts / NumElts;
15745 unsigned Index = cast<ConstantInt>(Ops[1])->getZExtValue();
15746 assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors");
15747 Index &= SubVectors - 1; // Remove any extra bits.
15748 Index *= NumElts;
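    // For example, extracting 128-bit subvector 2 from a 512-bit vector of 16
    // floats gives NumElts=4, SubVectors=4 and Index=8, so the shuffle below
    // selects source elements 8..11.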
15749
15750 int Indices[16];
15751 for (unsigned i = 0; i != NumElts; ++i)
15752 Indices[i] = i + Index;
15753
15754 Value *Res = Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
15755 "extract");
15756
15757 if (Ops.size() == 4)
15758 Res = EmitX86Select(*this, Ops[3], Res, Ops[2]);
15759
15760 return Res;
15761 }
15762 case X86::BI__builtin_ia32_vinsertf128_pd256:
15763 case X86::BI__builtin_ia32_vinsertf128_ps256:
15764 case X86::BI__builtin_ia32_vinsertf128_si256:
15765 case X86::BI__builtin_ia32_insert128i256:
15766 case X86::BI__builtin_ia32_insertf64x4:
15767 case X86::BI__builtin_ia32_insertf32x4:
15768 case X86::BI__builtin_ia32_inserti64x4:
15769 case X86::BI__builtin_ia32_inserti32x4:
15770 case X86::BI__builtin_ia32_insertf32x8:
15771 case X86::BI__builtin_ia32_inserti32x8:
15772 case X86::BI__builtin_ia32_insertf32x4_256:
15773 case X86::BI__builtin_ia32_inserti32x4_256:
15774 case X86::BI__builtin_ia32_insertf64x2_256:
15775 case X86::BI__builtin_ia32_inserti64x2_256:
15776 case X86::BI__builtin_ia32_insertf64x2_512:
15777 case X86::BI__builtin_ia32_inserti64x2_512: {
15778 unsigned DstNumElts =
15779 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15780 unsigned SrcNumElts =
15781 cast<llvm::FixedVectorType>(Ops[1]->getType())->getNumElements();
15782 unsigned SubVectors = DstNumElts / SrcNumElts;
15783 unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue();
15784 assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors");
15785 Index &= SubVectors - 1; // Remove any extra bits.
15786 Index *= SrcNumElts;
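    // The first shuffle below widens the subvector to the destination width;
    // the second splices it in. E.g. inserting a 4-element subvector into an
    // 8-element destination with an immediate of 1 replaces elements 4..7.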
15787
15788 int Indices[16];
15789 for (unsigned i = 0; i != DstNumElts; ++i)
15790 Indices[i] = (i >= SrcNumElts) ? SrcNumElts + (i % SrcNumElts) : i;
15791
15792 Value *Op1 = Builder.CreateShuffleVector(
15793 Ops[1], ArrayRef(Indices, DstNumElts), "widen");
15794
15795 for (unsigned i = 0; i != DstNumElts; ++i) {
15796 if (i >= Index && i < (Index + SrcNumElts))
15797 Indices[i] = (i - Index) + DstNumElts;
15798 else
15799 Indices[i] = i;
15800 }
15801
15802 return Builder.CreateShuffleVector(Ops[0], Op1,
15803 ArrayRef(Indices, DstNumElts), "insert");
15804 }
15805 case X86::BI__builtin_ia32_pmovqd512_mask:
15806 case X86::BI__builtin_ia32_pmovwb512_mask: {
15807 Value *Res = Builder.CreateTrunc(Ops[0], Ops[1]->getType());
15808 return EmitX86Select(*this, Ops[2], Res, Ops[1]);
15809 }
15810 case X86::BI__builtin_ia32_pmovdb512_mask:
15811 case X86::BI__builtin_ia32_pmovdw512_mask:
15812 case X86::BI__builtin_ia32_pmovqw512_mask: {
15813 if (const auto *C = dyn_cast<Constant>(Ops[2]))
15814 if (C->isAllOnesValue())
15815 return Builder.CreateTrunc(Ops[0], Ops[1]->getType());
15816
15817 Intrinsic::ID IID;
15818 switch (BuiltinID) {
15819 default: llvm_unreachable("Unsupported intrinsic!");
15820 case X86::BI__builtin_ia32_pmovdb512_mask:
15821 IID = Intrinsic::x86_avx512_mask_pmov_db_512;
15822 break;
15823 case X86::BI__builtin_ia32_pmovdw512_mask:
15824 IID = Intrinsic::x86_avx512_mask_pmov_dw_512;
15825 break;
15826 case X86::BI__builtin_ia32_pmovqw512_mask:
15827 IID = Intrinsic::x86_avx512_mask_pmov_qw_512;
15828 break;
15829 }
15830
15831 Function *Intr = CGM.getIntrinsic(IID);
15832 return Builder.CreateCall(Intr, Ops);
15833 }
15834 case X86::BI__builtin_ia32_pblendw128:
15835 case X86::BI__builtin_ia32_blendpd:
15836 case X86::BI__builtin_ia32_blendps:
15837 case X86::BI__builtin_ia32_blendpd256:
15838 case X86::BI__builtin_ia32_blendps256:
15839 case X86::BI__builtin_ia32_pblendw256:
15840 case X86::BI__builtin_ia32_pblendd128:
15841 case X86::BI__builtin_ia32_pblendd256: {
15842 unsigned NumElts =
15843 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15844 unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
15845
15846 int Indices[16];
15847    // If there are more than 8 elements, the immediate is used twice, so make
15848    // sure we handle that.
15849 for (unsigned i = 0; i != NumElts; ++i)
15850 Indices[i] = ((Imm >> (i % 8)) & 0x1) ? NumElts + i : i;
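    // For example, blendps with Imm=0b0101 takes elements 0 and 2 from Ops[1]
    // and elements 1 and 3 from Ops[0].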
15851
15852 return Builder.CreateShuffleVector(Ops[0], Ops[1],
15853 ArrayRef(Indices, NumElts), "blend");
15854 }
15855 case X86::BI__builtin_ia32_pshuflw:
15856 case X86::BI__builtin_ia32_pshuflw256:
15857 case X86::BI__builtin_ia32_pshuflw512: {
15858 uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
15859 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
15860 unsigned NumElts = Ty->getNumElements();
15861
15862 // Splat the 8-bits of immediate 4 times to help the loop wrap around.
15863 Imm = (Imm & 0xff) * 0x01010101;
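    // For example, Imm=0x1B picks words 3,2,1,0 for the low half of each
    // 128-bit lane (a reversal), while the high four words pass through.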
15864
15865 int Indices[32];
15866 for (unsigned l = 0; l != NumElts; l += 8) {
15867 for (unsigned i = 0; i != 4; ++i) {
15868 Indices[l + i] = l + (Imm & 3);
15869 Imm >>= 2;
15870 }
15871 for (unsigned i = 4; i != 8; ++i)
15872 Indices[l + i] = l + i;
15873 }
15874
15875 return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
15876 "pshuflw");
15877 }
15878 case X86::BI__builtin_ia32_pshufhw:
15879 case X86::BI__builtin_ia32_pshufhw256:
15880 case X86::BI__builtin_ia32_pshufhw512: {
15881 uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
15882 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
15883 unsigned NumElts = Ty->getNumElements();
15884
15885 // Splat the 8-bits of immediate 4 times to help the loop wrap around.
15886 Imm = (Imm & 0xff) * 0x01010101;
15887
15888 int Indices[32];
15889 for (unsigned l = 0; l != NumElts; l += 8) {
15890 for (unsigned i = 0; i != 4; ++i)
15891 Indices[l + i] = l + i;
15892 for (unsigned i = 4; i != 8; ++i) {
15893 Indices[l + i] = l + 4 + (Imm & 3);
15894 Imm >>= 2;
15895 }
15896 }
15897
15898 return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
15899 "pshufhw");
15900 }
15901 case X86::BI__builtin_ia32_pshufd:
15902 case X86::BI__builtin_ia32_pshufd256:
15903 case X86::BI__builtin_ia32_pshufd512:
15904 case X86::BI__builtin_ia32_vpermilpd:
15905 case X86::BI__builtin_ia32_vpermilps:
15906 case X86::BI__builtin_ia32_vpermilpd256:
15907 case X86::BI__builtin_ia32_vpermilps256:
15908 case X86::BI__builtin_ia32_vpermilpd512:
15909 case X86::BI__builtin_ia32_vpermilps512: {
15910 uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
15911 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
15912 unsigned NumElts = Ty->getNumElements();
15913 unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128;
15914 unsigned NumLaneElts = NumElts / NumLanes;
15915
15916 // Splat the 8-bits of immediate 4 times to help the loop wrap around.
15917 Imm = (Imm & 0xff) * 0x01010101;
15918
15919 int Indices[16];
15920 for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
15921 for (unsigned i = 0; i != NumLaneElts; ++i) {
15922 Indices[i + l] = (Imm % NumLaneElts) + l;
15923 Imm /= NumLaneElts;
15924 }
15925 }
15926
15927 return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
15928 "permil");
15929 }
15930 case X86::BI__builtin_ia32_shufpd:
15931 case X86::BI__builtin_ia32_shufpd256:
15932 case X86::BI__builtin_ia32_shufpd512:
15933 case X86::BI__builtin_ia32_shufps:
15934 case X86::BI__builtin_ia32_shufps256:
15935 case X86::BI__builtin_ia32_shufps512: {
15936 uint32_t Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
15937 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
15938 unsigned NumElts = Ty->getNumElements();
15939 unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128;
15940 unsigned NumLaneElts = NumElts / NumLanes;
15941
15942 // Splat the 8-bits of immediate 4 times to help the loop wrap around.
15943 Imm = (Imm & 0xff) * 0x01010101;
15944
15945 int Indices[16];
15946 for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
15947 for (unsigned i = 0; i != NumLaneElts; ++i) {
15948 unsigned Index = Imm % NumLaneElts;
15949 Imm /= NumLaneElts;
15950 if (i >= (NumLaneElts / 2))
15951 Index += NumElts;
15952 Indices[l + i] = l + Index;
15953 }
15954 }
15955
15956 return Builder.CreateShuffleVector(Ops[0], Ops[1],
15957 ArrayRef(Indices, NumElts), "shufp");
15958 }
15959 case X86::BI__builtin_ia32_permdi256:
15960 case X86::BI__builtin_ia32_permdf256:
15961 case X86::BI__builtin_ia32_permdi512:
15962 case X86::BI__builtin_ia32_permdf512: {
15963 unsigned Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
15964 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
15965 unsigned NumElts = Ty->getNumElements();
15966
15967 // These intrinsics operate on 256-bit lanes of four 64-bit elements.
15968 int Indices[8];
15969 for (unsigned l = 0; l != NumElts; l += 4)
15970 for (unsigned i = 0; i != 4; ++i)
15971 Indices[l + i] = l + ((Imm >> (2 * i)) & 0x3);
15972
15973 return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
15974 "perm");
15975 }
15976 case X86::BI__builtin_ia32_palignr128:
15977 case X86::BI__builtin_ia32_palignr256:
15978 case X86::BI__builtin_ia32_palignr512: {
15979 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0xff;
15980
15981 unsigned NumElts =
15982 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15983 assert(NumElts % 16 == 0);
15984
15985 // If palignr is shifting the pair of vectors more than the size of two
15986 // lanes, emit zero.
15987 if (ShiftVal >= 32)
15988 return llvm::Constant::getNullValue(ConvertType(E->getType()));
15989
15990 // If palignr is shifting the pair of input vectors more than one lane,
15991 // but less than two lanes, convert to shifting in zeroes.
15992 if (ShiftVal > 16) {
15993 ShiftVal -= 16;
15994 Ops[1] = Ops[0];
15995 Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType());
15996 }
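    // For example, with ShiftVal=4 on the 128-bit form, each lane of the
    // result is bytes 4..15 of Ops[1] followed by bytes 0..3 of Ops[0], i.e.
    // the concatenation Ops[0]:Ops[1] shifted right by 4 bytes.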
15997
15998 int Indices[64];
15999 // 256-bit palignr operates on 128-bit lanes so we need to handle that
16000 for (unsigned l = 0; l != NumElts; l += 16) {
16001 for (unsigned i = 0; i != 16; ++i) {
16002 unsigned Idx = ShiftVal + i;
16003 if (Idx >= 16)
16004 Idx += NumElts - 16; // End of lane, switch operand.
16005 Indices[l + i] = Idx + l;
16006 }
16007 }
16008
16009 return Builder.CreateShuffleVector(Ops[1], Ops[0],
16010 ArrayRef(Indices, NumElts), "palignr");
16011 }
16012 case X86::BI__builtin_ia32_alignd128:
16013 case X86::BI__builtin_ia32_alignd256:
16014 case X86::BI__builtin_ia32_alignd512:
16015 case X86::BI__builtin_ia32_alignq128:
16016 case X86::BI__builtin_ia32_alignq256:
16017 case X86::BI__builtin_ia32_alignq512: {
16018 unsigned NumElts =
16019 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
16020 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0xff;
16021
16022 // Mask the shift amount to width of a vector.
16023 ShiftVal &= NumElts - 1;
16024
16025 int Indices[16];
16026 for (unsigned i = 0; i != NumElts; ++i)
16027 Indices[i] = i + ShiftVal;
16028
16029 return Builder.CreateShuffleVector(Ops[1], Ops[0],
16030 ArrayRef(Indices, NumElts), "valign");
16031 }
16032 case X86::BI__builtin_ia32_shuf_f32x4_256:
16033 case X86::BI__builtin_ia32_shuf_f64x2_256:
16034 case X86::BI__builtin_ia32_shuf_i32x4_256:
16035 case X86::BI__builtin_ia32_shuf_i64x2_256:
16036 case X86::BI__builtin_ia32_shuf_f32x4:
16037 case X86::BI__builtin_ia32_shuf_f64x2:
16038 case X86::BI__builtin_ia32_shuf_i32x4:
16039 case X86::BI__builtin_ia32_shuf_i64x2: {
16040 unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
16041 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
16042 unsigned NumElts = Ty->getNumElements();
16043 unsigned NumLanes = Ty->getPrimitiveSizeInBits() == 512 ? 4 : 2;
16044 unsigned NumLaneElts = NumElts / NumLanes;
16045
16046 int Indices[16];
16047 for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
16048 unsigned Index = (Imm % NumLanes) * NumLaneElts;
16049 Imm /= NumLanes; // Discard the bits we just used.
16050 if (l >= (NumElts / 2))
16051 Index += NumElts; // Switch to other source.
16052 for (unsigned i = 0; i != NumLaneElts; ++i) {
16053 Indices[l + i] = Index + i;
16054 }
16055 }
16056
16057 return Builder.CreateShuffleVector(Ops[0], Ops[1],
16058 ArrayRef(Indices, NumElts), "shuf");
16059 }
16060
16061 case X86::BI__builtin_ia32_vperm2f128_pd256:
16062 case X86::BI__builtin_ia32_vperm2f128_ps256:
16063 case X86::BI__builtin_ia32_vperm2f128_si256:
16064 case X86::BI__builtin_ia32_permti256: {
16065 unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
16066 unsigned NumElts =
16067 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
16068
16069 // This takes a very simple approach since there are two lanes and a
16070 // shuffle can have 2 inputs. So we reserve the first input for the first
16071 // lane and the second input for the second lane. This may result in
16072 // duplicate sources, but this can be dealt with in the backend.
16073
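    // For example, Imm=0x31 selects the high half of Ops[0] for the low result
    // lane and the high half of Ops[1] for the high result lane; setting bit 3
    // (or bit 7) zeroes the low (or high) lane instead.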
16074 Value *OutOps[2];
16075 int Indices[8];
16076 for (unsigned l = 0; l != 2; ++l) {
16077 // Determine the source for this lane.
16078 if (Imm & (1 << ((l * 4) + 3)))
16079 OutOps[l] = llvm::ConstantAggregateZero::get(Ops[0]->getType());
16080 else if (Imm & (1 << ((l * 4) + 1)))
16081 OutOps[l] = Ops[1];
16082 else
16083 OutOps[l] = Ops[0];
16084
16085 for (unsigned i = 0; i != NumElts/2; ++i) {
16086 // Start with ith element of the source for this lane.
16087 unsigned Idx = (l * NumElts) + i;
16088 // If bit 0 of the immediate half is set, switch to the high half of
16089 // the source.
16090 if (Imm & (1 << (l * 4)))
16091 Idx += NumElts/2;
16092 Indices[(l * (NumElts/2)) + i] = Idx;
16093 }
16094 }
16095
16096 return Builder.CreateShuffleVector(OutOps[0], OutOps[1],
16097 ArrayRef(Indices, NumElts), "vperm");
16098 }
16099
16100 case X86::BI__builtin_ia32_pslldqi128_byteshift:
16101 case X86::BI__builtin_ia32_pslldqi256_byteshift:
16102 case X86::BI__builtin_ia32_pslldqi512_byteshift: {
16103 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
16104 auto *ResultType = cast<llvm::FixedVectorType>(Ops[0]->getType());
16105 // Builtin type is vXi64 so multiply by 8 to get bytes.
16106 unsigned NumElts = ResultType->getNumElements() * 8;
16107
16108 // If pslldq is shifting the vector more than 15 bytes, emit zero.
16109 if (ShiftVal >= 16)
16110 return llvm::Constant::getNullValue(ResultType);
16111
16112 int Indices[64];
16113 // 256/512-bit pslldq operates on 128-bit lanes so we need to handle that
16114 for (unsigned l = 0; l != NumElts; l += 16) {
16115 for (unsigned i = 0; i != 16; ++i) {
16116 unsigned Idx = NumElts + i - ShiftVal;
16117 if (Idx < NumElts) Idx -= NumElts - 16; // end of lane, switch operand.
16118 Indices[l + i] = Idx + l;
16119 }
16120 }
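    // For example, a 3-byte pslldq on the 128-bit form produces indices
    // 13..28: the first three result bytes come from the zero vector and the
    // remaining bytes are the low 13 source bytes shifted up.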
16121
16122 auto *VecTy = llvm::FixedVectorType::get(Int8Ty, NumElts);
16123 Value *Cast = Builder.CreateBitCast(Ops[0], VecTy, "cast");
16124 Value *Zero = llvm::Constant::getNullValue(VecTy);
16125 Value *SV = Builder.CreateShuffleVector(
16126 Zero, Cast, ArrayRef(Indices, NumElts), "pslldq");
16127 return Builder.CreateBitCast(SV, Ops[0]->getType(), "cast");
16128 }
16129 case X86::BI__builtin_ia32_psrldqi128_byteshift:
16130 case X86::BI__builtin_ia32_psrldqi256_byteshift:
16131 case X86::BI__builtin_ia32_psrldqi512_byteshift: {
16132 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
16133 auto *ResultType = cast<llvm::FixedVectorType>(Ops[0]->getType());
16134 // Builtin type is vXi64 so multiply by 8 to get bytes.
16135 unsigned NumElts = ResultType->getNumElements() * 8;
16136
16137 // If psrldq is shifting the vector more than 15 bytes, emit zero.
16138 if (ShiftVal >= 16)
16139 return llvm::Constant::getNullValue(ResultType);
16140
16141 int Indices[64];
16142 // 256/512-bit psrldq operates on 128-bit lanes so we need to handle that
16143 for (unsigned l = 0; l != NumElts; l += 16) {
16144 for (unsigned i = 0; i != 16; ++i) {
16145 unsigned Idx = i + ShiftVal;
16146 if (Idx >= 16) Idx += NumElts - 16; // end of lane, switch operand.
16147 Indices[l + i] = Idx + l;
16148 }
16149 }
16150
16151 auto *VecTy = llvm::FixedVectorType::get(Int8Ty, NumElts);
16152 Value *Cast = Builder.CreateBitCast(Ops[0], VecTy, "cast");
16153 Value *Zero = llvm::Constant::getNullValue(VecTy);
16154 Value *SV = Builder.CreateShuffleVector(
16155 Cast, Zero, ArrayRef(Indices, NumElts), "psrldq");
16156 return Builder.CreateBitCast(SV, ResultType, "cast");
16157 }
16158 case X86::BI__builtin_ia32_kshiftliqi:
16159 case X86::BI__builtin_ia32_kshiftlihi:
16160 case X86::BI__builtin_ia32_kshiftlisi:
16161 case X86::BI__builtin_ia32_kshiftlidi: {
16162 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
16163 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
16164
16165 if (ShiftVal >= NumElts)
16166 return llvm::Constant::getNullValue(Ops[0]->getType());
16167
16168 Value *In = getMaskVecValue(*this, Ops[0], NumElts);
16169
16170 int Indices[64];
16171 for (unsigned i = 0; i != NumElts; ++i)
16172 Indices[i] = NumElts + i - ShiftVal;
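    // The left shift is expressed as a shuffle that pulls ShiftVal zero bits
    // in at the low end; e.g. kshiftl by 2 on an 8-bit mask uses indices 6..13
    // into the concatenation (Zero, In).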
16173
16174 Value *Zero = llvm::Constant::getNullValue(In->getType());
16175 Value *SV = Builder.CreateShuffleVector(
16176 Zero, In, ArrayRef(Indices, NumElts), "kshiftl");
16177 return Builder.CreateBitCast(SV, Ops[0]->getType());
16178 }
16179 case X86::BI__builtin_ia32_kshiftriqi:
16180 case X86::BI__builtin_ia32_kshiftrihi:
16181 case X86::BI__builtin_ia32_kshiftrisi:
16182 case X86::BI__builtin_ia32_kshiftridi: {
16183 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
16184 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
16185
16186 if (ShiftVal >= NumElts)
16187 return llvm::Constant::getNullValue(Ops[0]->getType());
16188
16189 Value *In = getMaskVecValue(*this, Ops[0], NumElts);
16190
16191 int Indices[64];
16192 for (unsigned i = 0; i != NumElts; ++i)
16193 Indices[i] = i + ShiftVal;
16194
16195 Value *Zero = llvm::Constant::getNullValue(In->getType());
16196 Value *SV = Builder.CreateShuffleVector(
16197 In, Zero, ArrayRef(Indices, NumElts), "kshiftr");
16198 return Builder.CreateBitCast(SV, Ops[0]->getType());
16199 }
16200 case X86::BI__builtin_ia32_movnti:
16201 case X86::BI__builtin_ia32_movnti64:
16202 case X86::BI__builtin_ia32_movntsd:
16203 case X86::BI__builtin_ia32_movntss: {
16204 llvm::MDNode *Node = llvm::MDNode::get(
16205 getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
16206
16207 Value *Ptr = Ops[0];
16208 Value *Src = Ops[1];
16209
16210 // Extract the 0'th element of the source vector.
16211 if (BuiltinID == X86::BI__builtin_ia32_movntsd ||
16212 BuiltinID == X86::BI__builtin_ia32_movntss)
16213 Src = Builder.CreateExtractElement(Src, (uint64_t)0, "extract");
16214
16215 // Unaligned nontemporal store of the scalar value.
16216 StoreInst *SI = Builder.CreateDefaultAlignedStore(Src, Ptr);
16217 SI->setMetadata(llvm::LLVMContext::MD_nontemporal, Node);
16218 SI->setAlignment(llvm::Align(1));
16219 return SI;
16220 }
16221 // Rotate is a special case of funnel shift - 1st 2 args are the same.
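  // E.g. a rotate-left by N is emitted as the funnel shift fshl(X, X, N) and a
  // rotate-right as fshr(X, X, N); EmitX86FunnelShift handles both the
  // immediate and the per-element variable forms.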
16222 case X86::BI__builtin_ia32_vprotb:
16223 case X86::BI__builtin_ia32_vprotw:
16224 case X86::BI__builtin_ia32_vprotd:
16225 case X86::BI__builtin_ia32_vprotq:
16226 case X86::BI__builtin_ia32_vprotbi:
16227 case X86::BI__builtin_ia32_vprotwi:
16228 case X86::BI__builtin_ia32_vprotdi:
16229 case X86::BI__builtin_ia32_vprotqi:
16230 case X86::BI__builtin_ia32_prold128:
16231 case X86::BI__builtin_ia32_prold256:
16232 case X86::BI__builtin_ia32_prold512:
16233 case X86::BI__builtin_ia32_prolq128:
16234 case X86::BI__builtin_ia32_prolq256:
16235 case X86::BI__builtin_ia32_prolq512:
16236 case X86::BI__builtin_ia32_prolvd128:
16237 case X86::BI__builtin_ia32_prolvd256:
16238 case X86::BI__builtin_ia32_prolvd512:
16239 case X86::BI__builtin_ia32_prolvq128:
16240 case X86::BI__builtin_ia32_prolvq256:
16241 case X86::BI__builtin_ia32_prolvq512:
16242 return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], false);
16243 case X86::BI__builtin_ia32_prord128:
16244 case X86::BI__builtin_ia32_prord256:
16245 case X86::BI__builtin_ia32_prord512:
16246 case X86::BI__builtin_ia32_prorq128:
16247 case X86::BI__builtin_ia32_prorq256:
16248 case X86::BI__builtin_ia32_prorq512:
16249 case X86::BI__builtin_ia32_prorvd128:
16250 case X86::BI__builtin_ia32_prorvd256:
16251 case X86::BI__builtin_ia32_prorvd512:
16252 case X86::BI__builtin_ia32_prorvq128:
16253 case X86::BI__builtin_ia32_prorvq256:
16254 case X86::BI__builtin_ia32_prorvq512:
16255 return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], true);
16256 case X86::BI__builtin_ia32_selectb_128:
16257 case X86::BI__builtin_ia32_selectb_256:
16258 case X86::BI__builtin_ia32_selectb_512:
16259 case X86::BI__builtin_ia32_selectw_128:
16260 case X86::BI__builtin_ia32_selectw_256:
16261 case X86::BI__builtin_ia32_selectw_512:
16262 case X86::BI__builtin_ia32_selectd_128:
16263 case X86::BI__builtin_ia32_selectd_256:
16264 case X86::BI__builtin_ia32_selectd_512:
16265 case X86::BI__builtin_ia32_selectq_128:
16266 case X86::BI__builtin_ia32_selectq_256:
16267 case X86::BI__builtin_ia32_selectq_512:
16268 case X86::BI__builtin_ia32_selectph_128:
16269 case X86::BI__builtin_ia32_selectph_256:
16270 case X86::BI__builtin_ia32_selectph_512:
16271 case X86::BI__builtin_ia32_selectpbf_128:
16272 case X86::BI__builtin_ia32_selectpbf_256:
16273 case X86::BI__builtin_ia32_selectpbf_512:
16274 case X86::BI__builtin_ia32_selectps_128:
16275 case X86::BI__builtin_ia32_selectps_256:
16276 case X86::BI__builtin_ia32_selectps_512:
16277 case X86::BI__builtin_ia32_selectpd_128:
16278 case X86::BI__builtin_ia32_selectpd_256:
16279 case X86::BI__builtin_ia32_selectpd_512:
16280 return EmitX86Select(*this, Ops[0], Ops[1], Ops[2]);
16281 case X86::BI__builtin_ia32_selectsh_128:
16282 case X86::BI__builtin_ia32_selectsbf_128:
16283 case X86::BI__builtin_ia32_selectss_128:
16284 case X86::BI__builtin_ia32_selectsd_128: {
16285 Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
16286 Value *B = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
16287 A = EmitX86ScalarSelect(*this, Ops[0], A, B);
16288 return Builder.CreateInsertElement(Ops[1], A, (uint64_t)0);
16289 }
16290 case X86::BI__builtin_ia32_cmpb128_mask:
16291 case X86::BI__builtin_ia32_cmpb256_mask:
16292 case X86::BI__builtin_ia32_cmpb512_mask:
16293 case X86::BI__builtin_ia32_cmpw128_mask:
16294 case X86::BI__builtin_ia32_cmpw256_mask:
16295 case X86::BI__builtin_ia32_cmpw512_mask:
16296 case X86::BI__builtin_ia32_cmpd128_mask:
16297 case X86::BI__builtin_ia32_cmpd256_mask:
16298 case X86::BI__builtin_ia32_cmpd512_mask:
16299 case X86::BI__builtin_ia32_cmpq128_mask:
16300 case X86::BI__builtin_ia32_cmpq256_mask:
16301 case X86::BI__builtin_ia32_cmpq512_mask: {
16302 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
16303 return EmitX86MaskedCompare(*this, CC, true, Ops);
16304 }
16305 case X86::BI__builtin_ia32_ucmpb128_mask:
16306 case X86::BI__builtin_ia32_ucmpb256_mask:
16307 case X86::BI__builtin_ia32_ucmpb512_mask:
16308 case X86::BI__builtin_ia32_ucmpw128_mask:
16309 case X86::BI__builtin_ia32_ucmpw256_mask:
16310 case X86::BI__builtin_ia32_ucmpw512_mask:
16311 case X86::BI__builtin_ia32_ucmpd128_mask:
16312 case X86::BI__builtin_ia32_ucmpd256_mask:
16313 case X86::BI__builtin_ia32_ucmpd512_mask:
16314 case X86::BI__builtin_ia32_ucmpq128_mask:
16315 case X86::BI__builtin_ia32_ucmpq256_mask:
16316 case X86::BI__builtin_ia32_ucmpq512_mask: {
16317 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
16318 return EmitX86MaskedCompare(*this, CC, false, Ops);
16319 }
16320 case X86::BI__builtin_ia32_vpcomb:
16321 case X86::BI__builtin_ia32_vpcomw:
16322 case X86::BI__builtin_ia32_vpcomd:
16323 case X86::BI__builtin_ia32_vpcomq:
16324 return EmitX86vpcom(*this, Ops, true);
16325 case X86::BI__builtin_ia32_vpcomub:
16326 case X86::BI__builtin_ia32_vpcomuw:
16327 case X86::BI__builtin_ia32_vpcomud:
16328 case X86::BI__builtin_ia32_vpcomuq:
16329 return EmitX86vpcom(*this, Ops, false);
16330
16331 case X86::BI__builtin_ia32_kortestcqi:
16332 case X86::BI__builtin_ia32_kortestchi:
16333 case X86::BI__builtin_ia32_kortestcsi:
16334 case X86::BI__builtin_ia32_kortestcdi: {
16335 Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops);
16336 Value *C = llvm::Constant::getAllOnesValue(Ops[0]->getType());
16337 Value *Cmp = Builder.CreateICmpEQ(Or, C);
16338 return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
16339 }
16340 case X86::BI__builtin_ia32_kortestzqi:
16341 case X86::BI__builtin_ia32_kortestzhi:
16342 case X86::BI__builtin_ia32_kortestzsi:
16343 case X86::BI__builtin_ia32_kortestzdi: {
16344 Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops);
16345 Value *C = llvm::Constant::getNullValue(Ops[0]->getType());
16346 Value *Cmp = Builder.CreateICmpEQ(Or, C);
16347 return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
16348 }
16349
16350 case X86::BI__builtin_ia32_ktestcqi:
16351 case X86::BI__builtin_ia32_ktestzqi:
16352 case X86::BI__builtin_ia32_ktestchi:
16353 case X86::BI__builtin_ia32_ktestzhi:
16354 case X86::BI__builtin_ia32_ktestcsi:
16355 case X86::BI__builtin_ia32_ktestzsi:
16356 case X86::BI__builtin_ia32_ktestcdi:
16357 case X86::BI__builtin_ia32_ktestzdi: {
16358 Intrinsic::ID IID;
16359 switch (BuiltinID) {
16360 default: llvm_unreachable("Unsupported intrinsic!");
16361 case X86::BI__builtin_ia32_ktestcqi:
16362 IID = Intrinsic::x86_avx512_ktestc_b;
16363 break;
16364 case X86::BI__builtin_ia32_ktestzqi:
16365 IID = Intrinsic::x86_avx512_ktestz_b;
16366 break;
16367 case X86::BI__builtin_ia32_ktestchi:
16368 IID = Intrinsic::x86_avx512_ktestc_w;
16369 break;
16370 case X86::BI__builtin_ia32_ktestzhi:
16371 IID = Intrinsic::x86_avx512_ktestz_w;
16372 break;
16373 case X86::BI__builtin_ia32_ktestcsi:
16374 IID = Intrinsic::x86_avx512_ktestc_d;
16375 break;
16376 case X86::BI__builtin_ia32_ktestzsi:
16377 IID = Intrinsic::x86_avx512_ktestz_d;
16378 break;
16379 case X86::BI__builtin_ia32_ktestcdi:
16380 IID = Intrinsic::x86_avx512_ktestc_q;
16381 break;
16382 case X86::BI__builtin_ia32_ktestzdi:
16383 IID = Intrinsic::x86_avx512_ktestz_q;
16384 break;
16385 }
16386
16387 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
16388 Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
16389 Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
16390 Function *Intr = CGM.getIntrinsic(IID);
16391 return Builder.CreateCall(Intr, {LHS, RHS});
16392 }
16393
16394 case X86::BI__builtin_ia32_kaddqi:
16395 case X86::BI__builtin_ia32_kaddhi:
16396 case X86::BI__builtin_ia32_kaddsi:
16397 case X86::BI__builtin_ia32_kadddi: {
16398 Intrinsic::ID IID;
16399 switch (BuiltinID) {
16400 default: llvm_unreachable("Unsupported intrinsic!");
16401 case X86::BI__builtin_ia32_kaddqi:
16402 IID = Intrinsic::x86_avx512_kadd_b;
16403 break;
16404 case X86::BI__builtin_ia32_kaddhi:
16405 IID = Intrinsic::x86_avx512_kadd_w;
16406 break;
16407 case X86::BI__builtin_ia32_kaddsi:
16408 IID = Intrinsic::x86_avx512_kadd_d;
16409 break;
16410 case X86::BI__builtin_ia32_kadddi:
16411 IID = Intrinsic::x86_avx512_kadd_q;
16412 break;
16413 }
16414
16415 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
16416 Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
16417 Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
16418 Function *Intr = CGM.getIntrinsic(IID);
16419 Value *Res = Builder.CreateCall(Intr, {LHS, RHS});
16420 return Builder.CreateBitCast(Res, Ops[0]->getType());
16421 }
16422 case X86::BI__builtin_ia32_kandqi:
16423 case X86::BI__builtin_ia32_kandhi:
16424 case X86::BI__builtin_ia32_kandsi:
16425 case X86::BI__builtin_ia32_kanddi:
16426 return EmitX86MaskLogic(*this, Instruction::And, Ops);
16427 case X86::BI__builtin_ia32_kandnqi:
16428 case X86::BI__builtin_ia32_kandnhi:
16429 case X86::BI__builtin_ia32_kandnsi:
16430 case X86::BI__builtin_ia32_kandndi:
16431 return EmitX86MaskLogic(*this, Instruction::And, Ops, true);
16432 case X86::BI__builtin_ia32_korqi:
16433 case X86::BI__builtin_ia32_korhi:
16434 case X86::BI__builtin_ia32_korsi:
16435 case X86::BI__builtin_ia32_kordi:
16436 return EmitX86MaskLogic(*this, Instruction::Or, Ops);
16437 case X86::BI__builtin_ia32_kxnorqi:
16438 case X86::BI__builtin_ia32_kxnorhi:
16439 case X86::BI__builtin_ia32_kxnorsi:
16440 case X86::BI__builtin_ia32_kxnordi:
16441 return EmitX86MaskLogic(*this, Instruction::Xor, Ops, true);
16442 case X86::BI__builtin_ia32_kxorqi:
16443 case X86::BI__builtin_ia32_kxorhi:
16444 case X86::BI__builtin_ia32_kxorsi:
16445 case X86::BI__builtin_ia32_kxordi:
16446 return EmitX86MaskLogic(*this, Instruction::Xor, Ops);
16447 case X86::BI__builtin_ia32_knotqi:
16448 case X86::BI__builtin_ia32_knothi:
16449 case X86::BI__builtin_ia32_knotsi:
16450 case X86::BI__builtin_ia32_knotdi: {
16451 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
16452 Value *Res = getMaskVecValue(*this, Ops[0], NumElts);
16453 return Builder.CreateBitCast(Builder.CreateNot(Res),
16454 Ops[0]->getType());
16455 }
16456 case X86::BI__builtin_ia32_kmovb:
16457 case X86::BI__builtin_ia32_kmovw:
16458 case X86::BI__builtin_ia32_kmovd:
16459 case X86::BI__builtin_ia32_kmovq: {
16460 // Bitcast to vXi1 type and then back to integer. This gets the mask
16461 // register type into the IR, but might be optimized out depending on
16462 // what's around it.
16463 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
16464 Value *Res = getMaskVecValue(*this, Ops[0], NumElts);
16465 return Builder.CreateBitCast(Res, Ops[0]->getType());
16466 }
16467
16468 case X86::BI__builtin_ia32_kunpckdi:
16469 case X86::BI__builtin_ia32_kunpcksi:
16470 case X86::BI__builtin_ia32_kunpckhi: {
16471 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
16472 Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
16473 Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
16474 int Indices[64];
16475 for (unsigned i = 0; i != NumElts; ++i)
16476 Indices[i] = i;
16477
16478 // First extract half of each vector. This gives better codegen than
16479 // doing it in a single shuffle.
16480 LHS = Builder.CreateShuffleVector(LHS, LHS, ArrayRef(Indices, NumElts / 2));
16481 RHS = Builder.CreateShuffleVector(RHS, RHS, ArrayRef(Indices, NumElts / 2));
16482 // Concat the vectors.
16483 // NOTE: Operands are swapped to match the intrinsic definition.
16484 Value *Res =
16485 Builder.CreateShuffleVector(RHS, LHS, ArrayRef(Indices, NumElts));
16486 return Builder.CreateBitCast(Res, Ops[0]->getType());
16487 }
16488
16489 case X86::BI__builtin_ia32_vplzcntd_128:
16490 case X86::BI__builtin_ia32_vplzcntd_256:
16491 case X86::BI__builtin_ia32_vplzcntd_512:
16492 case X86::BI__builtin_ia32_vplzcntq_128:
16493 case X86::BI__builtin_ia32_vplzcntq_256:
16494 case X86::BI__builtin_ia32_vplzcntq_512: {
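// vplzcnt maps directly onto llvm.ctlz; the i1 false argument means the
// result is defined (the element bit width) for a zero input.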
16495 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
16496 return Builder.CreateCall(F, {Ops[0],Builder.getInt1(false)});
16497 }
16498 case X86::BI__builtin_ia32_sqrtss:
16499 case X86::BI__builtin_ia32_sqrtsd: {
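// Scalar sqrt: extract element 0, take its square root (constrained if
// strict FP is active), and reinsert it into the source vector.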
16500 Value *A = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
16501 Function *F;
16502 if (Builder.getIsFPConstrained()) {
16503 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
16504 F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
16505 A->getType());
16506 A = Builder.CreateConstrainedFPCall(F, {A});
16507 } else {
16508 F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType());
16509 A = Builder.CreateCall(F, {A});
16510 }
16511 return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0);
16512 }
16513 case X86::BI__builtin_ia32_sqrtsh_round_mask:
16514 case X86::BI__builtin_ia32_sqrtsd_round_mask:
16515 case X86::BI__builtin_ia32_sqrtss_round_mask: {
16516 unsigned CC = cast<llvm::ConstantInt>(Ops[4])->getZExtValue();
16517 // Support only if the rounding mode is 4 (AKA CUR_DIRECTION),
16518 // otherwise keep the intrinsic.
16519 if (CC != 4) {
16520 Intrinsic::ID IID;
16521
16522 switch (BuiltinID) {
16523 default:
16524 llvm_unreachable("Unsupported intrinsic!");
16525 case X86::BI__builtin_ia32_sqrtsh_round_mask:
16526 IID = Intrinsic::x86_avx512fp16_mask_sqrt_sh;
16527 break;
16528 case X86::BI__builtin_ia32_sqrtsd_round_mask:
16529 IID = Intrinsic::x86_avx512_mask_sqrt_sd;
16530 break;
16531 case X86::BI__builtin_ia32_sqrtss_round_mask:
16532 IID = Intrinsic::x86_avx512_mask_sqrt_ss;
16533 break;
16534 }
16535 return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
16536 }
16537 Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
16538 Function *F;
16539 if (Builder.getIsFPConstrained()) {
16540 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
16541 F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
16542 A->getType());
16543 A = Builder.CreateConstrainedFPCall(F, A);
16544 } else {
16545 F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType());
16546 A = Builder.CreateCall(F, A);
16547 }
16548 Value *Src = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
16549 A = EmitX86ScalarSelect(*this, Ops[3], A, Src);
16550 return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0);
16551 }
16552 case X86::BI__builtin_ia32_sqrtpd256:
16553 case X86::BI__builtin_ia32_sqrtpd:
16554 case X86::BI__builtin_ia32_sqrtps256:
16555 case X86::BI__builtin_ia32_sqrtps:
16556 case X86::BI__builtin_ia32_sqrtph256:
16557 case X86::BI__builtin_ia32_sqrtph:
16558 case X86::BI__builtin_ia32_sqrtph512:
16559 case X86::BI__builtin_ia32_vsqrtnepbf16256:
16560 case X86::BI__builtin_ia32_vsqrtnepbf16:
16561 case X86::BI__builtin_ia32_vsqrtnepbf16512:
16562 case X86::BI__builtin_ia32_sqrtps512:
16563 case X86::BI__builtin_ia32_sqrtpd512: {
16564 if (Ops.size() == 2) {
16565 unsigned CC = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
16566 // Support only if the rounding mode is 4 (AKA CUR_DIRECTION),
16567 // otherwise keep the intrinsic.
16568 if (CC != 4) {
16569 Intrinsic::ID IID;
16570
16571 switch (BuiltinID) {
16572 default:
16573 llvm_unreachable("Unsupported intrinsic!");
16574 case X86::BI__builtin_ia32_sqrtph512:
16575 IID = Intrinsic::x86_avx512fp16_sqrt_ph_512;
16576 break;
16577 case X86::BI__builtin_ia32_sqrtps512:
16578 IID = Intrinsic::x86_avx512_sqrt_ps_512;
16579 break;
16580 case X86::BI__builtin_ia32_sqrtpd512:
16581 IID = Intrinsic::x86_avx512_sqrt_pd_512;
16582 break;
16583 }
16584 return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
16585 }
16586 }
16587 if (Builder.getIsFPConstrained()) {
16588 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
16589 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
16590 Ops[0]->getType());
16591 return Builder.CreateConstrainedFPCall(F, Ops[0]);
16592 } else {
16593 Function *F = CGM.getIntrinsic(Intrinsic::sqrt, Ops[0]->getType());
16594 return Builder.CreateCall(F, Ops[0]);
16595 }
16596 }
16597
16598 case X86::BI__builtin_ia32_pmuludq128:
16599 case X86::BI__builtin_ia32_pmuludq256:
16600 case X86::BI__builtin_ia32_pmuludq512:
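// pmuludq: unsigned 32x32->64 multiply of the low 32 bits of each 64-bit lane.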
16601 return EmitX86Muldq(*this, /*IsSigned*/false, Ops);
16602
16603 case X86::BI__builtin_ia32_pmuldq128:
16604 case X86::BI__builtin_ia32_pmuldq256:
16605 case X86::BI__builtin_ia32_pmuldq512:
16606 return EmitX86Muldq(*this, /*IsSigned*/true, Ops);
16607
16608 case X86::BI__builtin_ia32_pternlogd512_mask:
16609 case X86::BI__builtin_ia32_pternlogq512_mask:
16610 case X86::BI__builtin_ia32_pternlogd128_mask:
16611 case X86::BI__builtin_ia32_pternlogd256_mask:
16612 case X86::BI__builtin_ia32_pternlogq128_mask:
16613 case X86::BI__builtin_ia32_pternlogq256_mask:
16614 return EmitX86Ternlog(*this, /*ZeroMask*/false, Ops);
16615
16616 case X86::BI__builtin_ia32_pternlogd512_maskz:
16617 case X86::BI__builtin_ia32_pternlogq512_maskz:
16618 case X86::BI__builtin_ia32_pternlogd128_maskz:
16619 case X86::BI__builtin_ia32_pternlogd256_maskz:
16620 case X86::BI__builtin_ia32_pternlogq128_maskz:
16621 case X86::BI__builtin_ia32_pternlogq256_maskz:
16622 return EmitX86Ternlog(*this, /*ZeroMask*/true, Ops);
16623
16624 case X86::BI__builtin_ia32_vpshldd128:
16625 case X86::BI__builtin_ia32_vpshldd256:
16626 case X86::BI__builtin_ia32_vpshldd512:
16627 case X86::BI__builtin_ia32_vpshldq128:
16628 case X86::BI__builtin_ia32_vpshldq256:
16629 case X86::BI__builtin_ia32_vpshldq512:
16630 case X86::BI__builtin_ia32_vpshldw128:
16631 case X86::BI__builtin_ia32_vpshldw256:
16632 case X86::BI__builtin_ia32_vpshldw512:
16633 return EmitX86FunnelShift(*this, Ops[0], Ops[1], Ops[2], false);
16634
16635 case X86::BI__builtin_ia32_vpshrdd128:
16636 case X86::BI__builtin_ia32_vpshrdd256:
16637 case X86::BI__builtin_ia32_vpshrdd512:
16638 case X86::BI__builtin_ia32_vpshrdq128:
16639 case X86::BI__builtin_ia32_vpshrdq256:
16640 case X86::BI__builtin_ia32_vpshrdq512:
16641 case X86::BI__builtin_ia32_vpshrdw128:
16642 case X86::BI__builtin_ia32_vpshrdw256:
16643 case X86::BI__builtin_ia32_vpshrdw512:
16644 // Ops 0 and 1 are swapped.
16645 return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true);
16646
16647 case X86::BI__builtin_ia32_vpshldvd128:
16648 case X86::BI__builtin_ia32_vpshldvd256:
16649 case X86::BI__builtin_ia32_vpshldvd512:
16650 case X86::BI__builtin_ia32_vpshldvq128:
16651 case X86::BI__builtin_ia32_vpshldvq256:
16652 case X86::BI__builtin_ia32_vpshldvq512:
16653 case X86::BI__builtin_ia32_vpshldvw128:
16654 case X86::BI__builtin_ia32_vpshldvw256:
16655 case X86::BI__builtin_ia32_vpshldvw512:
16656 return EmitX86FunnelShift(*this, Ops[0], Ops[1], Ops[2], false);
16657
16658 case X86::BI__builtin_ia32_vpshrdvd128:
16659 case X86::BI__builtin_ia32_vpshrdvd256:
16660 case X86::BI__builtin_ia32_vpshrdvd512:
16661 case X86::BI__builtin_ia32_vpshrdvq128:
16662 case X86::BI__builtin_ia32_vpshrdvq256:
16663 case X86::BI__builtin_ia32_vpshrdvq512:
16664 case X86::BI__builtin_ia32_vpshrdvw128:
16665 case X86::BI__builtin_ia32_vpshrdvw256:
16666 case X86::BI__builtin_ia32_vpshrdvw512:
16667 // Ops 0 and 1 are swapped.
16668 return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true);
16669
16670 // Reductions
16671 case X86::BI__builtin_ia32_reduce_fadd_pd512:
16672 case X86::BI__builtin_ia32_reduce_fadd_ps512:
16673 case X86::BI__builtin_ia32_reduce_fadd_ph512:
16674 case X86::BI__builtin_ia32_reduce_fadd_ph256:
16675 case X86::BI__builtin_ia32_reduce_fadd_ph128: {
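// These reductions are defined to be reassociable, so set the reassoc
// fast-math flag; llvm.vector.reduce.fadd may then lower to an unordered
// reduction.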
16676 Function *F =
16677 CGM.getIntrinsic(Intrinsic::vector_reduce_fadd, Ops[1]->getType());
16678 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
16679 Builder.getFastMathFlags().setAllowReassoc();
16680 return Builder.CreateCall(F, {Ops[0], Ops[1]});
16681 }
16682 case X86::BI__builtin_ia32_reduce_fmul_pd512:
16683 case X86::BI__builtin_ia32_reduce_fmul_ps512:
16684 case X86::BI__builtin_ia32_reduce_fmul_ph512:
16685 case X86::BI__builtin_ia32_reduce_fmul_ph256:
16686 case X86::BI__builtin_ia32_reduce_fmul_ph128: {
16687 Function *F =
16688 CGM.getIntrinsic(Intrinsic::vector_reduce_fmul, Ops[1]->getType());
16689 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
16690 Builder.getFastMathFlags().setAllowReassoc();
16691 return Builder.CreateCall(F, {Ops[0], Ops[1]});
16692 }
16693 case X86::BI__builtin_ia32_reduce_fmax_pd512:
16694 case X86::BI__builtin_ia32_reduce_fmax_ps512:
16695 case X86::BI__builtin_ia32_reduce_fmax_ph512:
16696 case X86::BI__builtin_ia32_reduce_fmax_ph256:
16697 case X86::BI__builtin_ia32_reduce_fmax_ph128: {
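// The fmax/fmin reductions assume NaN-free inputs, so the call is marked nnan.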
16698 Function *F =
16699 CGM.getIntrinsic(Intrinsic::vector_reduce_fmax, Ops[0]->getType());
16700 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
16701 Builder.getFastMathFlags().setNoNaNs();
16702 return Builder.CreateCall(F, {Ops[0]});
16703 }
16704 case X86::BI__builtin_ia32_reduce_fmin_pd512:
16705 case X86::BI__builtin_ia32_reduce_fmin_ps512:
16706 case X86::BI__builtin_ia32_reduce_fmin_ph512:
16707 case X86::BI__builtin_ia32_reduce_fmin_ph256:
16708 case X86::BI__builtin_ia32_reduce_fmin_ph128: {
16709 Function *F =
16710 CGM.getIntrinsic(Intrinsic::vector_reduce_fmin, Ops[0]->getType());
16711 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
16712 Builder.getFastMathFlags().setNoNaNs();
16713 return Builder.CreateCall(F, {Ops[0]});
16714 }
16715
16716 case X86::BI__builtin_ia32_rdrand16_step:
16717 case X86::BI__builtin_ia32_rdrand32_step:
16718 case X86::BI__builtin_ia32_rdrand64_step:
16719 case X86::BI__builtin_ia32_rdseed16_step:
16720 case X86::BI__builtin_ia32_rdseed32_step:
16721 case X86::BI__builtin_ia32_rdseed64_step: {
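// rdrand/rdseed return {random value, success flag}; store the value through
// the out-pointer operand and return the flag.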
16722 Intrinsic::ID ID;
16723 switch (BuiltinID) {
16724 default: llvm_unreachable("Unsupported intrinsic!");
16725 case X86::BI__builtin_ia32_rdrand16_step:
16726 ID = Intrinsic::x86_rdrand_16;
16727 break;
16728 case X86::BI__builtin_ia32_rdrand32_step:
16729 ID = Intrinsic::x86_rdrand_32;
16730 break;
16731 case X86::BI__builtin_ia32_rdrand64_step:
16732 ID = Intrinsic::x86_rdrand_64;
16733 break;
16734 case X86::BI__builtin_ia32_rdseed16_step:
16735 ID = Intrinsic::x86_rdseed_16;
16736 break;
16737 case X86::BI__builtin_ia32_rdseed32_step:
16738 ID = Intrinsic::x86_rdseed_32;
16739 break;
16740 case X86::BI__builtin_ia32_rdseed64_step:
16741 ID = Intrinsic::x86_rdseed_64;
16742 break;
16743 }
16744
16745 Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID));
16746 Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 0),
16747 Ops[0]);
16748 return Builder.CreateExtractValue(Call, 1);
16749 }
16750 case X86::BI__builtin_ia32_addcarryx_u32:
16751 case X86::BI__builtin_ia32_addcarryx_u64:
16752 case X86::BI__builtin_ia32_subborrow_u32:
16753 case X86::BI__builtin_ia32_subborrow_u64: {
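// The addcarry/subborrow intrinsics return {carry-out, result}; store the
// result through Ops[3] and return the carry-out.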
16754 Intrinsic::ID IID;
16755 switch (BuiltinID) {
16756 default: llvm_unreachable("Unsupported intrinsic!");
16757 case X86::BI__builtin_ia32_addcarryx_u32:
16758 IID = Intrinsic::x86_addcarry_32;
16759 break;
16760 case X86::BI__builtin_ia32_addcarryx_u64:
16761 IID = Intrinsic::x86_addcarry_64;
16762 break;
16763 case X86::BI__builtin_ia32_subborrow_u32:
16764 IID = Intrinsic::x86_subborrow_32;
16765 break;
16766 case X86::BI__builtin_ia32_subborrow_u64:
16767 IID = Intrinsic::x86_subborrow_64;
16768 break;
16769 }
16770
16771 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID),
16772 { Ops[0], Ops[1], Ops[2] });
16773 Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1),
16774 Ops[3]);
16775 return Builder.CreateExtractValue(Call, 0);
16776 }
16777
16778 case X86::BI__builtin_ia32_fpclassps128_mask:
16779 case X86::BI__builtin_ia32_fpclassps256_mask:
16780 case X86::BI__builtin_ia32_fpclassps512_mask:
16781 case X86::BI__builtin_ia32_vfpclasspbf16128_mask:
16782 case X86::BI__builtin_ia32_vfpclasspbf16256_mask:
16783 case X86::BI__builtin_ia32_vfpclasspbf16512_mask:
16784 case X86::BI__builtin_ia32_fpclassph128_mask:
16785 case X86::BI__builtin_ia32_fpclassph256_mask:
16786 case X86::BI__builtin_ia32_fpclassph512_mask:
16787 case X86::BI__builtin_ia32_fpclasspd128_mask:
16788 case X86::BI__builtin_ia32_fpclasspd256_mask:
16789 case X86::BI__builtin_ia32_fpclasspd512_mask: {
16790 unsigned NumElts =
16791 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
16792 Value *MaskIn = Ops[2];
16793 Ops.erase(&Ops[2]);
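// The mask operand is removed here and applied to the intrinsic's result
// below via EmitX86MaskedCompareResult.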
16794
16795 Intrinsic::ID ID;
16796 switch (BuiltinID) {
16797 default: llvm_unreachable("Unsupported intrinsic!");
16798 case X86::BI__builtin_ia32_vfpclasspbf16128_mask:
16799 ID = Intrinsic::x86_avx10_fpclass_nepbf16_128;
16800 break;
16801 case X86::BI__builtin_ia32_vfpclasspbf16256_mask:
16802 ID = Intrinsic::x86_avx10_fpclass_nepbf16_256;
16803 break;
16804 case X86::BI__builtin_ia32_vfpclasspbf16512_mask:
16805 ID = Intrinsic::x86_avx10_fpclass_nepbf16_512;
16806 break;
16807 case X86::BI__builtin_ia32_fpclassph128_mask:
16808 ID = Intrinsic::x86_avx512fp16_fpclass_ph_128;
16809 break;
16810 case X86::BI__builtin_ia32_fpclassph256_mask:
16811 ID = Intrinsic::x86_avx512fp16_fpclass_ph_256;
16812 break;
16813 case X86::BI__builtin_ia32_fpclassph512_mask:
16814 ID = Intrinsic::x86_avx512fp16_fpclass_ph_512;
16815 break;
16816 case X86::BI__builtin_ia32_fpclassps128_mask:
16817 ID = Intrinsic::x86_avx512_fpclass_ps_128;
16818 break;
16819 case X86::BI__builtin_ia32_fpclassps256_mask:
16820 ID = Intrinsic::x86_avx512_fpclass_ps_256;
16821 break;
16822 case X86::BI__builtin_ia32_fpclassps512_mask:
16823 ID = Intrinsic::x86_avx512_fpclass_ps_512;
16824 break;
16825 case X86::BI__builtin_ia32_fpclasspd128_mask:
16826 ID = Intrinsic::x86_avx512_fpclass_pd_128;
16827 break;
16828 case X86::BI__builtin_ia32_fpclasspd256_mask:
16829 ID = Intrinsic::x86_avx512_fpclass_pd_256;
16830 break;
16831 case X86::BI__builtin_ia32_fpclasspd512_mask:
16832 ID = Intrinsic::x86_avx512_fpclass_pd_512;
16833 break;
16834 }
16835
16836 Value *Fpclass = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
16837 return EmitX86MaskedCompareResult(*this, Fpclass, NumElts, MaskIn);
16838 }
16839
16840 case X86::BI__builtin_ia32_vp2intersect_q_512:
16841 case X86::BI__builtin_ia32_vp2intersect_q_256:
16842 case X86::BI__builtin_ia32_vp2intersect_q_128:
16843 case X86::BI__builtin_ia32_vp2intersect_d_512:
16844 case X86::BI__builtin_ia32_vp2intersect_d_256:
16845 case X86::BI__builtin_ia32_vp2intersect_d_128: {
16846 unsigned NumElts =
16847 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
16848 Intrinsic::ID ID;
16849
16850 switch (BuiltinID) {
16851 default: llvm_unreachable("Unsupported intrinsic!");
16852 case X86::BI__builtin_ia32_vp2intersect_q_512:
16853 ID = Intrinsic::x86_avx512_vp2intersect_q_512;
16854 break;
16855 case X86::BI__builtin_ia32_vp2intersect_q_256:
16856 ID = Intrinsic::x86_avx512_vp2intersect_q_256;
16857 break;
16858 case X86::BI__builtin_ia32_vp2intersect_q_128:
16859 ID = Intrinsic::x86_avx512_vp2intersect_q_128;
16860 break;
16861 case X86::BI__builtin_ia32_vp2intersect_d_512:
16862 ID = Intrinsic::x86_avx512_vp2intersect_d_512;
16863 break;
16864 case X86::BI__builtin_ia32_vp2intersect_d_256:
16865 ID = Intrinsic::x86_avx512_vp2intersect_d_256;
16866 break;
16867 case X86::BI__builtin_ia32_vp2intersect_d_128:
16868 ID = Intrinsic::x86_avx512_vp2intersect_d_128;
16869 break;
16870 }
16871
16872 Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID), {Ops[0], Ops[1]});
16873 Value *Result = Builder.CreateExtractValue(Call, 0);
16874 Result = EmitX86MaskedCompareResult(*this, Result, NumElts, nullptr);
16875 Builder.CreateDefaultAlignedStore(Result, Ops[2]);
16876
16877 Result = Builder.CreateExtractValue(Call, 1);
16878 Result = EmitX86MaskedCompareResult(*this, Result, NumElts, nullptr);
16879 return Builder.CreateDefaultAlignedStore(Result, Ops[3]);
16880 }
16881
16882 case X86::BI__builtin_ia32_vpmultishiftqb128:
16883 case X86::BI__builtin_ia32_vpmultishiftqb256:
16884 case X86::BI__builtin_ia32_vpmultishiftqb512: {
16885 Intrinsic::ID ID;
16886 switch (BuiltinID) {
16887 default: llvm_unreachable("Unsupported intrinsic!");
16888 case X86::BI__builtin_ia32_vpmultishiftqb128:
16889 ID = Intrinsic::x86_avx512_pmultishift_qb_128;
16890 break;
16891 case X86::BI__builtin_ia32_vpmultishiftqb256:
16892 ID = Intrinsic::x86_avx512_pmultishift_qb_256;
16893 break;
16894 case X86::BI__builtin_ia32_vpmultishiftqb512:
16895 ID = Intrinsic::x86_avx512_pmultishift_qb_512;
16896 break;
16897 }
16898
16899 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
16900 }
16901
16902 case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
16903 case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
16904 case X86::BI__builtin_ia32_vpshufbitqmb512_mask: {
16905 unsigned NumElts =
16906 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
16907 Value *MaskIn = Ops[2];
16908 Ops.erase(&Ops[2]);
16909
16910 Intrinsic::ID ID;
16911 switch (BuiltinID) {
16912 default: llvm_unreachable("Unsupported intrinsic!");
16913 case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
16914 ID = Intrinsic::x86_avx512_vpshufbitqmb_128;
16915 break;
16916 case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
16917 ID = Intrinsic::x86_avx512_vpshufbitqmb_256;
16918 break;
16919 case X86::BI__builtin_ia32_vpshufbitqmb512_mask:
16920 ID = Intrinsic::x86_avx512_vpshufbitqmb_512;
16921 break;
16922 }
16923
16924 Value *Shufbit = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
16925 return EmitX86MaskedCompareResult(*this, Shufbit, NumElts, MaskIn);
16926 }
16927
16928 // packed comparison intrinsics
16929 case X86::BI__builtin_ia32_cmpeqps:
16930 case X86::BI__builtin_ia32_cmpeqpd:
16931 return getVectorFCmpIR(CmpInst::FCMP_OEQ, /*IsSignaling*/false);
16932 case X86::BI__builtin_ia32_cmpltps:
16933 case X86::BI__builtin_ia32_cmpltpd:
16934 return getVectorFCmpIR(CmpInst::FCMP_OLT, /*IsSignaling*/true);
16935 case X86::BI__builtin_ia32_cmpleps:
16936 case X86::BI__builtin_ia32_cmplepd:
16937 return getVectorFCmpIR(CmpInst::FCMP_OLE, /*IsSignaling*/true);
16938 case X86::BI__builtin_ia32_cmpunordps:
16939 case X86::BI__builtin_ia32_cmpunordpd:
16940 return getVectorFCmpIR(CmpInst::FCMP_UNO, /*IsSignaling*/false);
16941 case X86::BI__builtin_ia32_cmpneqps:
16942 case X86::BI__builtin_ia32_cmpneqpd:
16943 return getVectorFCmpIR(CmpInst::FCMP_UNE, /*IsSignaling*/false);
16944 case X86::BI__builtin_ia32_cmpnltps:
16945 case X86::BI__builtin_ia32_cmpnltpd:
16946 return getVectorFCmpIR(CmpInst::FCMP_UGE, /*IsSignaling*/true);
16947 case X86::BI__builtin_ia32_cmpnleps:
16948 case X86::BI__builtin_ia32_cmpnlepd:
16949 return getVectorFCmpIR(CmpInst::FCMP_UGT, /*IsSignaling*/true);
16950 case X86::BI__builtin_ia32_cmpordps:
16951 case X86::BI__builtin_ia32_cmpordpd:
16952 return getVectorFCmpIR(CmpInst::FCMP_ORD, /*IsSignaling*/false);
16953 case X86::BI__builtin_ia32_cmpph128_mask:
16954 case X86::BI__builtin_ia32_cmpph256_mask:
16955 case X86::BI__builtin_ia32_cmpph512_mask:
16956 case X86::BI__builtin_ia32_cmpps128_mask:
16957 case X86::BI__builtin_ia32_cmpps256_mask:
16958 case X86::BI__builtin_ia32_cmpps512_mask:
16959 case X86::BI__builtin_ia32_cmppd128_mask:
16960 case X86::BI__builtin_ia32_cmppd256_mask:
16961 case X86::BI__builtin_ia32_cmppd512_mask:
16962 case X86::BI__builtin_ia32_vcmppd256_round_mask:
16963 case X86::BI__builtin_ia32_vcmpps256_round_mask:
16964 case X86::BI__builtin_ia32_vcmpph256_round_mask:
16965 case X86::BI__builtin_ia32_vcmppbf16512_mask:
16966 case X86::BI__builtin_ia32_vcmppbf16256_mask:
16967 case X86::BI__builtin_ia32_vcmppbf16128_mask:
16968 IsMaskFCmp = true;
16969 [[fallthrough]];
16970 case X86::BI__builtin_ia32_cmpps:
16971 case X86::BI__builtin_ia32_cmpps256:
16972 case X86::BI__builtin_ia32_cmppd:
16973 case X86::BI__builtin_ia32_cmppd256: {
16974 // Lower vector comparisons to fcmp instructions, ignoring the requested
16975 // signalling behaviour and rounding mode.
16976 // This is only possible if the fp-model is not strict and FENV_ACCESS
16977 // is off.
16978
16979 // The third argument is the comparison condition, an integer in the
16980 // range [0, 31].
16981 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x1f;
16982
16983 // Lowering to IR fcmp instruction.
16984 // Ignoring requested signaling behaviour,
16985 // e.g. both _CMP_GT_OS & _CMP_GT_OQ are translated to FCMP_OGT.
16986 FCmpInst::Predicate Pred;
16987 bool IsSignaling;
16988 // Predicates for 16-31 repeat the 0-15 predicates. Only the signalling
16989 // behavior is inverted. We'll handle that after the switch.
16990 switch (CC & 0xf) {
16991 case 0x00: Pred = FCmpInst::FCMP_OEQ; IsSignaling = false; break;
16992 case 0x01: Pred = FCmpInst::FCMP_OLT; IsSignaling = true; break;
16993 case 0x02: Pred = FCmpInst::FCMP_OLE; IsSignaling = true; break;
16994 case 0x03: Pred = FCmpInst::FCMP_UNO; IsSignaling = false; break;
16995 case 0x04: Pred = FCmpInst::FCMP_UNE; IsSignaling = false; break;
16996 case 0x05: Pred = FCmpInst::FCMP_UGE; IsSignaling = true; break;
16997 case 0x06: Pred = FCmpInst::FCMP_UGT; IsSignaling = true; break;
16998 case 0x07: Pred = FCmpInst::FCMP_ORD; IsSignaling = false; break;
16999 case 0x08: Pred = FCmpInst::FCMP_UEQ; IsSignaling = false; break;
17000 case 0x09: Pred = FCmpInst::FCMP_ULT; IsSignaling = true; break;
17001 case 0x0a: Pred = FCmpInst::FCMP_ULE; IsSignaling = true; break;
17002 case 0x0b: Pred = FCmpInst::FCMP_FALSE; IsSignaling = false; break;
17003 case 0x0c: Pred = FCmpInst::FCMP_ONE; IsSignaling = false; break;
17004 case 0x0d: Pred = FCmpInst::FCMP_OGE; IsSignaling = true; break;
17005 case 0x0e: Pred = FCmpInst::FCMP_OGT; IsSignaling = true; break;
17006 case 0x0f: Pred = FCmpInst::FCMP_TRUE; IsSignaling = false; break;
17007 default: llvm_unreachable("Unhandled CC");
17008 }
17009
17010 // Invert the signalling behavior for 16-31.
17011 if (CC & 0x10)
17012 IsSignaling = !IsSignaling;
17013
17014 // If the predicate is true or false and we're using constrained intrinsics,
17015 // we don't have a compare intrinsic we can use. Just use the legacy X86
17016 // specific intrinsic.
17017 // If the intrinsic is mask enabled and we're using constrained intrinsics,
17018 // use the legacy X86 specific intrinsic.
17019 if (Builder.getIsFPConstrained() &&
17020 (Pred == FCmpInst::FCMP_TRUE || Pred == FCmpInst::FCMP_FALSE ||
17021 IsMaskFCmp)) {
17022
17023 Intrinsic::ID IID;
17024 switch (BuiltinID) {
17025 default: llvm_unreachable("Unexpected builtin");
17026 case X86::BI__builtin_ia32_cmpps:
17027 IID = Intrinsic::x86_sse_cmp_ps;
17028 break;
17029 case X86::BI__builtin_ia32_cmpps256:
17030 IID = Intrinsic::x86_avx_cmp_ps_256;
17031 break;
17032 case X86::BI__builtin_ia32_cmppd:
17033 IID = Intrinsic::x86_sse2_cmp_pd;
17034 break;
17035 case X86::BI__builtin_ia32_cmppd256:
17036 IID = Intrinsic::x86_avx_cmp_pd_256;
17037 break;
17038 case X86::BI__builtin_ia32_cmpph128_mask:
17039 IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_128;
17040 break;
17041 case X86::BI__builtin_ia32_cmpph256_mask:
17042 IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_256;
17043 break;
17044 case X86::BI__builtin_ia32_cmpph512_mask:
17045 IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_512;
17046 break;
17047 case X86::BI__builtin_ia32_cmpps512_mask:
17048 IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
17049 break;
17050 case X86::BI__builtin_ia32_cmppd512_mask:
17051 IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
17052 break;
17053 case X86::BI__builtin_ia32_cmpps128_mask:
17054 IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
17055 break;
17056 case X86::BI__builtin_ia32_cmpps256_mask:
17057 IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
17058 break;
17059 case X86::BI__builtin_ia32_cmppd128_mask:
17060 IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
17061 break;
17062 case X86::BI__builtin_ia32_cmppd256_mask:
17063 IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
17064 break;
17065 }
17066
17067 Function *Intr = CGM.getIntrinsic(IID);
17068 if (IsMaskFCmp) {
17069 unsigned NumElts =
17070 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
17071 Ops[3] = getMaskVecValue(*this, Ops[3], NumElts);
17072 Value *Cmp = Builder.CreateCall(Intr, Ops);
17073 return EmitX86MaskedCompareResult(*this, Cmp, NumElts, nullptr);
17074 }
17075
17076 return Builder.CreateCall(Intr, Ops);
17077 }
17078
17079 // Builtins without the _mask suffix return a vector of integers
17080 // of the same width as the input vectors
17081 if (IsMaskFCmp) {
17082 // We ignore SAE if strict FP is disabled. We only keep precise
17083 // exception behavior under strict FP.
17084 // NOTE: If strict FP does ever go through here a CGFPOptionsRAII
17085 // object will be required.
17086 unsigned NumElts =
17087 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
17088 Value *Cmp;
17089 if (IsSignaling)
17090 Cmp = Builder.CreateFCmpS(Pred, Ops[0], Ops[1]);
17091 else
17092 Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
17093 return EmitX86MaskedCompareResult(*this, Cmp, NumElts, Ops[3]);
17094 }
17095
17096 return getVectorFCmpIR(Pred, IsSignaling);
17097 }
17098
17099 // SSE scalar comparison intrinsics
17100 case X86::BI__builtin_ia32_cmpeqss:
17101 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 0);
17102 case X86::BI__builtin_ia32_cmpltss:
17103 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 1);
17104 case X86::BI__builtin_ia32_cmpless:
17105 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 2);
17106 case X86::BI__builtin_ia32_cmpunordss:
17107 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 3);
17108 case X86::BI__builtin_ia32_cmpneqss:
17109 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 4);
17110 case X86::BI__builtin_ia32_cmpnltss:
17111 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 5);
17112 case X86::BI__builtin_ia32_cmpnless:
17113 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 6);
17114 case X86::BI__builtin_ia32_cmpordss:
17115 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 7);
17116 case X86::BI__builtin_ia32_cmpeqsd:
17117 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 0);
17118 case X86::BI__builtin_ia32_cmpltsd:
17119 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 1);
17120 case X86::BI__builtin_ia32_cmplesd:
17121 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 2);
17122 case X86::BI__builtin_ia32_cmpunordsd:
17123 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 3);
17124 case X86::BI__builtin_ia32_cmpneqsd:
17125 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 4);
17126 case X86::BI__builtin_ia32_cmpnltsd:
17127 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 5);
17128 case X86::BI__builtin_ia32_cmpnlesd:
17129 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 6);
17130 case X86::BI__builtin_ia32_cmpordsd:
17131 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7);
17132
17133 // f16c half2float intrinsics
17134 case X86::BI__builtin_ia32_vcvtph2ps:
17135 case X86::BI__builtin_ia32_vcvtph2ps256:
17136 case X86::BI__builtin_ia32_vcvtph2ps_mask:
17137 case X86::BI__builtin_ia32_vcvtph2ps256_mask:
17138 case X86::BI__builtin_ia32_vcvtph2ps512_mask: {
17139 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
17140 return EmitX86CvtF16ToFloatExpr(*this, Ops, ConvertType(E->getType()));
17141 }
17142
17143 // AVX512 bf16 intrinsics
17144 case X86::BI__builtin_ia32_cvtneps2bf16_128_mask: {
17145 Ops[2] = getMaskVecValue(
17146 *this, Ops[2],
17147 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements());
17148 Intrinsic::ID IID = Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128;
17149 return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
17150 }
17151 case X86::BI__builtin_ia32_cvtsbf162ss_32:
17152 return Builder.CreateFPExt(Ops[0], Builder.getFloatTy());
17153
17154 case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
17155 case X86::BI__builtin_ia32_cvtneps2bf16_512_mask: {
17156 Intrinsic::ID IID;
17157 switch (BuiltinID) {
17158 default: llvm_unreachable("Unsupported intrinsic!");
17159 case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
17160 IID = Intrinsic::x86_avx512bf16_cvtneps2bf16_256;
17161 break;
17162 case X86::BI__builtin_ia32_cvtneps2bf16_512_mask:
17163 IID = Intrinsic::x86_avx512bf16_cvtneps2bf16_512;
17164 break;
17165 }
17166 Value *Res = Builder.CreateCall(CGM.getIntrinsic(IID), Ops[0]);
17167 return EmitX86Select(*this, Ops[2], Res, Ops[1]);
17168 }
17169
17170 case X86::BI__cpuid:
17171 case X86::BI__cpuidex: {
17172 Value *FuncId = EmitScalarExpr(E->getArg(1));
17173 Value *SubFuncId = BuiltinID == X86::BI__cpuidex
17174 ? EmitScalarExpr(E->getArg(2))
17175 : llvm::ConstantInt::get(Int32Ty, 0);
17176
17177 llvm::StructType *CpuidRetTy =
17178 llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty, Int32Ty);
17179 llvm::FunctionType *FTy =
17180 llvm::FunctionType::get(CpuidRetTy, {Int32Ty, Int32Ty}, false);
17181
17182 StringRef Asm, Constraints;
17183 if (getTarget().getTriple().getArch() == llvm::Triple::x86) {
17184 Asm = "cpuid";
17185 Constraints = "={ax},={bx},={cx},={dx},{ax},{cx}";
17186 } else {
17187 // x86-64 uses %rbx as the base register, so preserve it.
17188 Asm = "xchgq %rbx, ${1:q}\n"
17189 "cpuid\n"
17190 "xchgq %rbx, ${1:q}";
17191 Constraints = "={ax},=r,={cx},={dx},0,2";
17192 }
17193
17194 llvm::InlineAsm *IA = llvm::InlineAsm::get(FTy, Asm, Constraints,
17195 /*hasSideEffects=*/false);
17196 Value *IACall = Builder.CreateCall(IA, {FuncId, SubFuncId});
17197 Value *BasePtr = EmitScalarExpr(E->getArg(0));
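// Store EAX, EBX, ECX and EDX into the caller's four-int buffer in order.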
17198 Value *Store = nullptr;
17199 for (unsigned i = 0; i < 4; i++) {
17200 Value *Extracted = Builder.CreateExtractValue(IACall, i);
17201 Value *StorePtr = Builder.CreateConstInBoundsGEP1_32(Int32Ty, BasePtr, i);
17202 Store = Builder.CreateAlignedStore(Extracted, StorePtr, getIntAlign());
17203 }
17204
17205 // Return the last store instruction to signal that we have emitted
17206 // the intrinsic.
17207 return Store;
17208 }
17209
17210 case X86::BI__emul:
17211 case X86::BI__emulu: {
17212 llvm::Type *Int64Ty = llvm::IntegerType::get(getLLVMContext(), 64);
17213 bool isSigned = (BuiltinID == X86::BI__emul);
17214 Value *LHS = Builder.CreateIntCast(Ops[0], Int64Ty, isSigned);
17215 Value *RHS = Builder.CreateIntCast(Ops[1], Int64Ty, isSigned);
17216 return Builder.CreateMul(LHS, RHS, "", !isSigned, isSigned);
17217 }
17218 case X86::BI__mulh:
17219 case X86::BI__umulh:
17220 case X86::BI_mul128:
17221 case X86::BI_umul128: {
17222 llvm::Type *ResType = ConvertType(E->getType());
17223 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
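// Widen both operands to 128 bits, multiply, and shift to obtain the high
// 64 bits; _mul128/_umul128 additionally store the high half and return the
// low half.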
17224
17225 bool IsSigned = (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI_mul128);
17226 Value *LHS = Builder.CreateIntCast(Ops[0], Int128Ty, IsSigned);
17227 Value *RHS = Builder.CreateIntCast(Ops[1], Int128Ty, IsSigned);
17228
17229 Value *MulResult, *HigherBits;
17230 if (IsSigned) {
17231 MulResult = Builder.CreateNSWMul(LHS, RHS);
17232 HigherBits = Builder.CreateAShr(MulResult, 64);
17233 } else {
17234 MulResult = Builder.CreateNUWMul(LHS, RHS);
17235 HigherBits = Builder.CreateLShr(MulResult, 64);
17236 }
17237 HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);
17238
17239 if (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI__umulh)
17240 return HigherBits;
17241
17242 Address HighBitsAddress = EmitPointerWithAlignment(E->getArg(2));
17243 Builder.CreateStore(HigherBits, HighBitsAddress);
17244 return Builder.CreateIntCast(MulResult, ResType, IsSigned);
17245 }
17246
17247 case X86::BI__faststorefence: {
17248 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
17249 llvm::SyncScope::System);
17250 }
17251 case X86::BI__shiftleft128:
17252 case X86::BI__shiftright128: {
17253 llvm::Function *F = CGM.getIntrinsic(
17254 BuiltinID == X86::BI__shiftleft128 ? Intrinsic::fshl : Intrinsic::fshr,
17255 Int64Ty);
17256 // Flip low/high ops and zero-extend amount to matching type.
17257 // shiftleft128(Low, High, Amt) -> fshl(High, Low, Amt)
17258 // shiftright128(Low, High, Amt) -> fshr(High, Low, Amt)
17259 std::swap(Ops[0], Ops[1]);
17260 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
17261 return Builder.CreateCall(F, Ops);
17262 }
17263 case X86::BI_ReadWriteBarrier:
17264 case X86::BI_ReadBarrier:
17265 case X86::BI_WriteBarrier: {
17266 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
17267 llvm::SyncScope::SingleThread);
17268 }
17269
17270 case X86::BI_AddressOfReturnAddress: {
17271 Function *F =
17272 CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy);
17273 return Builder.CreateCall(F);
17274 }
17275 case X86::BI__stosb: {
17276 // We treat __stosb as a volatile memset - it may not generate a "rep stosb"
17277 // instruction, but it will create a memset that won't be optimized away.
17278 return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], Align(1), true);
17279 }
17280 // Corresponds to intrinsics that return 2 tiles (tile0_tile1).
17281 case X86::BI__builtin_ia32_t2rpntlvwz0_internal:
17282 case X86::BI__builtin_ia32_t2rpntlvwz0rs_internal:
17283 case X86::BI__builtin_ia32_t2rpntlvwz0t1_internal:
17284 case X86::BI__builtin_ia32_t2rpntlvwz0rst1_internal:
17285 case X86::BI__builtin_ia32_t2rpntlvwz1_internal:
17286 case X86::BI__builtin_ia32_t2rpntlvwz1rs_internal:
17287 case X86::BI__builtin_ia32_t2rpntlvwz1t1_internal:
17288 case X86::BI__builtin_ia32_t2rpntlvwz1rst1_internal: {
17289 Intrinsic::ID IID;
17290 switch (BuiltinID) {
17291 default:
17292 llvm_unreachable("Unsupported intrinsic!");
17293 case X86::BI__builtin_ia32_t2rpntlvwz0_internal:
17294 IID = Intrinsic::x86_t2rpntlvwz0_internal;
17295 break;
17296 case X86::BI__builtin_ia32_t2rpntlvwz0rs_internal:
17297 IID = Intrinsic::x86_t2rpntlvwz0rs_internal;
17298 break;
17299 case X86::BI__builtin_ia32_t2rpntlvwz0t1_internal:
17300 IID = Intrinsic::x86_t2rpntlvwz0t1_internal;
17301 break;
17302 case X86::BI__builtin_ia32_t2rpntlvwz0rst1_internal:
17303 IID = Intrinsic::x86_t2rpntlvwz0rst1_internal;
17304 break;
17305 case X86::BI__builtin_ia32_t2rpntlvwz1_internal:
17306 IID = Intrinsic::x86_t2rpntlvwz1_internal;
17307 break;
17308 case X86::BI__builtin_ia32_t2rpntlvwz1rs_internal:
17309 IID = Intrinsic::x86_t2rpntlvwz1rs_internal;
17310 break;
17311 case X86::BI__builtin_ia32_t2rpntlvwz1t1_internal:
17312 IID = Intrinsic::x86_t2rpntlvwz1t1_internal;
17313 break;
17314 case X86::BI__builtin_ia32_t2rpntlvwz1rst1_internal:
17315 IID = Intrinsic::x86_t2rpntlvwz1rst1_internal;
17316 break;
17317 }
17318
17319 // Ops = (Row0, Col0, Col1, DstPtr0, DstPtr1, SrcPtr, Stride)
17320 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID),
17321 {Ops[0], Ops[1], Ops[2], Ops[5], Ops[6]});
17322
17323 auto *PtrTy = E->getArg(3)->getType()->getAs<PointerType>();
17324 assert(PtrTy && "arg3 must be of pointer type");
17325 QualType PtreeTy = PtrTy->getPointeeType();
17326 llvm::Type *TyPtee = ConvertType(PtreeTy);
17327
17328 // Bitcast amx type (x86_amx) to vector type (256 x i32)
17329 // Then store tile0 into DstPtr0
17330 Value *T0 = Builder.CreateExtractValue(Call, 0);
17331 Value *VecT0 = Builder.CreateIntrinsic(Intrinsic::x86_cast_tile_to_vector,
17332 {TyPtee}, {T0});
17333 Builder.CreateDefaultAlignedStore(VecT0, Ops[3]);
17334
17335 // Then store tile1 into DstPtr1
17336 Value *T1 = Builder.CreateExtractValue(Call, 1);
17337 Value *VecT1 = Builder.CreateIntrinsic(Intrinsic::x86_cast_tile_to_vector,
17338 {TyPtee}, {T1});
17339 Value *Store = Builder.CreateDefaultAlignedStore(VecT1, Ops[4]);
17340
17341 // Note: We deliberately avoid using x86_tilestored64_internal to store
17342 // the results here, because it cannot guarantee the scope of the memory
17343 // writes. That could force shape reloads after the first AMX intrinsic,
17344 // which the current AMX register allocation cannot handle.
17345
17346 return Store;
17347 }
17348 case X86::BI__ud2:
17349 // llvm.trap makes a ud2a instruction on x86.
17350 return EmitTrapCall(Intrinsic::trap);
17351 case X86::BI__int2c: {
17352 // This syscall signals a driver assertion failure in x86 NT kernels.
17353 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
17354 llvm::InlineAsm *IA =
17355 llvm::InlineAsm::get(FTy, "int $$0x2c", "", /*hasSideEffects=*/true);
17356 llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
17357 getLLVMContext(), llvm::AttributeList::FunctionIndex,
17358 llvm::Attribute::NoReturn);
17359 llvm::CallInst *CI = Builder.CreateCall(IA);
17360 CI->setAttributes(NoReturnAttr);
17361 return CI;
17362 }
17363 case X86::BI__readfsbyte:
17364 case X86::BI__readfsword:
17365 case X86::BI__readfsdword:
17366 case X86::BI__readfsqword: {
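// Address space 257 models the FS segment (the GS variants below use 256).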
17367 llvm::Type *IntTy = ConvertType(E->getType());
17368 Value *Ptr = Builder.CreateIntToPtr(
17369 Ops[0], llvm::PointerType::get(getLLVMContext(), 257));
17370 LoadInst *Load = Builder.CreateAlignedLoad(
17371 IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
17372 Load->setVolatile(true);
17373 return Load;
17374 }
17375 case X86::BI__readgsbyte:
17376 case X86::BI__readgsword:
17377 case X86::BI__readgsdword:
17378 case X86::BI__readgsqword: {
17379 llvm::Type *IntTy = ConvertType(E->getType());
17380 Value *Ptr = Builder.CreateIntToPtr(
17381 Ops[0], llvm::PointerType::get(getLLVMContext(), 256));
17382 LoadInst *Load = Builder.CreateAlignedLoad(
17383 IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
17384 Load->setVolatile(true);
17385 return Load;
17386 }
17387 case X86::BI__builtin_ia32_encodekey128_u32: {
17388 Intrinsic::ID IID = Intrinsic::x86_encodekey128;
17389
17390 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[0], Ops[1]});
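// Results 1..3 hold the 384-bit key handle; store them at consecutive
// 16-byte offsets from Ops[2] and return the 32-bit result 0.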
17391
17392 for (int i = 0; i < 3; ++i) {
17393 Value *Extract = Builder.CreateExtractValue(Call, i + 1);
17394 Value *Ptr = Builder.CreateConstGEP1_32(Int8Ty, Ops[2], i * 16);
17395 Builder.CreateAlignedStore(Extract, Ptr, Align(1));
17396 }
17397
17398 return Builder.CreateExtractValue(Call, 0);
17399 }
17400 case X86::BI__builtin_ia32_encodekey256_u32: {
17401 Intrinsic::ID IID = Intrinsic::x86_encodekey256;
17402
17403 Value *Call =
17404 Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[0], Ops[1], Ops[2]});
17405
17406 for (int i = 0; i < 4; ++i) {
17407 Value *Extract = Builder.CreateExtractValue(Call, i + 1);
17408 Value *Ptr = Builder.CreateConstGEP1_32(Int8Ty, Ops[3], i * 16);
17409 Builder.CreateAlignedStore(Extract, Ptr, Align(1));
17410 }
17411
17412 return Builder.CreateExtractValue(Call, 0);
17413 }
17414 case X86::BI__builtin_ia32_aesenc128kl_u8:
17415 case X86::BI__builtin_ia32_aesdec128kl_u8:
17416 case X86::BI__builtin_ia32_aesenc256kl_u8:
17417 case X86::BI__builtin_ia32_aesdec256kl_u8: {
17418 Intrinsic::ID IID;
17419 StringRef BlockName;
17420 switch (BuiltinID) {
17421 default:
17422 llvm_unreachable("Unexpected builtin");
17423 case X86::BI__builtin_ia32_aesenc128kl_u8:
17424 IID = Intrinsic::x86_aesenc128kl;
17425 BlockName = "aesenc128kl";
17426 break;
17427 case X86::BI__builtin_ia32_aesdec128kl_u8:
17428 IID = Intrinsic::x86_aesdec128kl;
17429 BlockName = "aesdec128kl";
17430 break;
17431 case X86::BI__builtin_ia32_aesenc256kl_u8:
17432 IID = Intrinsic::x86_aesenc256kl;
17433 BlockName = "aesenc256kl";
17434 break;
17435 case X86::BI__builtin_ia32_aesdec256kl_u8:
17436 IID = Intrinsic::x86_aesdec256kl;
17437 BlockName = "aesdec256kl";
17438 break;
17439 }
17440
17441 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[1], Ops[2]});
17442
17443 BasicBlock *NoError =
17444 createBasicBlock(BlockName + "_no_error", this->CurFn);
17445 BasicBlock *Error = createBasicBlock(BlockName + "_error", this->CurFn);
17446 BasicBlock *End = createBasicBlock(BlockName + "_end", this->CurFn);
17447
17448 Value *Ret = Builder.CreateExtractValue(Call, 0);
17449 Value *Succ = Builder.CreateTrunc(Ret, Builder.getInt1Ty());
17450 Value *Out = Builder.CreateExtractValue(Call, 1);
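// The low bit of the returned status is the success flag: store the
// processed block on success, a zero block on failure, then return the
// status value.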
17451 Builder.CreateCondBr(Succ, NoError, Error);
17452
17453 Builder.SetInsertPoint(NoError);
17454 Builder.CreateDefaultAlignedStore(Out, Ops[0]);
17455 Builder.CreateBr(End);
17456
17457 Builder.SetInsertPoint(Error);
17458 Constant *Zero = llvm::Constant::getNullValue(Out->getType());
17459 Builder.CreateDefaultAlignedStore(Zero, Ops[0]);
17460 Builder.CreateBr(End);
17461
17462 Builder.SetInsertPoint(End);
17463 return Builder.CreateExtractValue(Call, 0);
17464 }
17465 case X86::BI__builtin_ia32_aesencwide128kl_u8:
17466 case X86::BI__builtin_ia32_aesdecwide128kl_u8:
17467 case X86::BI__builtin_ia32_aesencwide256kl_u8:
17468 case X86::BI__builtin_ia32_aesdecwide256kl_u8: {
17469 Intrinsic::ID IID;
17470 StringRef BlockName;
17471 switch (BuiltinID) {
17472 case X86::BI__builtin_ia32_aesencwide128kl_u8:
17473 IID = Intrinsic::x86_aesencwide128kl;
17474 BlockName = "aesencwide128kl";
17475 break;
17476 case X86::BI__builtin_ia32_aesdecwide128kl_u8:
17477 IID = Intrinsic::x86_aesdecwide128kl;
17478 BlockName = "aesdecwide128kl";
17479 break;
17480 case X86::BI__builtin_ia32_aesencwide256kl_u8:
17481 IID = Intrinsic::x86_aesencwide256kl;
17482 BlockName = "aesencwide256kl";
17483 break;
17484 case X86::BI__builtin_ia32_aesdecwide256kl_u8:
17485 IID = Intrinsic::x86_aesdecwide256kl;
17486 BlockName = "aesdecwide256kl";
17487 break;
17488 }
17489
17490 llvm::Type *Ty = FixedVectorType::get(Builder.getInt64Ty(), 2);
17491 Value *InOps[9];
17492 InOps[0] = Ops[2];
17493 for (int i = 0; i != 8; ++i) {
17494 Value *Ptr = Builder.CreateConstGEP1_32(Ty, Ops[1], i);
17495 InOps[i + 1] = Builder.CreateAlignedLoad(Ty, Ptr, Align(16));
17496 }
17497
17498 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), InOps);
17499
17500 BasicBlock *NoError =
17501 createBasicBlock(BlockName + "_no_error", this->CurFn);
17502 BasicBlock *Error = createBasicBlock(BlockName + "_error", this->CurFn);
17503 BasicBlock *End = createBasicBlock(BlockName + "_end", this->CurFn);
17504
17505 Value *Ret = Builder.CreateExtractValue(Call, 0);
17506 Value *Succ = Builder.CreateTrunc(Ret, Builder.getInt1Ty());
17507 Builder.CreateCondBr(Succ, NoError, Error);
17508
17509 Builder.SetInsertPoint(NoError);
17510 for (int i = 0; i != 8; ++i) {
17511 Value *Extract = Builder.CreateExtractValue(Call, i + 1);
17512 Value *Ptr = Builder.CreateConstGEP1_32(Extract->getType(), Ops[0], i);
17513 Builder.CreateAlignedStore(Extract, Ptr, Align(16));
17514 }
17515 Builder.CreateBr(End);
17516
17517 Builder.SetInsertPoint(Error);
17518 for (int i = 0; i != 8; ++i) {
17519 Value *Out = Builder.CreateExtractValue(Call, i + 1);
17520 Constant *Zero = llvm::Constant::getNullValue(Out->getType());
17521 Value *Ptr = Builder.CreateConstGEP1_32(Out->getType(), Ops[0], i);
17522 Builder.CreateAlignedStore(Zero, Ptr, Align(16));
17523 }
17524 Builder.CreateBr(End);
17525
17526 Builder.SetInsertPoint(End);
17527 return Builder.CreateExtractValue(Call, 0);
17528 }
17529 case X86::BI__builtin_ia32_vfcmaddcph512_mask:
17530 IsConjFMA = true;
17531 [[fallthrough]];
17532 case X86::BI__builtin_ia32_vfmaddcph512_mask: {
17533 Intrinsic::ID IID = IsConjFMA
17534 ? Intrinsic::x86_avx512fp16_mask_vfcmadd_cph_512
17535 : Intrinsic::x86_avx512fp16_mask_vfmadd_cph_512;
17536 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
17537 return EmitX86Select(*this, Ops[3], Call, Ops[0]);
17538 }
17539 case X86::BI__builtin_ia32_vfcmaddcph256_round_mask:
17540 IsConjFMA = true;
17541 [[fallthrough]];
17542 case X86::BI__builtin_ia32_vfmaddcph256_round_mask: {
17543 Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx10_mask_vfcmaddcph256
17544 : Intrinsic::x86_avx10_mask_vfmaddcph256;
17545 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
17546 return EmitX86Select(*this, Ops[3], Call, Ops[0]);
17547 }
17548 case X86::BI__builtin_ia32_vfcmaddcsh_round_mask:
17549 IsConjFMA = true;
17550 [[fallthrough]];
17551 case X86::BI__builtin_ia32_vfmaddcsh_round_mask: {
17552 Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx512fp16_mask_vfcmadd_csh
17553 : Intrinsic::x86_avx512fp16_mask_vfmadd_csh;
17554 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
17555 Value *And = Builder.CreateAnd(Ops[3], llvm::ConstantInt::get(Int8Ty, 1));
17556 return EmitX86Select(*this, And, Call, Ops[0]);
17557 }
17558 case X86::BI__builtin_ia32_vfcmaddcsh_round_mask3:
17559 IsConjFMA = true;
17560 [[fallthrough]];
17561 case X86::BI__builtin_ia32_vfmaddcsh_round_mask3: {
17562 Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx512fp16_mask_vfcmadd_csh
17563 : Intrinsic::x86_avx512fp16_mask_vfmadd_csh;
17564 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
17565 static constexpr int Mask[] = {0, 5, 6, 7};
17566 return Builder.CreateShuffleVector(Call, Ops[2], Mask);
17567 }
17568 case X86::BI__builtin_ia32_prefetchi:
17569 return Builder.CreateCall(
17570 CGM.getIntrinsic(Intrinsic::prefetch, Ops[0]->getType()),
17571 {Ops[0], llvm::ConstantInt::get(Int32Ty, 0), Ops[1],
17572 llvm::ConstantInt::get(Int32Ty, 0)});
17573 }
17574}
17575
17576Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
17577 const CallExpr *E) {
17578 // Do not emit the builtin's arguments as part of another call's argument list,
17579 // because the evaluation order of function arguments is not specified in C++.
17580 // This is important when testing to ensure the arguments are emitted in the
17581 // same order every time. E.g.:
17582 // Instead of:
17583 // return Builder.CreateFDiv(EmitScalarExpr(E->getArg(0)),
17584 // EmitScalarExpr(E->getArg(1)), "swdiv");
17585 // Use:
17586 // Value *Op0 = EmitScalarExpr(E->getArg(0));
17587 // Value *Op1 = EmitScalarExpr(E->getArg(1));
17588 // return Builder.CreateFDiv(Op0, Op1, "swdiv")
17589
17590 Intrinsic::ID ID = Intrinsic::not_intrinsic;
17591
17592#include "llvm/TargetParser/PPCTargetParser.def"
17593 auto GenAIXPPCBuiltinCpuExpr = [&](unsigned SupportMethod, unsigned FieldIdx,
17594 unsigned Mask, CmpInst::Predicate CompOp,
17595 unsigned OpValue) -> Value * {
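// On AIX, the queried value comes either from the _system_configuration
// global (USE_SYS_CONF) or from the getsystemcfg() system call (SYS_CALL);
// it is then masked and compared against OpValue.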
17596 if (SupportMethod == BUILTIN_PPC_FALSE)
17597 return llvm::ConstantInt::getFalse(ConvertType(E->getType()));
17598
17599 if (SupportMethod == BUILTIN_PPC_TRUE)
17600 return llvm::ConstantInt::getTrue(ConvertType(E->getType()));
17601
17602 assert(SupportMethod <= SYS_CALL && "Invalid value for SupportMethod.");
17603
17604 llvm::Value *FieldValue = nullptr;
17605 if (SupportMethod == USE_SYS_CONF) {
17606 llvm::Type *STy = llvm::StructType::get(PPC_SYSTEMCONFIG_TYPE);
17607 llvm::Constant *SysConf =
17608 CGM.CreateRuntimeVariable(STy, "_system_configuration");
17609
17610 // Grab the appropriate field from _system_configuration.
17611 llvm::Value *Idxs[] = {ConstantInt::get(Int32Ty, 0),
17612 ConstantInt::get(Int32Ty, FieldIdx)};
17613
17614 FieldValue = Builder.CreateInBoundsGEP(STy, SysConf, Idxs);
17615 FieldValue = Builder.CreateAlignedLoad(Int32Ty, FieldValue,
17616 CharUnits::fromQuantity(4));
17617 } else if (SupportMethod == SYS_CALL) {
17618 llvm::FunctionType *FTy =
17619 llvm::FunctionType::get(Int64Ty, Int32Ty, false);
17620 llvm::FunctionCallee Func =
17621 CGM.CreateRuntimeFunction(FTy, "getsystemcfg");
17622
17623 FieldValue =
17624 Builder.CreateCall(Func, {ConstantInt::get(Int32Ty, FieldIdx)});
17625 }
17626 assert(FieldValue &&
17627 "SupportMethod value is not defined in PPCTargetParser.def.");
17628
17629 if (Mask)
17630 FieldValue = Builder.CreateAnd(FieldValue, Mask);
17631
17632 llvm::Type *ValueType = FieldValue->getType();
17633 bool IsValueType64Bit = ValueType->isIntegerTy(64);
17634 assert(
17635 (IsValueType64Bit || ValueType->isIntegerTy(32)) &&
17636 "Only 32/64-bit integers are supported in GenAIXPPCBuiltinCpuExpr().");
17637
17638 return Builder.CreateICmp(
17639 CompOp, FieldValue,
17640 ConstantInt::get(IsValueType64Bit ? Int64Ty : Int32Ty, OpValue));
17641 };
17642
17643 switch (BuiltinID) {
17644 default: return nullptr;
17645
17646 case Builtin::BI__builtin_cpu_is: {
17647 const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
17648 StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
17649 llvm::Triple Triple = getTarget().getTriple();
17650
17651 unsigned LinuxSupportMethod, LinuxIDValue, AIXSupportMethod, AIXIDValue;
17652 typedef std::tuple<unsigned, unsigned, unsigned, unsigned> CPUInfo;
17653
17654 std::tie(LinuxSupportMethod, LinuxIDValue, AIXSupportMethod, AIXIDValue) =
17655 static_cast<CPUInfo>(StringSwitch<CPUInfo>(CPUStr)
17656#define PPC_CPU(NAME, Linux_SUPPORT_METHOD, LinuxID, AIX_SUPPORT_METHOD, \
17657 AIXID) \
17658 .Case(NAME, {Linux_SUPPORT_METHOD, LinuxID, AIX_SUPPORT_METHOD, AIXID})
17659#include "llvm/TargetParser/PPCTargetParser.def"
17660 .Default({BUILTIN_PPC_UNSUPPORTED, 0,
17661 BUILTIN_PPC_UNSUPPORTED, 0}));
17662
17663 if (Triple.isOSAIX()) {
17664 assert((AIXSupportMethod != BUILTIN_PPC_UNSUPPORTED) &&
17665 "Invalid CPU name. Missed by SemaChecking?");
17666 return GenAIXPPCBuiltinCpuExpr(AIXSupportMethod, AIX_SYSCON_IMPL_IDX, 0,
17667 ICmpInst::ICMP_EQ, AIXIDValue);
17668 }
17669
17670 assert(Triple.isOSLinux() &&
17671 "__builtin_cpu_is() is only supported for AIX and Linux.");
17672
17673 assert((LinuxSupportMethod != BUILTIN_PPC_UNSUPPORTED) &&
17674 "Invalid CPU name. Missed by SemaChecking?");
17675
17676 if (LinuxSupportMethod == BUILTIN_PPC_FALSE)
17677 return llvm::ConstantInt::getFalse(ConvertType(E->getType()));
17678
17679 Value *Op0 = llvm::ConstantInt::get(Int32Ty, PPC_FAWORD_CPUID);
17680 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_fixed_addr_ld);
17681 Value *TheCall = Builder.CreateCall(F, {Op0}, "cpu_is");
17682 return Builder.CreateICmpEQ(TheCall,
17683 llvm::ConstantInt::get(Int32Ty, LinuxIDValue));
17684 }
17685 case Builtin::BI__builtin_cpu_supports: {
17686 llvm::Triple Triple = getTarget().getTriple();
17687 const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
17688 StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
17689 if (Triple.isOSAIX()) {
17690 unsigned SupportMethod, FieldIdx, Mask, Value;
17691 CmpInst::Predicate CompOp;
17692 typedef std::tuple<unsigned, unsigned, unsigned, CmpInst::Predicate,
17693 unsigned>
17694 CPUSupportType;
17695 std::tie(SupportMethod, FieldIdx, Mask, CompOp, Value) =
17696 static_cast<CPUSupportType>(StringSwitch<CPUSupportType>(CPUStr)
17697#define PPC_AIX_FEATURE(NAME, DESC, SUPPORT_METHOD, INDEX, MASK, COMP_OP, \
17698 VALUE) \
17699 .Case(NAME, {SUPPORT_METHOD, INDEX, MASK, COMP_OP, VALUE})
17700#include "llvm/TargetParser/PPCTargetParser.def"
17701 .Default({BUILTIN_PPC_FALSE, 0, 0,
17702 CmpInst::Predicate(), 0}));
17703 return GenAIXPPCBuiltinCpuExpr(SupportMethod, FieldIdx, Mask, CompOp,
17704 Value);
17705 }
17706
17707 assert(Triple.isOSLinux() &&
17708 "__builtin_cpu_supports() is only supported for AIX and Linux.");
17709 unsigned FeatureWord;
17710 unsigned BitMask;
17711 std::tie(FeatureWord, BitMask) =
17712 StringSwitch<std::pair<unsigned, unsigned>>(CPUStr)
17713#define PPC_LNX_FEATURE(Name, Description, EnumName, Bitmask, FA_WORD) \
17714 .Case(Name, {FA_WORD, Bitmask})
17715#include "llvm/TargetParser/PPCTargetParser.def"
17716 .Default({0, 0});
17717 if (!BitMask)
17718 return Builder.getFalse();
17719 Value *Op0 = llvm::ConstantInt::get(Int32Ty, FeatureWord);
17720 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_fixed_addr_ld);
17721 Value *TheCall = Builder.CreateCall(F, {Op0}, "cpu_supports");
17722 Value *Mask =
17723 Builder.CreateAnd(TheCall, llvm::ConstantInt::get(Int32Ty, BitMask));
17724 return Builder.CreateICmpNE(Mask, llvm::Constant::getNullValue(Int32Ty));
17725#undef PPC_FAWORD_HWCAP
17726#undef PPC_FAWORD_HWCAP2
17727#undef PPC_FAWORD_CPUID
17728 }
17729
17730 // __builtin_ppc_get_timebase is GCC 4.8+'s PowerPC-specific name for what we
17731 // call __builtin_readcyclecounter.
17732 case PPC::BI__builtin_ppc_get_timebase:
17733 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::readcyclecounter));
17734
17735 // vec_ld, vec_xl_be, vec_lvsl, vec_lvsr
17736 case PPC::BI__builtin_altivec_lvx:
17737 case PPC::BI__builtin_altivec_lvxl:
17738 case PPC::BI__builtin_altivec_lvebx:
17739 case PPC::BI__builtin_altivec_lvehx:
17740 case PPC::BI__builtin_altivec_lvewx:
17741 case PPC::BI__builtin_altivec_lvsl:
17742 case PPC::BI__builtin_altivec_lvsr:
17743 case PPC::BI__builtin_vsx_lxvd2x:
17744 case PPC::BI__builtin_vsx_lxvw4x:
17745 case PPC::BI__builtin_vsx_lxvd2x_be:
17746 case PPC::BI__builtin_vsx_lxvw4x_be:
17747 case PPC::BI__builtin_vsx_lxvl:
17748 case PPC::BI__builtin_vsx_lxvll:
17749 {
17750 SmallVector<Value *, 2> Ops;
17751 Ops.push_back(EmitScalarExpr(E->getArg(0)));
17752 Ops.push_back(EmitScalarExpr(E->getArg(1)));
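// For all but the length-controlled loads (lxvl/lxvll), fold the offset into
// the base pointer so the intrinsic takes a single address operand.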
17753 if (!(BuiltinID == PPC::BI__builtin_vsx_lxvl ||
17754 BuiltinID == PPC::BI__builtin_vsx_lxvll)) {
17755 Ops[0] = Builder.CreateGEP(Int8Ty, Ops[1], Ops[0]);
17756 Ops.pop_back();
17757 }
17758
17759 switch (BuiltinID) {
17760 default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!");
17761 case PPC::BI__builtin_altivec_lvx:
17762 ID = Intrinsic::ppc_altivec_lvx;
17763 break;
17764 case PPC::BI__builtin_altivec_lvxl:
17765 ID = Intrinsic::ppc_altivec_lvxl;
17766 break;
17767 case PPC::BI__builtin_altivec_lvebx:
17768 ID = Intrinsic::ppc_altivec_lvebx;
17769 break;
17770 case PPC::BI__builtin_altivec_lvehx:
17771 ID = Intrinsic::ppc_altivec_lvehx;
17772 break;
17773 case PPC::BI__builtin_altivec_lvewx:
17774 ID = Intrinsic::ppc_altivec_lvewx;
17775 break;
17776 case PPC::BI__builtin_altivec_lvsl:
17777 ID = Intrinsic::ppc_altivec_lvsl;
17778 break;
17779 case PPC::BI__builtin_altivec_lvsr:
17780 ID = Intrinsic::ppc_altivec_lvsr;
17781 break;
17782 case PPC::BI__builtin_vsx_lxvd2x:
17783 ID = Intrinsic::ppc_vsx_lxvd2x;
17784 break;
17785 case PPC::BI__builtin_vsx_lxvw4x:
17786 ID = Intrinsic::ppc_vsx_lxvw4x;
17787 break;
17788 case PPC::BI__builtin_vsx_lxvd2x_be:
17789 ID = Intrinsic::ppc_vsx_lxvd2x_be;
17790 break;
17791 case PPC::BI__builtin_vsx_lxvw4x_be:
17792 ID = Intrinsic::ppc_vsx_lxvw4x_be;
17793 break;
17794 case PPC::BI__builtin_vsx_lxvl:
17795 ID = Intrinsic::ppc_vsx_lxvl;
17796 break;
17797 case PPC::BI__builtin_vsx_lxvll:
17798 ID = Intrinsic::ppc_vsx_lxvll;
17799 break;
17800 }
17801 llvm::Function *F = CGM.getIntrinsic(ID);
17802 return Builder.CreateCall(F, Ops, "");
17803 }
17804
17805 // vec_st, vec_xst_be
17806 case PPC::BI__builtin_altivec_stvx:
17807 case PPC::BI__builtin_altivec_stvxl:
17808 case PPC::BI__builtin_altivec_stvebx:
17809 case PPC::BI__builtin_altivec_stvehx:
17810 case PPC::BI__builtin_altivec_stvewx:
17811 case PPC::BI__builtin_vsx_stxvd2x:
17812 case PPC::BI__builtin_vsx_stxvw4x:
17813 case PPC::BI__builtin_vsx_stxvd2x_be:
17814 case PPC::BI__builtin_vsx_stxvw4x_be:
17815 case PPC::BI__builtin_vsx_stxvl:
17816 case PPC::BI__builtin_vsx_stxvll:
17817 {
17818 SmallVector<Value *, 3> Ops;
17819 Ops.push_back(EmitScalarExpr(E->getArg(0)));
17820 Ops.push_back(EmitScalarExpr(E->getArg(1)));
17821 Ops.push_back(EmitScalarExpr(E->getArg(2)));
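// Likewise for the stores: fold the offset into the base pointer unless the
// builtin takes an explicit length (stxvl/stxvll).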
17822 if (!(BuiltinID == PPC::BI__builtin_vsx_stxvl ||
17823 BuiltinID == PPC::BI__builtin_vsx_stxvll)) {
17824 Ops[1] = Builder.CreateGEP(Int8Ty, Ops[2], Ops[1]);
17825 Ops.pop_back();
17826 }
17827
17828 switch (BuiltinID) {
17829 default: llvm_unreachable("Unsupported st intrinsic!");
17830 case PPC::BI__builtin_altivec_stvx:
17831 ID = Intrinsic::ppc_altivec_stvx;
17832 break;
17833 case PPC::BI__builtin_altivec_stvxl:
17834 ID = Intrinsic::ppc_altivec_stvxl;
17835 break;
17836 case PPC::BI__builtin_altivec_stvebx:
17837 ID = Intrinsic::ppc_altivec_stvebx;
17838 break;
17839 case PPC::BI__builtin_altivec_stvehx:
17840 ID = Intrinsic::ppc_altivec_stvehx;
17841 break;
17842 case PPC::BI__builtin_altivec_stvewx:
17843 ID = Intrinsic::ppc_altivec_stvewx;
17844 break;
17845 case PPC::BI__builtin_vsx_stxvd2x:
17846 ID = Intrinsic::ppc_vsx_stxvd2x;
17847 break;
17848 case PPC::BI__builtin_vsx_stxvw4x:
17849 ID = Intrinsic::ppc_vsx_stxvw4x;
17850 break;
17851 case PPC::BI__builtin_vsx_stxvd2x_be:
17852 ID = Intrinsic::ppc_vsx_stxvd2x_be;
17853 break;
17854 case PPC::BI__builtin_vsx_stxvw4x_be:
17855 ID = Intrinsic::ppc_vsx_stxvw4x_be;
17856 break;
17857 case PPC::BI__builtin_vsx_stxvl:
17858 ID = Intrinsic::ppc_vsx_stxvl;
17859 break;
17860 case PPC::BI__builtin_vsx_stxvll:
17861 ID = Intrinsic::ppc_vsx_stxvll;
17862 break;
17863 }
17864 llvm::Function *F = CGM.getIntrinsic(ID);
17865 return Builder.CreateCall(F, Ops, "");
17866 }
17867 case PPC::BI__builtin_vsx_ldrmb: {
17868 // Essentially boils down to performing an unaligned VMX load sequence so
17869 // as to avoid crossing a page boundary and then shuffling the elements
17870 // into the right side of the vector register.
17871 Value *Op0 = EmitScalarExpr(E->getArg(0));
17872 Value *Op1 = EmitScalarExpr(E->getArg(1));
17873 int64_t NumBytes = cast<ConstantInt>(Op1)->getZExtValue();
17874 llvm::Type *ResTy = ConvertType(E->getType());
17875 bool IsLE = getTarget().isLittleEndian();
17876
17877 // If the user wants the entire vector, just load the entire vector.
17878 if (NumBytes == 16) {
17879 Value *LD =
17880 Builder.CreateLoad(Address(Op0, ResTy, CharUnits::fromQuantity(1)));
17881 if (!IsLE)
17882 return LD;
17883
17884 // Reverse the bytes on LE.
17885 SmallVector<int, 16> RevMask;
17886 for (int Idx = 0; Idx < 16; Idx++)
17887 RevMask.push_back(15 - Idx);
17888 return Builder.CreateShuffleVector(LD, LD, RevMask);
17889 }
17890
17891 llvm::Function *Lvx = CGM.getIntrinsic(Intrinsic::ppc_altivec_lvx);
17892 llvm::Function *Lvs = CGM.getIntrinsic(IsLE ? Intrinsic::ppc_altivec_lvsr
17893 : Intrinsic::ppc_altivec_lvsl);
17894 llvm::Function *Vperm = CGM.getIntrinsic(Intrinsic::ppc_altivec_vperm);
17895 Value *HiMem = Builder.CreateGEP(
17896 Int8Ty, Op0, ConstantInt::get(Op1->getType(), NumBytes - 1));
17897 Value *LoLd = Builder.CreateCall(Lvx, Op0, "ld.lo");
17898 Value *HiLd = Builder.CreateCall(Lvx, HiMem, "ld.hi");
17899 Value *Mask1 = Builder.CreateCall(Lvs, Op0, "mask1");
17900
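// Combine the two aligned loads with vperm using the alignment mask; the
// operand order is swapped on little-endian.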
17901 Op0 = IsLE ? HiLd : LoLd;
17902 Op1 = IsLE ? LoLd : HiLd;
17903 Value *AllElts = Builder.CreateCall(Vperm, {Op0, Op1, Mask1}, "shuffle1");
17904 Constant *Zero = llvm::Constant::getNullValue(IsLE ? ResTy : AllElts->getType());
17905
17906 if (IsLE) {
17907 SmallVector<int, 16> Consts;
17908 for (int Idx = 0; Idx < 16; Idx++) {
17909 int Val = (NumBytes - Idx - 1 >= 0) ? (NumBytes - Idx - 1)
17910 : 16 - (NumBytes - Idx);
17911 Consts.push_back(Val);
17912 }
17913 return Builder.CreateShuffleVector(Builder.CreateBitCast(AllElts, ResTy),
17914 Zero, Consts);
17915 }
17916 SmallVector<Constant *, 16> Consts;
17917 for (int Idx = 0; Idx < 16; Idx++)
17918 Consts.push_back(Builder.getInt8(NumBytes + Idx));
17919 Value *Mask2 = ConstantVector::get(Consts);
17920 return Builder.CreateBitCast(
17921 Builder.CreateCall(Vperm, {Zero, AllElts, Mask2}, "shuffle2"), ResTy);
17922 }
17923 case PPC::BI__builtin_vsx_strmb: {
17924 Value *Op0 = EmitScalarExpr(E->getArg(0));
17925 Value *Op1 = EmitScalarExpr(E->getArg(1));
17926 Value *Op2 = EmitScalarExpr(E->getArg(2));
17927 int64_t NumBytes = cast<ConstantInt>(Op1)->getZExtValue();
17928 bool IsLE = getTarget().isLittleEndian();
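// Helper that stores a Width-byte chunk at byte Offset by reinterpreting Op2
// as a vector of Width-byte elements and storing element EltNo (byte-swapped
// on LE for multi-byte elements).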
17929 auto StoreSubVec = [&](unsigned Width, unsigned Offset, unsigned EltNo) {
17930 // If storing the whole vector, simply store it on BE; on LE, reverse the
17931 // bytes and then store.
17932 if (Width == 16) {
17933 Value *StVec = Op2;
17934 if (IsLE) {
17935 SmallVector<int, 16> RevMask;
17936 for (int Idx = 0; Idx < 16; Idx++)
17937 RevMask.push_back(15 - Idx);
17938 StVec = Builder.CreateShuffleVector(Op2, Op2, RevMask);
17939 }
17940 return Builder.CreateStore(
17941 StVec, Address(Op0, Op2->getType(), CharUnits::fromQuantity(1)));
17942 }
17943 auto *ConvTy = Int64Ty;
17944 unsigned NumElts = 0;
17945 switch (Width) {
17946 default:
17947 llvm_unreachable("width for stores must be a power of 2");
17948 case 8:
17949 ConvTy = Int64Ty;
17950 NumElts = 2;
17951 break;
17952 case 4:
17953 ConvTy = Int32Ty;
17954 NumElts = 4;
17955 break;
17956 case 2:
17957 ConvTy = Int16Ty;
17958 NumElts = 8;
17959 break;
17960 case 1:
17961 ConvTy = Int8Ty;
17962 NumElts = 16;
17963 break;
17964 }
17965 Value *Vec = Builder.CreateBitCast(
17966 Op2, llvm::FixedVectorType::get(ConvTy, NumElts));
17967 Value *Ptr =
17968 Builder.CreateGEP(Int8Ty, Op0, ConstantInt::get(Int64Ty, Offset));
17969 Value *Elt = Builder.CreateExtractElement(Vec, EltNo);
17970 if (IsLE && Width > 1) {
17971 Function *F = CGM.getIntrinsic(Intrinsic::bswap, ConvTy);
17972 Elt = Builder.CreateCall(F, Elt);
17973 }
17974 return Builder.CreateStore(
17975 Elt, Address(Ptr, ConvTy, CharUnits::fromQuantity(1)));
17976 };
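// Store the bytes in progressively narrower power-of-two chunks until the
// requested count is covered.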
17977 unsigned Stored = 0;
17978 unsigned RemainingBytes = NumBytes;
17979 Value *Result;
17980 if (NumBytes == 16)
17981 return StoreSubVec(16, 0, 0);
17982 if (NumBytes >= 8) {
17983 Result = StoreSubVec(8, NumBytes - 8, IsLE ? 0 : 1);
17984 RemainingBytes -= 8;
17985 Stored += 8;
17986 }
17987 if (RemainingBytes >= 4) {
17988 Result = StoreSubVec(4, NumBytes - Stored - 4,
17989 IsLE ? (Stored >> 2) : 3 - (Stored >> 2));
17990 RemainingBytes -= 4;
17991 Stored += 4;
17992 }
17993 if (RemainingBytes >= 2) {
17994 Result = StoreSubVec(2, NumBytes - Stored - 2,
17995 IsLE ? (Stored >> 1) : 7 - (Stored >> 1));
17996 RemainingBytes -= 2;
17997 Stored += 2;
17998 }
17999 if (RemainingBytes)
18000 Result =
18001 StoreSubVec(1, NumBytes - Stored - 1, IsLE ? Stored : 15 - Stored);
18002 return Result;
18003 }
18004 // Square root
18005 case PPC::BI__builtin_vsx_xvsqrtsp:
18006 case PPC::BI__builtin_vsx_xvsqrtdp: {
18007 llvm::Type *ResultType = ConvertType(E->getType());
18008 Value *X = EmitScalarExpr(E->getArg(0));
18009 if (Builder.getIsFPConstrained()) {
18010 llvm::Function *F = CGM.getIntrinsic(
18011 Intrinsic::experimental_constrained_sqrt, ResultType);
18012 return Builder.CreateConstrainedFPCall(F, X);
18013 } else {
18014 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
18015 return Builder.CreateCall(F, X);
18016 }
18017 }
18018 // Count leading zeros
18019 case PPC::BI__builtin_altivec_vclzb:
18020 case PPC::BI__builtin_altivec_vclzh:
18021 case PPC::BI__builtin_altivec_vclzw:
18022 case PPC::BI__builtin_altivec_vclzd: {
18023 llvm::Type *ResultType = ConvertType(E->getType());
18024 Value *X = EmitScalarExpr(E->getArg(0));
18025 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
18026 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
18027 return Builder.CreateCall(F, {X, Undef});
18028 }
18029 case PPC::BI__builtin_altivec_vctzb:
18030 case PPC::BI__builtin_altivec_vctzh:
18031 case PPC::BI__builtin_altivec_vctzw:
18032 case PPC::BI__builtin_altivec_vctzd: {
18033 llvm::Type *ResultType = ConvertType(E->getType());
18034 Value *X = EmitScalarExpr(E->getArg(0));
18035 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
18036 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
18037 return Builder.CreateCall(F, {X, Undef});
18038 }
18039 case PPC::BI__builtin_altivec_vinsd:
18040 case PPC::BI__builtin_altivec_vinsw:
18041 case PPC::BI__builtin_altivec_vinsd_elt:
18042 case PPC::BI__builtin_altivec_vinsw_elt: {
18043 llvm::Type *ResultType = ConvertType(E->getType());
18044 Value *Op0 = EmitScalarExpr(E->getArg(0));
18045 Value *Op1 = EmitScalarExpr(E->getArg(1));
18046 Value *Op2 = EmitScalarExpr(E->getArg(2));
18047
18048 bool IsUnaligned = (BuiltinID == PPC::BI__builtin_altivec_vinsw ||
18049 BuiltinID == PPC::BI__builtin_altivec_vinsd);
18050
18051 bool Is32bit = (BuiltinID == PPC::BI__builtin_altivec_vinsw ||
18052 BuiltinID == PPC::BI__builtin_altivec_vinsw_elt);
18053
18054 // The third argument must be a compile time constant.
18055 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
18056 assert(ArgCI &&
18057 "Third Arg to vinsw/vinsd intrinsic must be a constant integer!");
18058
18059 // The valid range for the third argument depends on the input type and the
18060 // builtin called.
18061 int ValidMaxValue = 0;
18062 if (IsUnaligned)
18063 ValidMaxValue = (Is32bit) ? 12 : 8;
18064 else
18065 ValidMaxValue = (Is32bit) ? 3 : 1;
18066
18067 // Get value of third argument.
18068 int64_t ConstArg = ArgCI->getSExtValue();
18069
18070 // Compose range checking error message.
18071 std::string RangeErrMsg = IsUnaligned ? "byte" : "element";
18072 RangeErrMsg += " number " + llvm::to_string(ConstArg);
18073 RangeErrMsg += " is outside of the valid range [0, ";
18074 RangeErrMsg += llvm::to_string(ValidMaxValue) + "]";
18075
18076 // Issue error if third argument is not within the valid range.
18077 if (ConstArg < 0 || ConstArg > ValidMaxValue)
18078 CGM.Error(E->getExprLoc(), RangeErrMsg);
18079
18080 // The input to vec_replace_elt is an element index; convert it to a byte index.
18081 if (!IsUnaligned) {
18082 ConstArg *= Is32bit ? 4 : 8;
18083 // Fix the constant according to endianness.
18084 if (getTarget().isLittleEndian())
18085 ConstArg = (Is32bit ? 12 : 8) - ConstArg;
18086 }
18087
18088 ID = Is32bit ? Intrinsic::ppc_altivec_vinsw : Intrinsic::ppc_altivec_vinsd;
18089 Op2 = ConstantInt::getSigned(Int32Ty, ConstArg);
18090 // Casting input to vector int as per intrinsic definition.
18091 Op0 =
18092 Is32bit
18093 ? Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int32Ty, 4))
18094 : Builder.CreateBitCast(Op0,
18095 llvm::FixedVectorType::get(Int64Ty, 2));
18096 return Builder.CreateBitCast(
18097 Builder.CreateCall(CGM.getIntrinsic(ID), {Op0, Op1, Op2}), ResultType);
18098 }
18099 case PPC::BI__builtin_altivec_vadduqm:
18100 case PPC::BI__builtin_altivec_vsubuqm: {
18101 Value *Op0 = EmitScalarExpr(E->getArg(0));
18102 Value *Op1 = EmitScalarExpr(E->getArg(1));
18103 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
18104 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int128Ty, 1));
18105 Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int128Ty, 1));
18106 if (BuiltinID == PPC::BI__builtin_altivec_vadduqm)
18107 return Builder.CreateAdd(Op0, Op1, "vadduqm");
18108 else
18109 return Builder.CreateSub(Op0, Op1, "vsubuqm");
18110 }
18111 case PPC::BI__builtin_altivec_vaddcuq_c:
18112 case PPC::BI__builtin_altivec_vsubcuq_c: {
18113 SmallVector<Value *, 2> Ops;
18114 Value *Op0 = EmitScalarExpr(E->getArg(0));
18115 Value *Op1 = EmitScalarExpr(E->getArg(1));
18116 llvm::Type *V1I128Ty = llvm::FixedVectorType::get(
18117 llvm::IntegerType::get(getLLVMContext(), 128), 1);
18118 Ops.push_back(Builder.CreateBitCast(Op0, V1I128Ty));
18119 Ops.push_back(Builder.CreateBitCast(Op1, V1I128Ty));
18120 ID = (BuiltinID == PPC::BI__builtin_altivec_vaddcuq_c)
18121 ? Intrinsic::ppc_altivec_vaddcuq
18122 : Intrinsic::ppc_altivec_vsubcuq;
18123 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops, "");
18124 }
18125 case PPC::BI__builtin_altivec_vaddeuqm_c:
18126 case PPC::BI__builtin_altivec_vaddecuq_c:
18127 case PPC::BI__builtin_altivec_vsubeuqm_c:
18128 case PPC::BI__builtin_altivec_vsubecuq_c: {
18129 SmallVector<Value *, 3> Ops;
18130 Value *Op0 = EmitScalarExpr(E->getArg(0));
18131 Value *Op1 = EmitScalarExpr(E->getArg(1));
18132 Value *Op2 = EmitScalarExpr(E->getArg(2));
18133 llvm::Type *V1I128Ty = llvm::FixedVectorType::get(
18134 llvm::IntegerType::get(getLLVMContext(), 128), 1);
18135 Ops.push_back(Builder.CreateBitCast(Op0, V1I128Ty));
18136 Ops.push_back(Builder.CreateBitCast(Op1, V1I128Ty));
18137 Ops.push_back(Builder.CreateBitCast(Op2, V1I128Ty));
18138 switch (BuiltinID) {
18139 default:
18140 llvm_unreachable("Unsupported intrinsic!");
18141 case PPC::BI__builtin_altivec_vaddeuqm_c:
18142 ID = Intrinsic::ppc_altivec_vaddeuqm;
18143 break;
18144 case PPC::BI__builtin_altivec_vaddecuq_c:
18145 ID = Intrinsic::ppc_altivec_vaddecuq;
18146 break;
18147 case PPC::BI__builtin_altivec_vsubeuqm_c:
18148 ID = Intrinsic::ppc_altivec_vsubeuqm;
18149 break;
18150 case PPC::BI__builtin_altivec_vsubecuq_c:
18151 ID = Intrinsic::ppc_altivec_vsubecuq;
18152 break;
18153 }
18154 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops, "");
18155 }
18156 case PPC::BI__builtin_ppc_rldimi:
18157 case PPC::BI__builtin_ppc_rlwimi: {
18158 Value *Op0 = EmitScalarExpr(E->getArg(0));
18159 Value *Op1 = EmitScalarExpr(E->getArg(1));
18160 Value *Op2 = EmitScalarExpr(E->getArg(2));
18161 Value *Op3 = EmitScalarExpr(E->getArg(3));
18162 // rldimi is a 64-bit instruction; expand the intrinsic before isel to
18163 // leverage peepholes and avoid legalization effort.
18164 if (BuiltinID == PPC::BI__builtin_ppc_rldimi &&
18165 !getTarget().getTriple().isPPC64()) {
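// On 32-bit targets, lower rldimi manually: rotate Op0 with a funnel shift,
// then merge the rotated value with Op1 under the mask Op3.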
18166 Function *F = CGM.getIntrinsic(Intrinsic::fshl, Op0->getType());
18167 Op2 = Builder.CreateZExt(Op2, Int64Ty);
18168 Value *Shift = Builder.CreateCall(F, {Op0, Op0, Op2});
18169 return Builder.CreateOr(Builder.CreateAnd(Shift, Op3),
18170 Builder.CreateAnd(Op1, Builder.CreateNot(Op3)));
18171 }
18172 return Builder.CreateCall(
18173 CGM.getIntrinsic(BuiltinID == PPC::BI__builtin_ppc_rldimi
18174 ? Intrinsic::ppc_rldimi
18175 : Intrinsic::ppc_rlwimi),
18176 {Op0, Op1, Op2, Op3});
18177 }
18178 case PPC::BI__builtin_ppc_rlwnm: {
18179 Value *Op0 = EmitScalarExpr(E->getArg(0));
18180 Value *Op1 = EmitScalarExpr(E->getArg(1));
18181 Value *Op2 = EmitScalarExpr(E->getArg(2));
18182 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_rlwnm),
18183 {Op0, Op1, Op2});
18184 }
18185 case PPC::BI__builtin_ppc_poppar4:
18186 case PPC::BI__builtin_ppc_poppar8: {
18187 Value *Op0 = EmitScalarExpr(E->getArg(0));
18188 llvm::Type *ArgType = Op0->getType();
18189 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
18190 Value *Tmp = Builder.CreateCall(F, Op0);
18191
18192 llvm::Type *ResultType = ConvertType(E->getType());
18193 Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
18194 if (Result->getType() != ResultType)
18195 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
18196 "cast");
18197 return Result;
18198 }
18199 case PPC::BI__builtin_ppc_cmpb: {
18200 Value *Op0 = EmitScalarExpr(E->getArg(0));
18201 Value *Op1 = EmitScalarExpr(E->getArg(1));
18202 if (getTarget().getTriple().isPPC64()) {
18203 Function *F =
18204 CGM.getIntrinsic(Intrinsic::ppc_cmpb, {Int64Ty, Int64Ty, Int64Ty});
18205 return Builder.CreateCall(F, {Op0, Op1}, "cmpb");
18206 }
18207 // For 32-bit, emit the code as below:
18208 // %conv = trunc i64 %a to i32
18209 // %conv1 = trunc i64 %b to i32
18210 // %shr = lshr i64 %a, 32
18211 // %conv2 = trunc i64 %shr to i32
18212 // %shr3 = lshr i64 %b, 32
18213 // %conv4 = trunc i64 %shr3 to i32
18214 // %0 = tail call i32 @llvm.ppc.cmpb32(i32 %conv, i32 %conv1)
18215 // %conv5 = zext i32 %0 to i64
18216 // %1 = tail call i32 @llvm.ppc.cmpb32(i32 %conv2, i32 %conv4)
18217 // %conv614 = zext i32 %1 to i64
18218 // %shl = shl nuw i64 %conv614, 32
18219 // %or = or i64 %shl, %conv5
18220 // ret i64 %or
18221 Function *F =
18222 CGM.getIntrinsic(Intrinsic::ppc_cmpb, {Int32Ty, Int32Ty, Int32Ty});
18223 Value *ArgOneLo = Builder.CreateTrunc(Op0, Int32Ty);
18224 Value *ArgTwoLo = Builder.CreateTrunc(Op1, Int32Ty);
18225 Constant *ShiftAmt = ConstantInt::get(Int64Ty, 32);
18226 Value *ArgOneHi =
18227 Builder.CreateTrunc(Builder.CreateLShr(Op0, ShiftAmt), Int32Ty);
18228 Value *ArgTwoHi =
18229 Builder.CreateTrunc(Builder.CreateLShr(Op1, ShiftAmt), Int32Ty);
18230 Value *ResLo = Builder.CreateZExt(
18231 Builder.CreateCall(F, {ArgOneLo, ArgTwoLo}, "cmpb"), Int64Ty);
18232 Value *ResHiShift = Builder.CreateZExt(
18233 Builder.CreateCall(F, {ArgOneHi, ArgTwoHi}, "cmpb"), Int64Ty);
18234 Value *ResHi = Builder.CreateShl(ResHiShift, ShiftAmt);
18235 return Builder.CreateOr(ResLo, ResHi);
18236 }
18237 // Copy sign
18238 case PPC::BI__builtin_vsx_xvcpsgnsp:
18239 case PPC::BI__builtin_vsx_xvcpsgndp: {
18240 llvm::Type *ResultType = ConvertType(E->getType());
18241 Value *X = EmitScalarExpr(E->getArg(0));
18242 Value *Y = EmitScalarExpr(E->getArg(1));
18243 ID = Intrinsic::copysign;
18244 llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
18245 return Builder.CreateCall(F, {X, Y});
18246 }
18247 // Rounding/truncation
18248 case PPC::BI__builtin_vsx_xvrspip:
18249 case PPC::BI__builtin_vsx_xvrdpip:
18250 case PPC::BI__builtin_vsx_xvrdpim:
18251 case PPC::BI__builtin_vsx_xvrspim:
18252 case PPC::BI__builtin_vsx_xvrdpi:
18253 case PPC::BI__builtin_vsx_xvrspi:
18254 case PPC::BI__builtin_vsx_xvrdpic:
18255 case PPC::BI__builtin_vsx_xvrspic:
18256 case PPC::BI__builtin_vsx_xvrdpiz:
18257 case PPC::BI__builtin_vsx_xvrspiz: {
18258 llvm::Type *ResultType = ConvertType(E->getType());
18259 Value *X = EmitScalarExpr(E->getArg(0));
18260 if (BuiltinID == PPC::BI__builtin_vsx_xvrdpim ||
18261 BuiltinID == PPC::BI__builtin_vsx_xvrspim)
18262 ID = Builder.getIsFPConstrained()
18263 ? Intrinsic::experimental_constrained_floor
18264 : Intrinsic::floor;
18265 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpi ||
18266 BuiltinID == PPC::BI__builtin_vsx_xvrspi)
18267 ID = Builder.getIsFPConstrained()
18268 ? Intrinsic::experimental_constrained_round
18269 : Intrinsic::round;
18270 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpic ||
18271 BuiltinID == PPC::BI__builtin_vsx_xvrspic)
18272 ID = Builder.getIsFPConstrained()
18273 ? Intrinsic::experimental_constrained_rint
18274 : Intrinsic::rint;
18275 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpip ||
18276 BuiltinID == PPC::BI__builtin_vsx_xvrspip)
18277 ID = Builder.getIsFPConstrained()
18278 ? Intrinsic::experimental_constrained_ceil
18279 : Intrinsic::ceil;
18280 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpiz ||
18281 BuiltinID == PPC::BI__builtin_vsx_xvrspiz)
18282 ID = Builder.getIsFPConstrained()
18283 ? Intrinsic::experimental_constrained_trunc
18284 : Intrinsic::trunc;
18285 llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
18286 return Builder.getIsFPConstrained() ? Builder.CreateConstrainedFPCall(F, X)
18287 : Builder.CreateCall(F, X);
18288 }
18289
18290 // Absolute value
18291 case PPC::BI__builtin_vsx_xvabsdp:
18292 case PPC::BI__builtin_vsx_xvabssp: {
18293 llvm::Type *ResultType = ConvertType(E->getType());
18294 Value *X = EmitScalarExpr(E->getArg(0));
18295 llvm::Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
18296 return Builder.CreateCall(F, X);
18297 }
18298
18299 // Fastmath by default
18300 case PPC::BI__builtin_ppc_recipdivf:
18301 case PPC::BI__builtin_ppc_recipdivd:
18302 case PPC::BI__builtin_ppc_rsqrtf:
18303 case PPC::BI__builtin_ppc_rsqrtd: {
18304 FastMathFlags FMF = Builder.getFastMathFlags();
18305 Builder.getFastMathFlags().setFast();
18306 llvm::Type *ResultType = ConvertType(E->getType());
18307 Value *X = EmitScalarExpr(E->getArg(0));
18308
18309 if (BuiltinID == PPC::BI__builtin_ppc_recipdivf ||
18310 BuiltinID == PPC::BI__builtin_ppc_recipdivd) {
18311 Value *Y = EmitScalarExpr(E->getArg(1));
18312 Value *FDiv = Builder.CreateFDiv(X, Y, "recipdiv");
18313 Builder.getFastMathFlags() &= (FMF);
18314 return FDiv;
18315 }
18316 auto *One = ConstantFP::get(ResultType, 1.0);
18317 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
18318 Value *FDiv = Builder.CreateFDiv(One, Builder.CreateCall(F, X), "rsqrt");
18319 Builder.getFastMathFlags() &= (FMF);
18320 return FDiv;
18321 }
18322 case PPC::BI__builtin_ppc_alignx: {
18323 Value *Op0 = EmitScalarExpr(E->getArg(0));
18324 Value *Op1 = EmitScalarExpr(E->getArg(1));
18325 ConstantInt *AlignmentCI = cast<ConstantInt>(Op0);
18326 if (AlignmentCI->getValue().ugt(llvm::Value::MaximumAlignment))
18327 AlignmentCI = ConstantInt::get(AlignmentCI->getIntegerType(),
18328 llvm::Value::MaximumAlignment);
18329
18330 emitAlignmentAssumption(Op1, E->getArg(1),
18331 /*The expr loc is sufficient.*/ SourceLocation(),
18332 AlignmentCI, nullptr);
18333 return Op1;
18334 }
18335 case PPC::BI__builtin_ppc_rdlam: {
18336 Value *Op0 = EmitScalarExpr(E->getArg(0));
18337 Value *Op1 = EmitScalarExpr(E->getArg(1));
18338 Value *Op2 = EmitScalarExpr(E->getArg(2));
18339 llvm::Type *Ty = Op0->getType();
18340 Value *ShiftAmt = Builder.CreateIntCast(Op1, Ty, false);
18341 Function *F = CGM.getIntrinsic(Intrinsic::fshl, Ty);
18342 Value *Rotate = Builder.CreateCall(F, {Op0, Op0, ShiftAmt});
18343 return Builder.CreateAnd(Rotate, Op2);
18344 }
18345 case PPC::BI__builtin_ppc_load2r: {
18346 Function *F = CGM.getIntrinsic(Intrinsic::ppc_load2r);
18347 Value *Op0 = EmitScalarExpr(E->getArg(0));
18348 Value *LoadIntrinsic = Builder.CreateCall(F, {Op0});
18349 return Builder.CreateTrunc(LoadIntrinsic, Int16Ty);
18350 }
18351 // FMA variations
18352 case PPC::BI__builtin_ppc_fnmsub:
18353 case PPC::BI__builtin_ppc_fnmsubs:
18354 case PPC::BI__builtin_vsx_xvmaddadp:
18355 case PPC::BI__builtin_vsx_xvmaddasp:
18356 case PPC::BI__builtin_vsx_xvnmaddadp:
18357 case PPC::BI__builtin_vsx_xvnmaddasp:
18358 case PPC::BI__builtin_vsx_xvmsubadp:
18359 case PPC::BI__builtin_vsx_xvmsubasp:
18360 case PPC::BI__builtin_vsx_xvnmsubadp:
18361 case PPC::BI__builtin_vsx_xvnmsubasp: {
18362 llvm::Type *ResultType = ConvertType(E->getType());
18363 Value *X = EmitScalarExpr(E->getArg(0));
18364 Value *Y = EmitScalarExpr(E->getArg(1));
18365 Value *Z = EmitScalarExpr(E->getArg(2));
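// All variants reduce to fma(X, Y, Z): 'msub' negates Z, 'nmadd' negates the
// result, and 'nmsub' negates both.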
18366 llvm::Function *F;
18367 if (Builder.getIsFPConstrained())
18368 F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
18369 else
18370 F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
18371 switch (BuiltinID) {
18372 case PPC::BI__builtin_vsx_xvmaddadp:
18373 case PPC::BI__builtin_vsx_xvmaddasp:
18374 if (Builder.getIsFPConstrained())
18375 return Builder.CreateConstrainedFPCall(F, {X, Y, Z});
18376 else
18377 return Builder.CreateCall(F, {X, Y, Z});
18378 case PPC::BI__builtin_vsx_xvnmaddadp:
18379 case PPC::BI__builtin_vsx_xvnmaddasp:
18380 if (Builder.getIsFPConstrained())
18381 return Builder.CreateFNeg(
18382 Builder.CreateConstrainedFPCall(F, {X, Y, Z}), "neg");
18383 else
18384 return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, Z}), "neg");
18385 case PPC::BI__builtin_vsx_xvmsubadp:
18386 case PPC::BI__builtin_vsx_xvmsubasp:
18387 if (Builder.getIsFPConstrained())
18388 return Builder.CreateConstrainedFPCall(
18389 F, {X, Y, Builder.CreateFNeg(Z, "neg")});
18390 else
18391 return Builder.CreateCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")});
18392 case PPC::BI__builtin_ppc_fnmsub:
18393 case PPC::BI__builtin_ppc_fnmsubs:
18394 case PPC::BI__builtin_vsx_xvnmsubadp:
18395 case PPC::BI__builtin_vsx_xvnmsubasp:
18396 if (Builder.getIsFPConstrained())
18397 return Builder.CreateFNeg(
18398 Builder.CreateConstrainedFPCall(
18399 F, {X, Y, Builder.CreateFNeg(Z, "neg")}),
18400 "neg");
18401 else
18402 return Builder.CreateCall(
18403 CGM.getIntrinsic(Intrinsic::ppc_fnmsub, ResultType), {X, Y, Z});
18404 }
18405 llvm_unreachable("Unknown FMA operation");
18406 return nullptr; // Suppress no-return warning
18407 }
18408
18409 case PPC::BI__builtin_vsx_insertword: {
18410 Value *Op0 = EmitScalarExpr(E->getArg(0));
18411 Value *Op1 = EmitScalarExpr(E->getArg(1));
18412 Value *Op2 = EmitScalarExpr(E->getArg(2));
18413 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxinsertw);
18414
18415 // Third argument is a compile time constant int. It must be clamped to
18416 // the range [0, 12].
18417 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
18418 assert(ArgCI &&
18419 "Third arg to xxinsertw intrinsic must be constant integer");
18420 const int64_t MaxIndex = 12;
18421 int64_t Index = std::clamp(ArgCI->getSExtValue(), (int64_t)0, MaxIndex);
18422
18423 // The builtin semantics don't exactly match the xxinsertw instruction's
18424 // semantics (which ppc_vsx_xxinsertw follows). The builtin extracts the
18425 // word from the first argument, and inserts it in the second argument. The
18426 // instruction extracts the word from its second input register and inserts
18427 // it into its first input register, so swap the first and second arguments.
18428 std::swap(Op0, Op1);
18429
18430 // Need to cast the second argument from a vector of unsigned int to a
18431 // vector of long long.
18432 Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int64Ty, 2));
18433
18434 if (getTarget().isLittleEndian()) {
18435 // Reverse the double words in the vector we will extract from.
18436 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2));
18437 Op0 = Builder.CreateShuffleVector(Op0, Op0, ArrayRef<int>{1, 0});
18438
18439 // Reverse the index.
18440 Index = MaxIndex - Index;
18441 }
18442
18443 // Intrinsic expects the first arg to be a vector of int.
18444 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int32Ty, 4));
18445 Op2 = ConstantInt::getSigned(Int32Ty, Index);
18446 return Builder.CreateCall(F, {Op0, Op1, Op2});
18447 }
18448
18449 case PPC::BI__builtin_vsx_extractuword: {
18450 Value *Op0 = EmitScalarExpr(E->getArg(0));
18451 Value *Op1 = EmitScalarExpr(E->getArg(1));
18452 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxextractuw);
18453
18454 // Intrinsic expects the first argument to be a vector of doublewords.
18455 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2));
18456
18457 // The second argument is a compile time constant int that needs to
18458 // be clamped to the range [0, 12].
18459 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op1);
18460 assert(ArgCI &&
18461 "Second Arg to xxextractuw intrinsic must be a constant integer!");
18462 const int64_t MaxIndex = 12;
18463 int64_t Index = std::clamp(ArgCI->getSExtValue(), (int64_t)0, MaxIndex);
18464
18465 if (getTarget().isLittleEndian()) {
18466 // Reverse the index.
18467 Index = MaxIndex - Index;
18468 Op1 = ConstantInt::getSigned(Int32Ty, Index);
18469
18470 // Emit the call, then reverse the double words of the results vector.
18471 Value *Call = Builder.CreateCall(F, {Op0, Op1});
18472
18473 Value *ShuffleCall =
18474 Builder.CreateShuffleVector(Call, Call, ArrayRef<int>{1, 0});
18475 return ShuffleCall;
18476 } else {
18477 Op1 = ConstantInt::getSigned(Int32Ty, Index);
18478 return Builder.CreateCall(F, {Op0, Op1});
18479 }
18480 }
18481
18482 case PPC::BI__builtin_vsx_xxpermdi: {
18483 Value *Op0 = EmitScalarExpr(E->getArg(0));
18484 Value *Op1 = EmitScalarExpr(E->getArg(1));
18485 Value *Op2 = EmitScalarExpr(E->getArg(2));
18486 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
18487 assert(ArgCI && "Third arg must be constant integer!");
18488
18489 unsigned Index = ArgCI->getZExtValue();
18490 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2));
18491 Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int64Ty, 2));
18492
18493 // Account for endianness by treating this as just a shuffle. So we use the
18494 // same indices for both LE and BE in order to produce expected results in
18495 // both cases.
18496 int ElemIdx0 = (Index & 2) >> 1;
18497 int ElemIdx1 = 2 + (Index & 1);
18498
18499 int ShuffleElts[2] = {ElemIdx0, ElemIdx1};
18500 Value *ShuffleCall = Builder.CreateShuffleVector(Op0, Op1, ShuffleElts);
18501 QualType BIRetType = E->getType();
18502 auto RetTy = ConvertType(BIRetType);
18503 return Builder.CreateBitCast(ShuffleCall, RetTy);
18504 }
18505
18506 case PPC::BI__builtin_vsx_xxsldwi: {
18507 Value *Op0 = EmitScalarExpr(E->getArg(0));
18508 Value *Op1 = EmitScalarExpr(E->getArg(1));
18509 Value *Op2 = EmitScalarExpr(E->getArg(2));
18510 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
18511 assert(ArgCI && "Third argument must be a compile time constant");
18512 unsigned Index = ArgCI->getZExtValue() & 0x3;
18513 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int32Ty, 4));
18514 Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int32Ty, 4));
18515
18516 // Create a shuffle mask
18517 int ElemIdx0;
18518 int ElemIdx1;
18519 int ElemIdx2;
18520 int ElemIdx3;
18521 if (getTarget().isLittleEndian()) {
18522 // Little endian element N comes from element 8+N-Index of the
18523 // concatenated wide vector (of course, using modulo arithmetic on
18524 // the total number of elements).
18525 ElemIdx0 = (8 - Index) % 8;
18526 ElemIdx1 = (9 - Index) % 8;
18527 ElemIdx2 = (10 - Index) % 8;
18528 ElemIdx3 = (11 - Index) % 8;
18529 } else {
18530 // Big endian ElemIdx<N> = Index + N
18531 ElemIdx0 = Index;
18532 ElemIdx1 = Index + 1;
18533 ElemIdx2 = Index + 2;
18534 ElemIdx3 = Index + 3;
18535 }
18536
18537 int ShuffleElts[4] = {ElemIdx0, ElemIdx1, ElemIdx2, ElemIdx3};
18538 Value *ShuffleCall = Builder.CreateShuffleVector(Op0, Op1, ShuffleElts);
18539 QualType BIRetType = E->getType();
18540 auto RetTy = ConvertType(BIRetType);
18541 return Builder.CreateBitCast(ShuffleCall, RetTy);
18542 }
18543
18544 case PPC::BI__builtin_pack_vector_int128: {
18545 Value *Op0 = EmitScalarExpr(E->getArg(0));
18546 Value *Op1 = EmitScalarExpr(E->getArg(1));
18547 bool isLittleEndian = getTarget().isLittleEndian();
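// Insert the two halves into a two-element vector in endian-dependent order,
// then bitcast to the builtin's 128-bit result type.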
18548 Value *PoisonValue =
18549 llvm::PoisonValue::get(llvm::FixedVectorType::get(Op0->getType(), 2));
18550 Value *Res = Builder.CreateInsertElement(
18551 PoisonValue, Op0, (uint64_t)(isLittleEndian ? 1 : 0));
18552 Res = Builder.CreateInsertElement(Res, Op1,
18553 (uint64_t)(isLittleEndian ? 0 : 1));
18554 return Builder.CreateBitCast(Res, ConvertType(E->getType()));
18555 }
18556
18557 case PPC::BI__builtin_unpack_vector_int128: {
18558 Value *Op0 = EmitScalarExpr(E->getArg(0));
18559 Value *Op1 = EmitScalarExpr(E->getArg(1));
18560 ConstantInt *Index = cast<ConstantInt>(Op1);
18561 Value *Unpacked = Builder.CreateBitCast(
18562 Op0, llvm::FixedVectorType::get(ConvertType(E->getType()), 2));
18563
18564 if (getTarget().isLittleEndian())
18565 Index =
18566 ConstantInt::get(Index->getIntegerType(), 1 - Index->getZExtValue());
18567
18568 return Builder.CreateExtractElement(Unpacked, Index);
18569 }
18570
18571 case PPC::BI__builtin_ppc_sthcx: {
18572 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_sthcx);
18573 Value *Op0 = EmitScalarExpr(E->getArg(0));
18574 Value *Op1 = Builder.CreateSExt(EmitScalarExpr(E->getArg(1)), Int32Ty);
18575 return Builder.CreateCall(F, {Op0, Op1});
18576 }
18577
18578 // The PPC MMA builtins take a pointer to a __vector_quad as an argument.
18579 // Some of the MMA instructions accumulate their result into an existing
18580 // accumulator whereas the others generate a new accumulator. So we need to
18581 // use custom code generation to expand a builtin call with a pointer to a
18582 // load (if the corresponding instruction accumulates its result) followed by
18583 // the call to the intrinsic and a store of the result.
18584#define CUSTOM_BUILTIN(Name, Intr, Types, Accumulate, Feature) \
18585 case PPC::BI__builtin_##Name:
18586#include "clang/Basic/BuiltinsPPC.def"
18587 {
18588 SmallVector<Value *, 4> Ops;
18589 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)
18590 if (E->getArg(i)->getType()->isArrayType())
18591 Ops.push_back(
18592 EmitArrayToPointerDecay(E->getArg(i)).emitRawPointer(*this));
18593 else
18594 Ops.push_back(EmitScalarExpr(E->getArg(i)));
18595 // The first argument of these builtins is a pointer used to store their
18596 // result. However, the llvm intrinsics return their result in multiple
18597 // return values. So, here we emit code extracting these values from the
18598 // intrinsic results and storing them using that pointer.
18599 if (BuiltinID == PPC::BI__builtin_mma_disassemble_acc ||
18600 BuiltinID == PPC::BI__builtin_vsx_disassemble_pair ||
18601 BuiltinID == PPC::BI__builtin_mma_disassemble_pair) {
18602 unsigned NumVecs = 2;
18603 auto Intrinsic = Intrinsic::ppc_vsx_disassemble_pair;
18604 if (BuiltinID == PPC::BI__builtin_mma_disassemble_acc) {
18605 NumVecs = 4;
18606 Intrinsic = Intrinsic::ppc_mma_disassemble_acc;
18607 }
18608 llvm::Function *F = CGM.getIntrinsic(Intrinsic);
18609 Address Addr = EmitPointerWithAlignment(E->getArg(1));
18610 Value *Vec = Builder.CreateLoad(Addr);
18611 Value *Call = Builder.CreateCall(F, {Vec});
18612 llvm::Type *VTy = llvm::FixedVectorType::get(Int8Ty, 16);
18613 Value *Ptr = Ops[0];
18614 for (unsigned i=0; i<NumVecs; i++) {
18615 Value *Vec = Builder.CreateExtractValue(Call, i);
18616 llvm::ConstantInt* Index = llvm::ConstantInt::get(IntTy, i);
18617 Value *GEP = Builder.CreateInBoundsGEP(VTy, Ptr, Index);
18618 Builder.CreateAlignedStore(Vec, GEP, MaybeAlign(16));
18619 }
18620 return Call;
18621 }
18622 if (BuiltinID == PPC::BI__builtin_vsx_build_pair ||
18623 BuiltinID == PPC::BI__builtin_mma_build_acc) {
18624 // Reverse the order of the operands for LE, so the
18625 // same builtin call can be used on both LE and BE
18626 // without the need for the programmer to swap operands.
18627 // The operands are reversed starting from the second argument;
18628 // the first operand is the pointer to the pair/accumulator
18629 // that is being built.
18630 if (getTarget().isLittleEndian())
18631 std::reverse(Ops.begin() + 1, Ops.end());
18632 }
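// Map the builtin to the corresponding PPC intrinsic and record whether it
// accumulates into an existing accumulator, using the same .def entries.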
18633 bool Accumulate;
18634 switch (BuiltinID) {
18635 #define CUSTOM_BUILTIN(Name, Intr, Types, Acc, Feature) \
18636 case PPC::BI__builtin_##Name: \
18637 ID = Intrinsic::ppc_##Intr; \
18638 Accumulate = Acc; \
18639 break;
18640 #include "clang/Basic/BuiltinsPPC.def"
18641 }
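// The load/store-pair builtins (lxvp/stxvp) just fold the offset into the
// base pointer and call the intrinsic directly; no accumulator is involved.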
18642 if (BuiltinID == PPC::BI__builtin_vsx_lxvp ||
18643 BuiltinID == PPC::BI__builtin_vsx_stxvp ||
18644 BuiltinID == PPC::BI__builtin_mma_lxvp ||
18645 BuiltinID == PPC::BI__builtin_mma_stxvp) {
18646 if (BuiltinID == PPC::BI__builtin_vsx_lxvp ||
18647 BuiltinID == PPC::BI__builtin_mma_lxvp) {
18648 Ops[0] = Builder.CreateGEP(Int8Ty, Ops[1], Ops[0]);
18649 } else {
18650 Ops[1] = Builder.CreateGEP(Int8Ty, Ops[2], Ops[1]);
18651 }
18652 Ops.pop_back();
18653 llvm::Function *F = CGM.getIntrinsic(ID);
18654 return Builder.CreateCall(F, Ops, "");
18655 }
18656 SmallVector<Value*, 4> CallOps;
18657 if (Accumulate) {
18658 Address Addr = EmitPointerWithAlignment(E->getArg(0));
18659 Value *Acc = Builder.CreateLoad(Addr);
18660 CallOps.push_back(Acc);
18661 }
18662 for (unsigned i=1; i<Ops.size(); i++)
18663 CallOps.push_back(Ops[i]);
18664 llvm::Function *F = CGM.getIntrinsic(ID);
18665 Value *Call = Builder.CreateCall(F, CallOps);
18666 return Builder.CreateAlignedStore(Call, Ops[0], MaybeAlign());
18667 }
18668
18669 case PPC::BI__builtin_ppc_compare_and_swap:
18670 case PPC::BI__builtin_ppc_compare_and_swaplp: {
18671 Address Addr = EmitPointerWithAlignment(E->getArg(0));
18672 Address OldValAddr = EmitPointerWithAlignment(E->getArg(1));
18673 Value *OldVal = Builder.CreateLoad(OldValAddr);
18674 QualType AtomicTy = E->getArg(0)->getType()->getPointeeType();
18675 LValue LV = MakeAddrLValue(Addr, AtomicTy);
18676 Value *Op2 = EmitScalarExpr(E->getArg(2));
18677 auto Pair = EmitAtomicCompareExchange(
18678 LV, RValue::get(OldVal), RValue::get(Op2), E->getExprLoc(),
18679 llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Monotonic, true);
18680 // Unlike c11's atomic_compare_exchange, according to
18681 // https://www.ibm.com/docs/en/xl-c-and-cpp-aix/16.1?topic=functions-compare-swap-compare-swaplp
18682 // > In either case, the contents of the memory location specified by addr
18683 // > are copied into the memory location specified by old_val_addr.
18684 // But it does not specify whether the store to OldValAddr is atomic or
18685 // which ordering to use. Following XL's codegen, treat it as a normal
18686 // store.
18687 Value *LoadedVal = Pair.first.getScalarVal();
18688 Builder.CreateStore(LoadedVal, OldValAddr);
18689 return Builder.CreateZExt(Pair.second, Builder.getInt32Ty());
18690 }
18691 case PPC::BI__builtin_ppc_fetch_and_add:
18692 case PPC::BI__builtin_ppc_fetch_and_addlp: {
18693 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
18694 llvm::AtomicOrdering::Monotonic);
18695 }
18696 case PPC::BI__builtin_ppc_fetch_and_and:
18697 case PPC::BI__builtin_ppc_fetch_and_andlp: {
18698 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
18699 llvm::AtomicOrdering::Monotonic);
18700 }
18701
18702 case PPC::BI__builtin_ppc_fetch_and_or:
18703 case PPC::BI__builtin_ppc_fetch_and_orlp: {
18704 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
18705 llvm::AtomicOrdering::Monotonic);
18706 }
18707 case PPC::BI__builtin_ppc_fetch_and_swap:
18708 case PPC::BI__builtin_ppc_fetch_and_swaplp: {
18709 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
18710 llvm::AtomicOrdering::Monotonic);
18711 }
18712 case PPC::BI__builtin_ppc_ldarx:
18713 case PPC::BI__builtin_ppc_lwarx:
18714 case PPC::BI__builtin_ppc_lharx:
18715 case PPC::BI__builtin_ppc_lbarx:
18716 return emitPPCLoadReserveIntrinsic(*this, BuiltinID, E);
18717 case PPC::BI__builtin_ppc_mfspr: {
18718 Value *Op0 = EmitScalarExpr(E->getArg(0));
18719 llvm::Type *RetType = CGM.getDataLayout().getTypeSizeInBits(VoidPtrTy) == 32
18720 ? Int32Ty
18721 : Int64Ty;
18722 Function *F = CGM.getIntrinsic(Intrinsic::ppc_mfspr, RetType);
18723 return Builder.CreateCall(F, {Op0});
18724 }
18725 case PPC::BI__builtin_ppc_mtspr: {
18726 Value *Op0 = EmitScalarExpr(E->getArg(0));
18727 Value *Op1 = EmitScalarExpr(E->getArg(1));
18728 llvm::Type *RetType = CGM.getDataLayout().getTypeSizeInBits(VoidPtrTy) == 32
18729 ? Int32Ty
18730 : Int64Ty;
18731 Function *F = CGM.getIntrinsic(Intrinsic::ppc_mtspr, RetType);
18732 return Builder.CreateCall(F, {Op0, Op1});
18733 }
18734 case PPC::BI__builtin_ppc_popcntb: {
18735 Value *ArgValue = EmitScalarExpr(E->getArg(0));
18736 llvm::Type *ArgType = ArgValue->getType();
18737 Function *F = CGM.getIntrinsic(Intrinsic::ppc_popcntb, {ArgType, ArgType});
18738 return Builder.CreateCall(F, {ArgValue}, "popcntb");
18739 }
18740 case PPC::BI__builtin_ppc_mtfsf: {
18741 // The builtin takes a uint32 that needs to be cast to an
18742 // f64 to be passed to the intrinsic.
18743 Value *Op0 = EmitScalarExpr(E->getArg(0));
18744 Value *Op1 = EmitScalarExpr(E->getArg(1));
18745 Value *Cast = Builder.CreateUIToFP(Op1, DoubleTy);
18746 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_mtfsf);
18747 return Builder.CreateCall(F, {Op0, Cast}, "");
18748 }
18749
18750 case PPC::BI__builtin_ppc_swdiv_nochk:
18751 case PPC::BI__builtin_ppc_swdivs_nochk: {
18752 Value *Op0 = EmitScalarExpr(E->getArg(0));
18753 Value *Op1 = EmitScalarExpr(E->getArg(1));
18754 FastMathFlags FMF = Builder.getFastMathFlags();
18755 Builder.getFastMathFlags().setFast();
18756 Value *FDiv = Builder.CreateFDiv(Op0, Op1, "swdiv_nochk");
18757 Builder.getFastMathFlags() &= (FMF);
18758 return FDiv;
18759 }
18760 case PPC::BI__builtin_ppc_fric:
18761 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
18762 *this, E, Intrinsic::rint,
18763 Intrinsic::experimental_constrained_rint))
18764 .getScalarVal();
18765 case PPC::BI__builtin_ppc_frim:
18766 case PPC::BI__builtin_ppc_frims:
18767 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
18768 *this, E, Intrinsic::floor,
18769 Intrinsic::experimental_constrained_floor))
18770 .getScalarVal();
18771 case PPC::BI__builtin_ppc_frin:
18772 case PPC::BI__builtin_ppc_frins:
18773 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
18774 *this, E, Intrinsic::round,
18775 Intrinsic::experimental_constrained_round))
18776 .getScalarVal();
18777 case PPC::BI__builtin_ppc_frip:
18778 case PPC::BI__builtin_ppc_frips:
18779 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
18780 *this, E, Intrinsic::ceil,
18781 Intrinsic::experimental_constrained_ceil))
18782 .getScalarVal();
18783 case PPC::BI__builtin_ppc_friz:
18784 case PPC::BI__builtin_ppc_frizs:
18785 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
18786 *this, E, Intrinsic::trunc,
18787 Intrinsic::experimental_constrained_trunc))
18788 .getScalarVal();
18789 case PPC::BI__builtin_ppc_fsqrt:
18790 case PPC::BI__builtin_ppc_fsqrts:
18791 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
18792 *this, E, Intrinsic::sqrt,
18793 Intrinsic::experimental_constrained_sqrt))
18794 .getScalarVal();
18795 case PPC::BI__builtin_ppc_test_data_class: {
18796 Value *Op0 = EmitScalarExpr(E->getArg(0));
18797 Value *Op1 = EmitScalarExpr(E->getArg(1));
18798 return Builder.CreateCall(
18799 CGM.getIntrinsic(Intrinsic::ppc_test_data_class, Op0->getType()),
18800 {Op0, Op1}, "test_data_class");
18801 }
18802 case PPC::BI__builtin_ppc_maxfe: {
18803 Value *Op0 = EmitScalarExpr(E->getArg(0));
18804 Value *Op1 = EmitScalarExpr(E->getArg(1));
18805 Value *Op2 = EmitScalarExpr(E->getArg(2));
18806 Value *Op3 = EmitScalarExpr(E->getArg(3));
18807 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfe),
18808 {Op0, Op1, Op2, Op3});
18809 }
18810 case PPC::BI__builtin_ppc_maxfl: {
18811 Value *Op0 = EmitScalarExpr(E->getArg(0));
18812 Value *Op1 = EmitScalarExpr(E->getArg(1));
18813 Value *Op2 = EmitScalarExpr(E->getArg(2));
18814 Value *Op3 = EmitScalarExpr(E->getArg(3));
18815 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfl),
18816 {Op0, Op1, Op2, Op3});
18817 }
18818 case PPC::BI__builtin_ppc_maxfs: {
18819 Value *Op0 = EmitScalarExpr(E->getArg(0));
18820 Value *Op1 = EmitScalarExpr(E->getArg(1));
18821 Value *Op2 = EmitScalarExpr(E->getArg(2));
18822 Value *Op3 = EmitScalarExpr(E->getArg(3));
18823 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfs),
18824 {Op0, Op1, Op2, Op3});
18825 }
18826 case PPC::BI__builtin_ppc_minfe: {
18827 Value *Op0 = EmitScalarExpr(E->getArg(0));
18828 Value *Op1 = EmitScalarExpr(E->getArg(1));
18829 Value *Op2 = EmitScalarExpr(E->getArg(2));
18830 Value *Op3 = EmitScalarExpr(E->getArg(3));
18831 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfe),
18832 {Op0, Op1, Op2, Op3});
18833 }
18834 case PPC::BI__builtin_ppc_minfl: {
18835 Value *Op0 = EmitScalarExpr(E->getArg(0));
18836 Value *Op1 = EmitScalarExpr(E->getArg(1));
18837 Value *Op2 = EmitScalarExpr(E->getArg(2));
18838 Value *Op3 = EmitScalarExpr(E->getArg(3));
18839 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfl),
18840 {Op0, Op1, Op2, Op3});
18841 }
18842 case PPC::BI__builtin_ppc_minfs: {
18843 Value *Op0 = EmitScalarExpr(E->getArg(0));
18844 Value *Op1 = EmitScalarExpr(E->getArg(1));
18845 Value *Op2 = EmitScalarExpr(E->getArg(2));
18846 Value *Op3 = EmitScalarExpr(E->getArg(3));
18847 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfs),
18848 {Op0, Op1, Op2, Op3});
18849 }
18850 case PPC::BI__builtin_ppc_swdiv:
18851 case PPC::BI__builtin_ppc_swdivs: {
18852 Value *Op0 = EmitScalarExpr(E->getArg(0));
18853 Value *Op1 = EmitScalarExpr(E->getArg(1));
18854 return Builder.CreateFDiv(Op0, Op1, "swdiv");
18855 }
18856 case PPC::BI__builtin_ppc_set_fpscr_rn:
18857 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_setrnd),
18858 {EmitScalarExpr(E->getArg(0))});
18859 case PPC::BI__builtin_ppc_mffs:
18860 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_readflm));
18861 }
18862}
18863
18864namespace {
18865 // If \p E is not a null pointer, insert an address space cast to match the
18866 // return type of \p E if necessary.
18867Value *EmitAMDGPUDispatchPtr(CodeGenFunction &CGF,
18868 const CallExpr *E = nullptr) {
18869 auto *F = CGF.CGM.getIntrinsic(Intrinsic::amdgcn_dispatch_ptr);
18870 auto *Call = CGF.Builder.CreateCall(F);
18871 Call->addRetAttr(
18872 Attribute::getWithDereferenceableBytes(Call->getContext(), 64));
18873 Call->addRetAttr(Attribute::getWithAlignment(Call->getContext(), Align(4)));
18874 if (!E)
18875 return Call;
18876 QualType BuiltinRetType = E->getType();
18877 auto *RetTy = cast<llvm::PointerType>(CGF.ConvertType(BuiltinRetType));
18878 if (RetTy == Call->getType())
18879 return Call;
18880 return CGF.Builder.CreateAddrSpaceCast(Call, RetTy);
18881}
18882
18883Value *EmitAMDGPUImplicitArgPtr(CodeGenFunction &CGF) {
18884 auto *F = CGF.CGM.getIntrinsic(Intrinsic::amdgcn_implicitarg_ptr);
18885 auto *Call = CGF.Builder.CreateCall(F);
18886 Call->addRetAttr(
18887 Attribute::getWithDereferenceableBytes(Call->getContext(), 256));
18888 Call->addRetAttr(Attribute::getWithAlignment(Call->getContext(), Align(8)));
18889 return Call;
18890}
18891
18892 // \p Index is 0, 1, and 2 for the x, y, and z dimensions, respectively.
18893/// Emit code based on Code Object ABI version.
18894/// COV_4 : Emit code to use dispatch ptr
18895/// COV_5+ : Emit code to use implicitarg ptr
18896/// COV_NONE : Emit code to load a global variable "__oclc_ABI_version"
18897/// and use its value for COV_4 or COV_5+ approach. It is used for
18898/// compiling device libraries in an ABI-agnostic way.
18899///
18900 /// Note: "__oclc_ABI_version" is supposed to be emitted and initialized by
18901/// clang during compilation of user code.
18902Value *EmitAMDGPUWorkGroupSize(CodeGenFunction &CGF, unsigned Index) {
18903 llvm::LoadInst *LD;
18904
18905 auto Cov = CGF.getTarget().getTargetOpts().CodeObjectVersion;
18906
18907 if (Cov == CodeObjectVersionKind::COV_None) {
18908 StringRef Name = "__oclc_ABI_version";
18909 auto *ABIVersionC = CGF.CGM.getModule().getNamedGlobal(Name);
18910 if (!ABIVersionC)
18911 ABIVersionC = new llvm::GlobalVariable(
18912 CGF.CGM.getModule(), CGF.Int32Ty, false,
18913 llvm::GlobalValue::ExternalLinkage, nullptr, Name, nullptr,
18914 llvm::GlobalVariable::NotThreadLocal,
18915 CGF.CGM.getContext().getTargetAddressSpace(LangAS::opencl_constant));
18916
18917 // This load will be eliminated by the IPSCCP because it is constant
18918 // weak_odr without externally_initialized. Either changing it to weak or
18919 // adding externally_initialized will keep the load.
18920 Value *ABIVersion = CGF.Builder.CreateAlignedLoad(CGF.Int32Ty, ABIVersionC,
18921 CGF.CGM.getIntAlign());
18922
18923 Value *IsCOV5 = CGF.Builder.CreateICmpSGE(
18924 ABIVersion,
18925 llvm::ConstantInt::get(CGF.Int32Ty, CodeObjectVersionKind::COV_5));
18926
18927 // Indexing the implicit kernarg segment.
18928 Value *ImplicitGEP = CGF.Builder.CreateConstGEP1_32(
18929 CGF.Int8Ty, EmitAMDGPUImplicitArgPtr(CGF), 12 + Index * 2);
18930
18931 // Indexing the HSA kernel_dispatch_packet struct.
18932 Value *DispatchGEP = CGF.Builder.CreateConstGEP1_32(
18933 CGF.Int8Ty, EmitAMDGPUDispatchPtr(CGF), 4 + Index * 2);
18934
18935 auto Result = CGF.Builder.CreateSelect(IsCOV5, ImplicitGEP, DispatchGEP);
18936 LD = CGF.Builder.CreateLoad(
18937 Address(Result, CGF.Int16Ty, CharUnits::fromQuantity(2)));
18938 } else {
18939 Value *GEP = nullptr;
18940 if (Cov >= CodeObjectVersionKind::COV_5) {
18941 // Indexing the implicit kernarg segment.
18942 GEP = CGF.Builder.CreateConstGEP1_32(
18943 CGF.Int8Ty, EmitAMDGPUImplicitArgPtr(CGF), 12 + Index * 2);
18944 } else {
18945 // Indexing the HSA kernel_dispatch_packet struct.
18946 GEP = CGF.Builder.CreateConstGEP1_32(
18947 CGF.Int8Ty, EmitAMDGPUDispatchPtr(CGF), 4 + Index * 2);
18948 }
18949 LD = CGF.Builder.CreateLoad(
18950 Address(GEP, CGF.Int16Ty, CharUnits::fromQuantity(2)));
18951 }
18952
18953 llvm::MDBuilder MDHelper(CGF.getLLVMContext());
18954 llvm::MDNode *RNode = MDHelper.createRange(APInt(16, 1),
18955 APInt(16, CGF.getTarget().getMaxOpenCLWorkGroupSize() + 1));
18956 LD->setMetadata(llvm::LLVMContext::MD_range, RNode);
18957 LD->setMetadata(llvm::LLVMContext::MD_noundef,
18958 llvm::MDNode::get(CGF.getLLVMContext(), {}));
18959 LD->setMetadata(llvm::LLVMContext::MD_invariant_load,
18960 llvm::MDNode::get(CGF.getLLVMContext(), {}));
18961 return LD;
18962}
18963
18964 // \p Index is 0, 1, and 2 for the x, y, and z dimensions, respectively.
18965Value *EmitAMDGPUGridSize(CodeGenFunction &CGF, unsigned Index) {
18966 const unsigned XOffset = 12;
18967 auto *DP = EmitAMDGPUDispatchPtr(CGF);
18968 // Indexing the HSA kernel_dispatch_packet struct.
18969 auto *Offset = llvm::ConstantInt::get(CGF.Int32Ty, XOffset + Index * 4);
18970 auto *GEP = CGF.Builder.CreateGEP(CGF.Int8Ty, DP, Offset);
18971 auto *LD = CGF.Builder.CreateLoad(
18972 Address(GEP, CGF.Int32Ty, CharUnits::fromQuantity(4)));
18973
18974 llvm::MDBuilder MDB(CGF.getLLVMContext());
18975
18976 // Known non-zero.
18977 LD->setMetadata(llvm::LLVMContext::MD_range,
18978 MDB.createRange(APInt(32, 1), APInt::getZero(32)));
18979 LD->setMetadata(llvm::LLVMContext::MD_invariant_load,
18980 llvm::MDNode::get(CGF.getLLVMContext(), {}));
18981 return LD;
18982}
18983} // namespace
18984
18985// For processing memory ordering and memory scope arguments of various
18986// amdgcn builtins.
18987 // \p Order takes a C++11 compatible memory-ordering specifier and converts
18988 // it into LLVM's memory ordering specifier using the atomic C ABI, and writes
18989// to \p AO. \p Scope takes a const char * and converts it into AMDGCN
18990 // specific SyncScopeID and writes it to \p SSID.
18991 void CodeGenFunction::ProcessOrderScopeAMDGCN(Value *Order, Value *Scope,
18992 llvm::AtomicOrdering &AO,
18993 llvm::SyncScope::ID &SSID) {
18994 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
18995
18996 // Map C11/C++11 memory ordering to LLVM memory ordering
18997 assert(llvm::isValidAtomicOrderingCABI(ord));
18998 switch (static_cast<llvm::AtomicOrderingCABI>(ord)) {
18999 case llvm::AtomicOrderingCABI::acquire:
19000 case llvm::AtomicOrderingCABI::consume:
19001 AO = llvm::AtomicOrdering::Acquire;
19002 break;
19003 case llvm::AtomicOrderingCABI::release:
19004 AO = llvm::AtomicOrdering::Release;
19005 break;
19006 case llvm::AtomicOrderingCABI::acq_rel:
19007 AO = llvm::AtomicOrdering::AcquireRelease;
19008 break;
19009 case llvm::AtomicOrderingCABI::seq_cst:
19010 AO = llvm::AtomicOrdering::SequentiallyConsistent;
19011 break;
19012 case llvm::AtomicOrderingCABI::relaxed:
19013 AO = llvm::AtomicOrdering::Monotonic;
19014 break;
19015 }
19016
19017 // Some of the atomic builtins take the scope as a string name.
19018 StringRef scp;
19019 if (llvm::getConstantStringInfo(Scope, scp)) {
19020 SSID = getLLVMContext().getOrInsertSyncScopeID(scp);
19021 return;
19022 }
19023
19024 // Older builtins had an enum argument for the memory scope.
19025 int scope = cast<llvm::ConstantInt>(Scope)->getZExtValue();
19026 switch (scope) {
19027 case 0: // __MEMORY_SCOPE_SYSTEM
19028 SSID = llvm::SyncScope::System;
19029 break;
19030 case 1: // __MEMORY_SCOPE_DEVICE
19031 SSID = getLLVMContext().getOrInsertSyncScopeID("agent");
19032 break;
19033 case 2: // __MEMORY_SCOPE_WRKGRP
19034 SSID = getLLVMContext().getOrInsertSyncScopeID("workgroup");
19035 break;
19036 case 3: // __MEMORY_SCOPE_WVFRNT
19037 SSID = getLLVMContext().getOrInsertSyncScopeID("wavefront");
19038 break;
19039 case 4: // __MEMORY_SCOPE_SINGLE
19040 SSID = llvm::SyncScope::SingleThread;
19041 break;
19042 default:
19043 SSID = llvm::SyncScope::System;
19044 break;
19045 }
19046}
19047
19048llvm::Value *CodeGenFunction::EmitScalarOrConstFoldImmArg(unsigned ICEArguments,
19049 unsigned Idx,
19050 const CallExpr *E) {
19051 llvm::Value *Arg = nullptr;
19052 if ((ICEArguments & (1 << Idx)) == 0) {
19053 Arg = EmitScalarExpr(E->getArg(Idx));
19054 } else {
19055 // If this is required to be a constant, constant fold it so that we
19056 // know that the generated intrinsic gets a ConstantInt.
19057 std::optional<llvm::APSInt> Result =
19058 E->getArg(Idx)->getIntegerConstantExpr(getContext());
19059 assert(Result && "Expected argument to be a constant");
19060 Arg = llvm::ConstantInt::get(getLLVMContext(), *Result);
19061 }
19062 return Arg;
19063}
19064
19065// Return dot product intrinsic that corresponds to the QT scalar type
19066static Intrinsic::ID getDotProductIntrinsic(CGHLSLRuntime &RT, QualType QT) {
19067 if (QT->isFloatingType())
19068 return RT.getFDotIntrinsic();
19069 if (QT->isSignedIntegerType())
19070 return RT.getSDotIntrinsic();
19071 assert(QT->isUnsignedIntegerType());
19072 return RT.getUDotIntrinsic();
19073}
19074
19075 static Intrinsic::ID getFirstBitHighIntrinsic(CGHLSLRuntime &RT, QualType QT) {
19076 if (QT->hasSignedIntegerRepresentation()) {
19077 return RT.getFirstBitSHighIntrinsic();
19078 }
19079
19080 assert(QT->hasUnsignedIntegerRepresentation());
19081 return RT.getFirstBitUHighIntrinsic();
19082}
19083
19084// Return wave active sum that corresponds to the QT scalar type
19085static Intrinsic::ID getWaveActiveSumIntrinsic(llvm::Triple::ArchType Arch,
19086 CGHLSLRuntime &RT, QualType QT) {
19087 switch (Arch) {
19088 case llvm::Triple::spirv:
19089 return llvm::Intrinsic::spv_wave_reduce_sum;
19090 case llvm::Triple::dxil: {
19091 if (QT->isUnsignedIntegerType())
19092 return llvm::Intrinsic::dx_wave_reduce_usum;
19093 return llvm::Intrinsic::dx_wave_reduce_sum;
19094 }
19095 default:
19096 llvm_unreachable("Intrinsic WaveActiveSum"
19097 " not supported by target architecture");
19098 }
19099}
19100
19101 Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
19102 const CallExpr *E,
19103 ReturnValueSlot ReturnValue) {
19104 if (!getLangOpts().HLSL)
19105 return nullptr;
19106
19107 switch (BuiltinID) {
19108 case Builtin::BI__builtin_hlsl_resource_getpointer: {
19109 Value *HandleOp = EmitScalarExpr(E->getArg(0));
19110 Value *IndexOp = EmitScalarExpr(E->getArg(1));
19111
19112 // TODO: Map to an hlsl_device address space.
19113 llvm::Type *RetTy = llvm::PointerType::getUnqual(getLLVMContext());
19114
19115 return Builder.CreateIntrinsic(
19116 RetTy, CGM.getHLSLRuntime().getCreateResourceGetPointerIntrinsic(),
19117 ArrayRef<Value *>{HandleOp, IndexOp});
19118 }
19119 case Builtin::BI__builtin_hlsl_all: {
19120 Value *Op0 = EmitScalarExpr(E->getArg(0));
19121 return Builder.CreateIntrinsic(
19122 /*ReturnType=*/llvm::Type::getInt1Ty(getLLVMContext()),
19123 CGM.getHLSLRuntime().getAllIntrinsic(), ArrayRef<Value *>{Op0}, nullptr,
19124 "hlsl.all");
19125 }
19126 case Builtin::BI__builtin_hlsl_any: {
19127 Value *Op0 = EmitScalarExpr(E->getArg(0));
19128 return Builder.CreateIntrinsic(
19129 /*ReturnType=*/llvm::Type::getInt1Ty(getLLVMContext()),
19130 CGM.getHLSLRuntime().getAnyIntrinsic(), ArrayRef<Value *>{Op0}, nullptr,
19131 "hlsl.any");
19132 }
19133 case Builtin::BI__builtin_hlsl_asdouble:
19134 return handleAsDoubleBuiltin(*this, E);
19135 case Builtin::BI__builtin_hlsl_elementwise_clamp: {
19136 Value *OpX = EmitScalarExpr(E->getArg(0));
19137 Value *OpMin = EmitScalarExpr(E->getArg(1));
19138 Value *OpMax = EmitScalarExpr(E->getArg(2));
19139
19140 QualType Ty = E->getArg(0)->getType();
19141 if (auto *VecTy = Ty->getAs<VectorType>())
19142 Ty = VecTy->getElementType();
19143
19144 Intrinsic::ID Intr;
19145 if (Ty->isFloatingType()) {
19146 Intr = CGM.getHLSLRuntime().getNClampIntrinsic();
19147 } else if (Ty->isUnsignedIntegerType()) {
19148 Intr = CGM.getHLSLRuntime().getUClampIntrinsic();
19149 } else {
19150 assert(Ty->isSignedIntegerType());
19151 Intr = CGM.getHLSLRuntime().getSClampIntrinsic();
19152 }
19153 return Builder.CreateIntrinsic(
19154 /*ReturnType=*/OpX->getType(), Intr,
19155 ArrayRef<Value *>{OpX, OpMin, OpMax}, nullptr, "hlsl.clamp");
19156 }
19157 case Builtin::BI__builtin_hlsl_cross: {
19158 Value *Op0 = EmitScalarExpr(E->getArg(0));
19159 Value *Op1 = EmitScalarExpr(E->getArg(1));
19160 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
19161 E->getArg(1)->getType()->hasFloatingRepresentation() &&
19162 "cross operands must have a float representation");
19163    // Make sure each vector has exactly 3 elements.
19164 assert(
19165 E->getArg(0)->getType()->castAs<VectorType>()->getNumElements() == 3 &&
19166 E->getArg(1)->getType()->castAs<VectorType>()->getNumElements() == 3 &&
19167 "input vectors must have 3 elements each");
19168 return Builder.CreateIntrinsic(
19169 /*ReturnType=*/Op0->getType(), CGM.getHLSLRuntime().getCrossIntrinsic(),
19170 ArrayRef<Value *>{Op0, Op1}, nullptr, "hlsl.cross");
19171 }
19172 case Builtin::BI__builtin_hlsl_dot: {
19173 Value *Op0 = EmitScalarExpr(E->getArg(0));
19174 Value *Op1 = EmitScalarExpr(E->getArg(1));
19175 llvm::Type *T0 = Op0->getType();
19176 llvm::Type *T1 = Op1->getType();
19177
19178 // If the arguments are scalars, just emit a multiply
19179 if (!T0->isVectorTy() && !T1->isVectorTy()) {
19180 if (T0->isFloatingPointTy())
19181 return Builder.CreateFMul(Op0, Op1, "hlsl.dot");
19182
19183 if (T0->isIntegerTy())
19184 return Builder.CreateMul(Op0, Op1, "hlsl.dot");
19185
19186 llvm_unreachable(
19187 "Scalar dot product is only supported on ints and floats.");
19188 }
19189 // For vectors, validate types and emit the appropriate intrinsic
19190
19191 // A VectorSplat should have happened
19192 assert(T0->isVectorTy() && T1->isVectorTy() &&
19193 "Dot product of vector and scalar is not supported.");
19194
19195 auto *VecTy0 = E->getArg(0)->getType()->castAs<VectorType>();
19196 [[maybe_unused]] auto *VecTy1 =
19197 E->getArg(1)->getType()->castAs<VectorType>();
19198
19199 assert(VecTy0->getElementType() == VecTy1->getElementType() &&
19200           "Dot product operands must have the same element type.");
19201
19202 assert(VecTy0->getNumElements() == VecTy1->getNumElements() &&
19203 "Dot product requires vectors to be of the same size.");
19204
19205 return Builder.CreateIntrinsic(
19206 /*ReturnType=*/T0->getScalarType(),
19207 getDotProductIntrinsic(CGM.getHLSLRuntime(), VecTy0->getElementType()),
19208 ArrayRef<Value *>{Op0, Op1}, nullptr, "hlsl.dot");
19209 }
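  // Example (sketch): `dot(a, b)` on two float3 values lowers to a single call
  // of the target's fdot intrinsic returning a scalar float, whereas scalar
  // operands such as `dot(2.0f, x)` fall through to the plain fmul above.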
19210 case Builtin::BI__builtin_hlsl_dot4add_i8packed: {
19211 Value *A = EmitScalarExpr(E->getArg(0));
19212 Value *B = EmitScalarExpr(E->getArg(1));
19213 Value *C = EmitScalarExpr(E->getArg(2));
19214
19215 Intrinsic::ID ID = CGM.getHLSLRuntime().getDot4AddI8PackedIntrinsic();
19216 return Builder.CreateIntrinsic(
19217 /*ReturnType=*/C->getType(), ID, ArrayRef<Value *>{A, B, C}, nullptr,
19218 "hlsl.dot4add.i8packed");
19219 }
19220 case Builtin::BI__builtin_hlsl_dot4add_u8packed: {
19221 Value *A = EmitScalarExpr(E->getArg(0));
19222 Value *B = EmitScalarExpr(E->getArg(1));
19223 Value *C = EmitScalarExpr(E->getArg(2));
19224
19225 Intrinsic::ID ID = CGM.getHLSLRuntime().getDot4AddU8PackedIntrinsic();
19226 return Builder.CreateIntrinsic(
19227 /*ReturnType=*/C->getType(), ID, ArrayRef<Value *>{A, B, C}, nullptr,
19228 "hlsl.dot4add.u8packed");
19229 }
19230 case Builtin::BI__builtin_hlsl_elementwise_firstbithigh: {
19231 Value *X = EmitScalarExpr(E->getArg(0));
19232
19233 return Builder.CreateIntrinsic(
19234        /*ReturnType=*/ConvertType(E->getType()),
19235        getFirstBitHighIntrinsic(CGM.getHLSLRuntime(), E->getArg(0)->getType()),
19236        ArrayRef<Value *>{X}, nullptr, "hlsl.firstbithigh");
19237 }
19238 case Builtin::BI__builtin_hlsl_elementwise_firstbitlow: {
19239 Value *X = EmitScalarExpr(E->getArg(0));
19240
19241 return Builder.CreateIntrinsic(
19242 /*ReturnType=*/ConvertType(E->getType()),
19243 CGM.getHLSLRuntime().getFirstBitLowIntrinsic(), ArrayRef<Value *>{X},
19244 nullptr, "hlsl.firstbitlow");
19245 }
19246 case Builtin::BI__builtin_hlsl_lerp: {
19247 Value *X = EmitScalarExpr(E->getArg(0));
19248 Value *Y = EmitScalarExpr(E->getArg(1));
19249 Value *S = EmitScalarExpr(E->getArg(2));
19250 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
19251 llvm_unreachable("lerp operand must have a float representation");
19252 return Builder.CreateIntrinsic(
19253 /*ReturnType=*/X->getType(), CGM.getHLSLRuntime().getLerpIntrinsic(),
19254 ArrayRef<Value *>{X, Y, S}, nullptr, "hlsl.lerp");
19255 }
19256 case Builtin::BI__builtin_hlsl_normalize: {
19257 Value *X = EmitScalarExpr(E->getArg(0));
19258
19259 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
19260 "normalize operand must have a float representation");
19261
19262 return Builder.CreateIntrinsic(
19263 /*ReturnType=*/X->getType(),
19264 CGM.getHLSLRuntime().getNormalizeIntrinsic(), ArrayRef<Value *>{X},
19265 nullptr, "hlsl.normalize");
19266 }
19267 case Builtin::BI__builtin_hlsl_elementwise_degrees: {
19268 Value *X = EmitScalarExpr(E->getArg(0));
19269
19270 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
19271 "degree operand must have a float representation");
19272
19273 return Builder.CreateIntrinsic(
19274 /*ReturnType=*/X->getType(), CGM.getHLSLRuntime().getDegreesIntrinsic(),
19275 ArrayRef<Value *>{X}, nullptr, "hlsl.degrees");
19276 }
19277 case Builtin::BI__builtin_hlsl_elementwise_frac: {
19278 Value *Op0 = EmitScalarExpr(E->getArg(0));
19279 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
19280 llvm_unreachable("frac operand must have a float representation");
19281 return Builder.CreateIntrinsic(
19282 /*ReturnType=*/Op0->getType(), CGM.getHLSLRuntime().getFracIntrinsic(),
19283 ArrayRef<Value *>{Op0}, nullptr, "hlsl.frac");
19284  }
19285  case Builtin::BI__builtin_hlsl_elementwise_isinf: {
19286    Value *Op0 = EmitScalarExpr(E->getArg(0));
19287    llvm::Type *Xty = Op0->getType();
19288    llvm::Type *retType = llvm::Type::getInt1Ty(this->getLLVMContext());
19289    if (Xty->isVectorTy()) {
19290      auto *XVecTy = E->getArg(0)->getType()->castAs<VectorType>();
19291      retType = llvm::VectorType::get(
19292          retType, ElementCount::getFixed(XVecTy->getNumElements()));
19293    }
19294    if (!E->getArg(0)->getType()->hasFloatingRepresentation())
19295      llvm_unreachable("isinf operand must have a float representation");
19296    return Builder.CreateIntrinsic(retType, Intrinsic::dx_isinf,
19297                                   ArrayRef<Value *>{Op0}, nullptr, "dx.isinf");
19298  }
19299 case Builtin::BI__builtin_hlsl_mad: {
19300 Value *M = EmitScalarExpr(E->getArg(0));
19301 Value *A = EmitScalarExpr(E->getArg(1));
19302 Value *B = EmitScalarExpr(E->getArg(2));
19303 if (E->getArg(0)->getType()->hasFloatingRepresentation())
19304 return Builder.CreateIntrinsic(
19305 /*ReturnType*/ M->getType(), Intrinsic::fmuladd,
19306 ArrayRef<Value *>{M, A, B}, nullptr, "hlsl.fmad");
19307
19308 if (E->getArg(0)->getType()->hasSignedIntegerRepresentation()) {
19309 if (CGM.getTarget().getTriple().getArch() == llvm::Triple::dxil)
19310 return Builder.CreateIntrinsic(
19311 /*ReturnType*/ M->getType(), Intrinsic::dx_imad,
19312 ArrayRef<Value *>{M, A, B}, nullptr, "dx.imad");
19313
19314 Value *Mul = Builder.CreateNSWMul(M, A);
19315 return Builder.CreateNSWAdd(Mul, B);
19316 }
19317 assert(E->getArg(0)->getType()->hasUnsignedIntegerRepresentation());
19318 if (CGM.getTarget().getTriple().getArch() == llvm::Triple::dxil)
19319 return Builder.CreateIntrinsic(
19320 /*ReturnType=*/M->getType(), Intrinsic::dx_umad,
19321 ArrayRef<Value *>{M, A, B}, nullptr, "dx.umad");
19322
19323 Value *Mul = Builder.CreateNUWMul(M, A);
19324 return Builder.CreateNUWAdd(Mul, B);
19325 }
19326 case Builtin::BI__builtin_hlsl_elementwise_rcp: {
19327 Value *Op0 = EmitScalarExpr(E->getArg(0));
19328 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
19329 llvm_unreachable("rcp operand must have a float representation");
19330 llvm::Type *Ty = Op0->getType();
19331 llvm::Type *EltTy = Ty->getScalarType();
19332 Constant *One = Ty->isVectorTy()
19333 ? ConstantVector::getSplat(
19334 ElementCount::getFixed(
19335 cast<FixedVectorType>(Ty)->getNumElements()),
19336 ConstantFP::get(EltTy, 1.0))
19337 : ConstantFP::get(EltTy, 1.0);
19338 return Builder.CreateFDiv(One, Op0, "hlsl.rcp");
19339 }
19340 case Builtin::BI__builtin_hlsl_elementwise_rsqrt: {
19341 Value *Op0 = EmitScalarExpr(E->getArg(0));
19342 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
19343 llvm_unreachable("rsqrt operand must have a float representation");
19344 return Builder.CreateIntrinsic(
19345 /*ReturnType=*/Op0->getType(), CGM.getHLSLRuntime().getRsqrtIntrinsic(),
19346 ArrayRef<Value *>{Op0}, nullptr, "hlsl.rsqrt");
19347 }
19348 case Builtin::BI__builtin_hlsl_elementwise_saturate: {
19349 Value *Op0 = EmitScalarExpr(E->getArg(0));
19350 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
19351 "saturate operand must have a float representation");
19352 return Builder.CreateIntrinsic(
19353 /*ReturnType=*/Op0->getType(),
19354 CGM.getHLSLRuntime().getSaturateIntrinsic(), ArrayRef<Value *>{Op0},
19355 nullptr, "hlsl.saturate");
19356 }
19357 case Builtin::BI__builtin_hlsl_select: {
19358 Value *OpCond = EmitScalarExpr(E->getArg(0));
19359 RValue RValTrue = EmitAnyExpr(E->getArg(1));
19360 Value *OpTrue =
19361 RValTrue.isScalar()
19362 ? RValTrue.getScalarVal()
19363 : RValTrue.getAggregatePointer(E->getArg(1)->getType(), *this);
19364 RValue RValFalse = EmitAnyExpr(E->getArg(2));
19365 Value *OpFalse =
19366 RValFalse.isScalar()
19367 ? RValFalse.getScalarVal()
19368 : RValFalse.getAggregatePointer(E->getArg(2)->getType(), *this);
19369
19370 Value *SelectVal =
19371 Builder.CreateSelect(OpCond, OpTrue, OpFalse, "hlsl.select");
19372 if (!RValTrue.isScalar())
19373 Builder.CreateStore(SelectVal, ReturnValue.getAddress(),
19374 ReturnValue.isVolatile());
19375
19376 return SelectVal;
19377 }
19378 case Builtin::BI__builtin_hlsl_step: {
19379 Value *Op0 = EmitScalarExpr(E->getArg(0));
19380 Value *Op1 = EmitScalarExpr(E->getArg(1));
19381 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
19382 E->getArg(1)->getType()->hasFloatingRepresentation() &&
19383 "step operands must have a float representation");
19384 return Builder.CreateIntrinsic(
19385 /*ReturnType=*/Op0->getType(), CGM.getHLSLRuntime().getStepIntrinsic(),
19386 ArrayRef<Value *>{Op0, Op1}, nullptr, "hlsl.step");
19387 }
19388 case Builtin::BI__builtin_hlsl_wave_active_all_true: {
19389 Value *Op = EmitScalarExpr(E->getArg(0));
19390 assert(Op->getType()->isIntegerTy(1) &&
19391 "Intrinsic WaveActiveAllTrue operand must be a bool");
19392
19393 Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveAllTrueIntrinsic();
19394 return EmitRuntimeCall(
19395 Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID), {Op});
19396 }
19397 case Builtin::BI__builtin_hlsl_wave_active_any_true: {
19398 Value *Op = EmitScalarExpr(E->getArg(0));
19399 assert(Op->getType()->isIntegerTy(1) &&
19400 "Intrinsic WaveActiveAnyTrue operand must be a bool");
19401
19402 Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveAnyTrueIntrinsic();
19403 return EmitRuntimeCall(
19404 Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID), {Op});
19405 }
19406 case Builtin::BI__builtin_hlsl_wave_active_count_bits: {
19407 Value *OpExpr = EmitScalarExpr(E->getArg(0));
19408 Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveCountBitsIntrinsic();
19409 return EmitRuntimeCall(
19410 Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID),
19411 ArrayRef{OpExpr});
19412 }
19413 case Builtin::BI__builtin_hlsl_wave_active_sum: {
19414    // Due to the use of variadic arguments, explicitly retrieve the argument.
19415 Value *OpExpr = EmitScalarExpr(E->getArg(0));
19416 llvm::FunctionType *FT = llvm::FunctionType::get(
19417 OpExpr->getType(), ArrayRef{OpExpr->getType()}, false);
19418 Intrinsic::ID IID = getWaveActiveSumIntrinsic(
19419 getTarget().getTriple().getArch(), CGM.getHLSLRuntime(),
19420 E->getArg(0)->getType());
19421
19422 // Get overloaded name
19423 std::string Name =
19424 Intrinsic::getName(IID, ArrayRef{OpExpr->getType()}, &CGM.getModule());
19425 return EmitRuntimeCall(CGM.CreateRuntimeFunction(FT, Name, {},
19426 /*Local=*/false,
19427 /*AssumeConvergent=*/true),
19428 ArrayRef{OpExpr}, "hlsl.wave.active.sum");
19429 }
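  // Example (sketch): because the builtin is variadic, the call is built by
  // hand; for an `int` operand on SPIR-V this ends up calling an overload such
  // as `llvm.spv.wave.reduce.sum.i32`, declared through CreateRuntimeFunction
  // so that the convergent attribute is applied.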
19430 case Builtin::BI__builtin_hlsl_wave_get_lane_index: {
19431    // There is no SPIR-V intrinsic for this; it is a SPIR-V built-in defined in
19432    // SPIRVBuiltins.td. We therefore emit the DirectX intrinsic directly on DXIL
19433    // and call the demangled builtin name on SPIR-V.
19434 switch (CGM.getTarget().getTriple().getArch()) {
19435 case llvm::Triple::dxil:
19436 return EmitRuntimeCall(Intrinsic::getOrInsertDeclaration(
19437 &CGM.getModule(), Intrinsic::dx_wave_getlaneindex));
19438    case llvm::Triple::spirv:
19439      return EmitRuntimeCall(CGM.CreateRuntimeFunction(
19440          llvm::FunctionType::get(IntTy, {}, false),
19441          "__hlsl_wave_get_lane_index", {}, false, true));
19442 default:
19443 llvm_unreachable(
19444 "Intrinsic WaveGetLaneIndex not supported by target architecture");
19445 }
19446 }
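  // Example (sketch): on DXIL this becomes a call to
  // `llvm.dx.wave.getlaneindex`, while on SPIR-V it is emitted as a call to
  // the external symbol `__hlsl_wave_get_lane_index`, which the SPIR-V
  // backend recognises as a built-in.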
19447 case Builtin::BI__builtin_hlsl_wave_is_first_lane: {
19448 Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveIsFirstLaneIntrinsic();
19449 return EmitRuntimeCall(
19450 Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID));
19451 }
19452 case Builtin::BI__builtin_hlsl_wave_read_lane_at: {
19453    // Due to the use of variadic arguments, we must explicitly retrieve them and
19454    // create our function type.
19455 Value *OpExpr = EmitScalarExpr(E->getArg(0));
19456 Value *OpIndex = EmitScalarExpr(E->getArg(1));
19457 llvm::FunctionType *FT = llvm::FunctionType::get(
19458 OpExpr->getType(), ArrayRef{OpExpr->getType(), OpIndex->getType()},
19459 false);
19460
19461 // Get overloaded name
19462 std::string Name =
19463 Intrinsic::getName(CGM.getHLSLRuntime().getWaveReadLaneAtIntrinsic(),
19464 ArrayRef{OpExpr->getType()}, &CGM.getModule());
19465 return EmitRuntimeCall(CGM.CreateRuntimeFunction(FT, Name, {},
19466 /*Local=*/false,
19467 /*AssumeConvergent=*/true),
19468 ArrayRef{OpExpr, OpIndex}, "hlsl.wave.readlane");
19469 }
19470 case Builtin::BI__builtin_hlsl_elementwise_sign: {
19471 auto *Arg0 = E->getArg(0);
19472 Value *Op0 = EmitScalarExpr(Arg0);
19473 llvm::Type *Xty = Op0->getType();
19474 llvm::Type *retType = llvm::Type::getInt32Ty(this->getLLVMContext());
19475 if (Xty->isVectorTy()) {
19476 auto *XVecTy = Arg0->getType()->castAs<VectorType>();
19477 retType = llvm::VectorType::get(
19478 retType, ElementCount::getFixed(XVecTy->getNumElements()));
19479 }
19480 assert((Arg0->getType()->hasFloatingRepresentation() ||
19481 Arg0->getType()->hasIntegerRepresentation()) &&
19482 "sign operand must have a float or int representation");
19483
19484    if (Arg0->getType()->hasUnsignedIntegerRepresentation()) {
19485      Value *Cmp = Builder.CreateICmpEQ(Op0, ConstantInt::get(Xty, 0));
19486 return Builder.CreateSelect(Cmp, ConstantInt::get(retType, 0),
19487 ConstantInt::get(retType, 1), "hlsl.sign");
19488 }
19489
19490 return Builder.CreateIntrinsic(
19491 retType, CGM.getHLSLRuntime().getSignIntrinsic(),
19492 ArrayRef<Value *>{Op0}, nullptr, "hlsl.sign");
19493 }
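  // Example (sketch): for unsigned operands `sign(x)` can only be 0 or 1, so
  // it is lowered to `select(x == 0, 0, 1)`; signed and floating-point
  // operands instead use the target's sign intrinsic, which can also
  // produce -1.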
19494 case Builtin::BI__builtin_hlsl_elementwise_radians: {
19495 Value *Op0 = EmitScalarExpr(E->getArg(0));
19496 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
19497 "radians operand must have a float representation");
19498 return Builder.CreateIntrinsic(
19499 /*ReturnType=*/Op0->getType(),
19500 CGM.getHLSLRuntime().getRadiansIntrinsic(), ArrayRef<Value *>{Op0},
19501 nullptr, "hlsl.radians");
19502 }
19503 case Builtin::BI__builtin_hlsl_buffer_update_counter: {
19504 Value *ResHandle = EmitScalarExpr(E->getArg(0));
19505 Value *Offset = EmitScalarExpr(E->getArg(1));
19506 Value *OffsetI8 = Builder.CreateIntCast(Offset, Int8Ty, true);
19507 return Builder.CreateIntrinsic(
19508 /*ReturnType=*/Offset->getType(),
19509 CGM.getHLSLRuntime().getBufferUpdateCounterIntrinsic(),
19510 ArrayRef<Value *>{ResHandle, OffsetI8}, nullptr);
19511 }
19512 case Builtin::BI__builtin_hlsl_elementwise_splitdouble: {
19513
19514 assert((E->getArg(0)->getType()->hasFloatingRepresentation() &&
19515 E->getArg(1)->getType()->hasUnsignedIntegerRepresentation() &&
19516 E->getArg(2)->getType()->hasUnsignedIntegerRepresentation()) &&
19517           "splitdouble operand types mismatch");
19518 return handleHlslSplitdouble(E, this);
19519 }
19520 case Builtin::BI__builtin_hlsl_elementwise_clip:
19521 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
19522 "clip operands types mismatch");
19523 return handleHlslClip(E, this);
19524 case Builtin::BI__builtin_hlsl_group_memory_barrier_with_group_sync: {
19525 Intrinsic::ID ID =
19526 CGM.getHLSLRuntime().getGroupMemoryBarrierWithGroupSyncIntrinsic();
19527 return EmitRuntimeCall(
19528 Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID));
19529 }
19530 }
19531 return nullptr;
19532}
19533
19534void CodeGenFunction::AddAMDGPUFenceAddressSpaceMMRA(llvm::Instruction *Inst,
19535 const CallExpr *E) {
19536 constexpr const char *Tag = "amdgpu-as";
19537
19538  LLVMContext &Ctx = Inst->getContext();
19539  SmallVector<MMRAMetadata::TagT, 3> MMRAs;
19540  for (unsigned K = 2; K < E->getNumArgs(); ++K) {
19541 llvm::Value *V = EmitScalarExpr(E->getArg(K));
19542 StringRef AS;
19543 if (llvm::getConstantStringInfo(V, AS)) {
19544 MMRAs.push_back({Tag, AS});
19545 // TODO: Delete the resulting unused constant?
19546 continue;
19547 }
19548 CGM.Error(E->getExprLoc(),
19549 "expected an address space name as a string literal");
19550 }
19551
19552 llvm::sort(MMRAs);
19553 MMRAs.erase(llvm::unique(MMRAs), MMRAs.end());
19554 Inst->setMetadata(LLVMContext::MD_mmra, MMRAMetadata::getMD(Ctx, MMRAs));
19555}
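// Example (sketch): a call such as
//   __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "workgroup", "local")
// attaches !mmra metadata carrying the tag {"amdgpu-as", "local"}, so later
// passes may restrict the fence to the named address spaces.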
19556
19557Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
19558                                              const CallExpr *E) {
19559  llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent;
19560 llvm::SyncScope::ID SSID;
19561 switch (BuiltinID) {
19562 case AMDGPU::BI__builtin_amdgcn_div_scale:
19563 case AMDGPU::BI__builtin_amdgcn_div_scalef: {
19564    // Translate from the intrinsic's struct return to the builtin's out
19565    // argument.
19566
19567 Address FlagOutPtr = EmitPointerWithAlignment(E->getArg(3));
19568
19569 llvm::Value *X = EmitScalarExpr(E->getArg(0));
19570 llvm::Value *Y = EmitScalarExpr(E->getArg(1));
19571 llvm::Value *Z = EmitScalarExpr(E->getArg(2));
19572
19573 llvm::Function *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale,
19574 X->getType());
19575
19576 llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z});
19577
19578 llvm::Value *Result = Builder.CreateExtractValue(Tmp, 0);
19579 llvm::Value *Flag = Builder.CreateExtractValue(Tmp, 1);
19580
19581 llvm::Type *RealFlagType = FlagOutPtr.getElementType();
19582
19583 llvm::Value *FlagExt = Builder.CreateZExt(Flag, RealFlagType);
19584 Builder.CreateStore(FlagExt, FlagOutPtr);
19585 return Result;
19586 }
19587 case AMDGPU::BI__builtin_amdgcn_div_fmas:
19588 case AMDGPU::BI__builtin_amdgcn_div_fmasf: {
19589 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
19590 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
19591 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
19592 llvm::Value *Src3 = EmitScalarExpr(E->getArg(3));
19593
19594 llvm::Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas,
19595 Src0->getType());
19596 llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3);
19597 return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool});
19598 }
19599
19600 case AMDGPU::BI__builtin_amdgcn_ds_swizzle:
19601 return emitBuiltinWithOneOverloadedType<2>(*this, E,
19602 Intrinsic::amdgcn_ds_swizzle);
19603 case AMDGPU::BI__builtin_amdgcn_mov_dpp8:
19604 case AMDGPU::BI__builtin_amdgcn_mov_dpp:
19605  case AMDGPU::BI__builtin_amdgcn_update_dpp: {
19606    llvm::SmallVector<llvm::Value *, 6> Args;
19607    // Find out if any arguments are required to be integer constant
19608    // expressions.
19609    unsigned ICEArguments = 0;
19610    ASTContext::GetBuiltinTypeError Error;
19611    getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
19612 assert(Error == ASTContext::GE_None && "Should not codegen an error");
19613 llvm::Type *DataTy = ConvertType(E->getArg(0)->getType());
19614 unsigned Size = DataTy->getPrimitiveSizeInBits();
19615 llvm::Type *IntTy =
19616 llvm::IntegerType::get(Builder.getContext(), std::max(Size, 32u));
19617 Function *F =
19618 CGM.getIntrinsic(BuiltinID == AMDGPU::BI__builtin_amdgcn_mov_dpp8
19619 ? Intrinsic::amdgcn_mov_dpp8
19620 : Intrinsic::amdgcn_update_dpp,
19621 IntTy);
19622 assert(E->getNumArgs() == 5 || E->getNumArgs() == 6 ||
19623 E->getNumArgs() == 2);
19624 bool InsertOld = BuiltinID == AMDGPU::BI__builtin_amdgcn_mov_dpp;
19625 if (InsertOld)
19626 Args.push_back(llvm::PoisonValue::get(IntTy));
19627 for (unsigned I = 0; I != E->getNumArgs(); ++I) {
19628 llvm::Value *V = EmitScalarOrConstFoldImmArg(ICEArguments, I, E);
19629 if (I < (BuiltinID == AMDGPU::BI__builtin_amdgcn_update_dpp ? 2u : 1u) &&
19630 Size < 32) {
19631 if (!DataTy->isIntegerTy())
19632 V = Builder.CreateBitCast(
19633 V, llvm::IntegerType::get(Builder.getContext(), Size));
19634 V = Builder.CreateZExtOrBitCast(V, IntTy);
19635 }
19636 llvm::Type *ExpTy =
19637 F->getFunctionType()->getFunctionParamType(I + InsertOld);
19638 Args.push_back(Builder.CreateTruncOrBitCast(V, ExpTy));
19639 }
19640 Value *V = Builder.CreateCall(F, Args);
19641 if (Size < 32 && !DataTy->isIntegerTy())
19642 V = Builder.CreateTrunc(
19643 V, llvm::IntegerType::get(Builder.getContext(), Size));
19644 return Builder.CreateTruncOrBitCast(V, DataTy);
19645 }
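  // Example (sketch): for a 16-bit operand such as `half`, the value is
  // bitcast to i16, zero-extended to the i32 the DPP intrinsics operate on,
  // and the result is truncated and bitcast back to the original type.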
19646 case AMDGPU::BI__builtin_amdgcn_permlane16:
19647 case AMDGPU::BI__builtin_amdgcn_permlanex16:
19648 return emitBuiltinWithOneOverloadedType<6>(
19649 *this, E,
19650 BuiltinID == AMDGPU::BI__builtin_amdgcn_permlane16
19651 ? Intrinsic::amdgcn_permlane16
19652 : Intrinsic::amdgcn_permlanex16);
19653 case AMDGPU::BI__builtin_amdgcn_permlane64:
19654 return emitBuiltinWithOneOverloadedType<1>(*this, E,
19655 Intrinsic::amdgcn_permlane64);
19656 case AMDGPU::BI__builtin_amdgcn_readlane:
19657 return emitBuiltinWithOneOverloadedType<2>(*this, E,
19658 Intrinsic::amdgcn_readlane);
19659 case AMDGPU::BI__builtin_amdgcn_readfirstlane:
19660 return emitBuiltinWithOneOverloadedType<1>(*this, E,
19661 Intrinsic::amdgcn_readfirstlane);
19662 case AMDGPU::BI__builtin_amdgcn_div_fixup:
19663 case AMDGPU::BI__builtin_amdgcn_div_fixupf:
19664 case AMDGPU::BI__builtin_amdgcn_div_fixuph:
19665 return emitBuiltinWithOneOverloadedType<3>(*this, E,
19666 Intrinsic::amdgcn_div_fixup);
19667 case AMDGPU::BI__builtin_amdgcn_trig_preop:
19668 case AMDGPU::BI__builtin_amdgcn_trig_preopf:
19669 return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_trig_preop);
19670 case AMDGPU::BI__builtin_amdgcn_rcp:
19671 case AMDGPU::BI__builtin_amdgcn_rcpf:
19672 case AMDGPU::BI__builtin_amdgcn_rcph:
19673 return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_rcp);
19674 case AMDGPU::BI__builtin_amdgcn_sqrt:
19675 case AMDGPU::BI__builtin_amdgcn_sqrtf:
19676 case AMDGPU::BI__builtin_amdgcn_sqrth:
19677 return emitBuiltinWithOneOverloadedType<1>(*this, E,
19678 Intrinsic::amdgcn_sqrt);
19679 case AMDGPU::BI__builtin_amdgcn_rsq:
19680 case AMDGPU::BI__builtin_amdgcn_rsqf:
19681 case AMDGPU::BI__builtin_amdgcn_rsqh:
19682 return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_rsq);
19683 case AMDGPU::BI__builtin_amdgcn_rsq_clamp:
19684 case AMDGPU::BI__builtin_amdgcn_rsq_clampf:
19685 return emitBuiltinWithOneOverloadedType<1>(*this, E,
19686 Intrinsic::amdgcn_rsq_clamp);
19687 case AMDGPU::BI__builtin_amdgcn_sinf:
19688 case AMDGPU::BI__builtin_amdgcn_sinh:
19689 return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_sin);
19690 case AMDGPU::BI__builtin_amdgcn_cosf:
19691 case AMDGPU::BI__builtin_amdgcn_cosh:
19692 return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_cos);
19693 case AMDGPU::BI__builtin_amdgcn_dispatch_ptr:
19694 return EmitAMDGPUDispatchPtr(*this, E);
19695 case AMDGPU::BI__builtin_amdgcn_logf:
19696 return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_log);
19697 case AMDGPU::BI__builtin_amdgcn_exp2f:
19698 return emitBuiltinWithOneOverloadedType<1>(*this, E,
19699 Intrinsic::amdgcn_exp2);
19700 case AMDGPU::BI__builtin_amdgcn_log_clampf:
19701 return emitBuiltinWithOneOverloadedType<1>(*this, E,
19702 Intrinsic::amdgcn_log_clamp);
19703 case AMDGPU::BI__builtin_amdgcn_ldexp:
19704 case AMDGPU::BI__builtin_amdgcn_ldexpf: {
19705 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
19706 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
19707 llvm::Function *F =
19708 CGM.getIntrinsic(Intrinsic::ldexp, {Src0->getType(), Src1->getType()});
19709 return Builder.CreateCall(F, {Src0, Src1});
19710 }
19711 case AMDGPU::BI__builtin_amdgcn_ldexph: {
19712 // The raw instruction has a different behavior for out of bounds exponent
19713 // values (implicit truncation instead of saturate to short_min/short_max).
19714 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
19715 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
19716 llvm::Function *F =
19717 CGM.getIntrinsic(Intrinsic::ldexp, {Src0->getType(), Int16Ty});
19718 return Builder.CreateCall(F, {Src0, Builder.CreateTrunc(Src1, Int16Ty)});
19719 }
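  // Example (sketch): __builtin_amdgcn_ldexph(x, e) truncates the i32
  // exponent and emits roughly
  //   %t = trunc i32 %e to i16
  //   %r = call half @llvm.ldexp.f16.i16(half %x, i16 %t)
  // mirroring the instruction's implicit truncation described above.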
19720 case AMDGPU::BI__builtin_amdgcn_frexp_mant:
19721 case AMDGPU::BI__builtin_amdgcn_frexp_mantf:
19722 case AMDGPU::BI__builtin_amdgcn_frexp_manth:
19723 return emitBuiltinWithOneOverloadedType<1>(*this, E,
19724 Intrinsic::amdgcn_frexp_mant);
19725 case AMDGPU::BI__builtin_amdgcn_frexp_exp:
19726 case AMDGPU::BI__builtin_amdgcn_frexp_expf: {
19727 Value *Src0 = EmitScalarExpr(E->getArg(0));
19728 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
19729 { Builder.getInt32Ty(), Src0->getType() });
19730 return Builder.CreateCall(F, Src0);
19731 }
19732 case AMDGPU::BI__builtin_amdgcn_frexp_exph: {
19733 Value *Src0 = EmitScalarExpr(E->getArg(0));
19734 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
19735 { Builder.getInt16Ty(), Src0->getType() });
19736 return Builder.CreateCall(F, Src0);
19737 }
19738 case AMDGPU::BI__builtin_amdgcn_fract:
19739 case AMDGPU::BI__builtin_amdgcn_fractf:
19740 case AMDGPU::BI__builtin_amdgcn_fracth:
19741 return emitBuiltinWithOneOverloadedType<1>(*this, E,
19742 Intrinsic::amdgcn_fract);
19743 case AMDGPU::BI__builtin_amdgcn_lerp:
19744 return emitBuiltinWithOneOverloadedType<3>(*this, E,
19745 Intrinsic::amdgcn_lerp);
19746 case AMDGPU::BI__builtin_amdgcn_ubfe:
19747 return emitBuiltinWithOneOverloadedType<3>(*this, E,
19748 Intrinsic::amdgcn_ubfe);
19749 case AMDGPU::BI__builtin_amdgcn_sbfe:
19750 return emitBuiltinWithOneOverloadedType<3>(*this, E,
19751 Intrinsic::amdgcn_sbfe);
19752 case AMDGPU::BI__builtin_amdgcn_ballot_w32:
19753 case AMDGPU::BI__builtin_amdgcn_ballot_w64: {
19754 llvm::Type *ResultType = ConvertType(E->getType());
19755 llvm::Value *Src = EmitScalarExpr(E->getArg(0));
19756 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, { ResultType });
19757 return Builder.CreateCall(F, { Src });
19758 }
19759 case AMDGPU::BI__builtin_amdgcn_uicmp:
19760 case AMDGPU::BI__builtin_amdgcn_uicmpl:
19761 case AMDGPU::BI__builtin_amdgcn_sicmp:
19762 case AMDGPU::BI__builtin_amdgcn_sicmpl: {
19763 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
19764 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
19765 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
19766
19767 // FIXME-GFX10: How should 32 bit mask be handled?
19768 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_icmp,
19769 { Builder.getInt64Ty(), Src0->getType() });
19770 return Builder.CreateCall(F, { Src0, Src1, Src2 });
19771 }
19772 case AMDGPU::BI__builtin_amdgcn_fcmp:
19773 case AMDGPU::BI__builtin_amdgcn_fcmpf: {
19774 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
19775 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
19776 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
19777
19778 // FIXME-GFX10: How should 32 bit mask be handled?
19779 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_fcmp,
19780 { Builder.getInt64Ty(), Src0->getType() });
19781 return Builder.CreateCall(F, { Src0, Src1, Src2 });
19782 }
19783 case AMDGPU::BI__builtin_amdgcn_class:
19784 case AMDGPU::BI__builtin_amdgcn_classf:
19785 case AMDGPU::BI__builtin_amdgcn_classh:
19786 return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class);
19787 case AMDGPU::BI__builtin_amdgcn_fmed3f:
19788 case AMDGPU::BI__builtin_amdgcn_fmed3h:
19789 return emitBuiltinWithOneOverloadedType<3>(*this, E,
19790 Intrinsic::amdgcn_fmed3);
19791 case AMDGPU::BI__builtin_amdgcn_ds_append:
19792 case AMDGPU::BI__builtin_amdgcn_ds_consume: {
19793 Intrinsic::ID Intrin = BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_append ?
19794 Intrinsic::amdgcn_ds_append : Intrinsic::amdgcn_ds_consume;
19795 Value *Src0 = EmitScalarExpr(E->getArg(0));
19796 Function *F = CGM.getIntrinsic(Intrin, { Src0->getType() });
19797 return Builder.CreateCall(F, { Src0, Builder.getFalse() });
19798 }
19799 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32:
19800 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32:
19801 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16:
19802 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4f16:
19803 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4bf16:
19804 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8i16:
19805 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8f16:
19806 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8bf16:
19807 case AMDGPU::BI__builtin_amdgcn_ds_read_tr4_b64_v2i32:
19808 case AMDGPU::BI__builtin_amdgcn_ds_read_tr8_b64_v2i32:
19809 case AMDGPU::BI__builtin_amdgcn_ds_read_tr6_b96_v3i32:
19810 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4f16:
19811 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4bf16:
19812 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4i16: {
19813 Intrinsic::ID IID;
19814 switch (BuiltinID) {
19815 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32:
19816 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32:
19817 IID = Intrinsic::amdgcn_global_load_tr_b64;
19818 break;
19819 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16:
19820 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4f16:
19821 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4bf16:
19822 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8i16:
19823 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8f16:
19824 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8bf16:
19825 IID = Intrinsic::amdgcn_global_load_tr_b128;
19826 break;
19827 case AMDGPU::BI__builtin_amdgcn_ds_read_tr4_b64_v2i32:
19828 IID = Intrinsic::amdgcn_ds_read_tr4_b64;
19829 break;
19830 case AMDGPU::BI__builtin_amdgcn_ds_read_tr8_b64_v2i32:
19831 IID = Intrinsic::amdgcn_ds_read_tr8_b64;
19832 break;
19833 case AMDGPU::BI__builtin_amdgcn_ds_read_tr6_b96_v3i32:
19834 IID = Intrinsic::amdgcn_ds_read_tr6_b96;
19835 break;
19836 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4i16:
19837 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4f16:
19838 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4bf16:
19839 IID = Intrinsic::amdgcn_ds_read_tr16_b64;
19840 break;
19841 }
19842 llvm::Type *LoadTy = ConvertType(E->getType());
19843 llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
19844 llvm::Function *F = CGM.getIntrinsic(IID, {LoadTy});
19845 return Builder.CreateCall(F, {Addr});
19846 }
19847 case AMDGPU::BI__builtin_amdgcn_get_fpenv: {
19848 Function *F = CGM.getIntrinsic(Intrinsic::get_fpenv,
19849 {llvm::Type::getInt64Ty(getLLVMContext())});
19850 return Builder.CreateCall(F);
19851 }
19852 case AMDGPU::BI__builtin_amdgcn_set_fpenv: {
19853 Function *F = CGM.getIntrinsic(Intrinsic::set_fpenv,
19854 {llvm::Type::getInt64Ty(getLLVMContext())});
19855 llvm::Value *Env = EmitScalarExpr(E->getArg(0));
19856 return Builder.CreateCall(F, {Env});
19857 }
19858 case AMDGPU::BI__builtin_amdgcn_read_exec:
19859 return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, false);
19860 case AMDGPU::BI__builtin_amdgcn_read_exec_lo:
19861 return EmitAMDGCNBallotForExec(*this, E, Int32Ty, Int32Ty, false);
19862 case AMDGPU::BI__builtin_amdgcn_read_exec_hi:
19863 return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, true);
19864 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray:
19865 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_h:
19866 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_l:
19867 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_lh: {
19868 llvm::Value *NodePtr = EmitScalarExpr(E->getArg(0));
19869 llvm::Value *RayExtent = EmitScalarExpr(E->getArg(1));
19870 llvm::Value *RayOrigin = EmitScalarExpr(E->getArg(2));
19871 llvm::Value *RayDir = EmitScalarExpr(E->getArg(3));
19872 llvm::Value *RayInverseDir = EmitScalarExpr(E->getArg(4));
19873 llvm::Value *TextureDescr = EmitScalarExpr(E->getArg(5));
19874
19875 // The builtins take these arguments as vec4 where the last element is
19876 // ignored. The intrinsic takes them as vec3.
19877 RayOrigin = Builder.CreateShuffleVector(RayOrigin, RayOrigin,
19878 ArrayRef<int>{0, 1, 2});
19879 RayDir =
19880 Builder.CreateShuffleVector(RayDir, RayDir, ArrayRef<int>{0, 1, 2});
19881 RayInverseDir = Builder.CreateShuffleVector(RayInverseDir, RayInverseDir,
19882 ArrayRef<int>{0, 1, 2});
19883
19884 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_image_bvh_intersect_ray,
19885 {NodePtr->getType(), RayDir->getType()});
19886 return Builder.CreateCall(F, {NodePtr, RayExtent, RayOrigin, RayDir,
19887 RayInverseDir, TextureDescr});
19888 }
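  // Example (sketch): the vec4 ray origin/direction arguments are narrowed
  // with a shufflevector mask of <0, 1, 2>, discarding the unused fourth lane
  // before calling llvm.amdgcn.image.bvh.intersect.ray.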
19889
19890  case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_rtn: {
19891    SmallVector<Value *, 4> Args;
19892    for (int i = 0, e = E->getNumArgs(); i != e; ++i)
19893 Args.push_back(EmitScalarExpr(E->getArg(i)));
19894
19895 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ds_bvh_stack_rtn);
19896 Value *Call = Builder.CreateCall(F, Args);
19897 Value *Rtn = Builder.CreateExtractValue(Call, 0);
19898 Value *A = Builder.CreateExtractValue(Call, 1);
19899 llvm::Type *RetTy = ConvertType(E->getType());
19900 Value *I0 = Builder.CreateInsertElement(PoisonValue::get(RetTy), Rtn,
19901 (uint64_t)0);
19902 return Builder.CreateInsertElement(I0, A, 1);
19903 }
19904 case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_16x16x128_f8f6f4:
19905 case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4: {
19906    llvm::FixedVectorType *VT = FixedVectorType::get(Builder.getInt32Ty(), 8);
19907    Function *F = CGM.getIntrinsic(
19908        BuiltinID == AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4
19909 ? Intrinsic::amdgcn_mfma_scale_f32_32x32x64_f8f6f4
19910 : Intrinsic::amdgcn_mfma_scale_f32_16x16x128_f8f6f4,
19911 {VT, VT});
19912
19913    SmallVector<Value *, 9> Args;
19914    for (unsigned I = 0, N = E->getNumArgs(); I != N; ++I)
19915 Args.push_back(EmitScalarExpr(E->getArg(I)));
19916 return Builder.CreateCall(F, Args);
19917 }
19918 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:
19919 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32:
19920 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:
19921 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64:
19922 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:
19923 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32:
19924 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64:
19925 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64:
19926 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32:
19927 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64:
19928 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32:
19929 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64:
19930 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32:
19931 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64:
19932 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32:
19933 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64:
19934 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12:
19935 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64_gfx12:
19936 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12:
19937 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64_gfx12:
19938 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32_gfx12:
19939 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64_gfx12:
19940 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32_gfx12:
19941 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64_gfx12:
19942 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12:
19943 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12:
19944 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32_gfx12:
19945 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64_gfx12:
19946 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12:
19947 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12:
19948 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12:
19949 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12:
19950 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12:
19951 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12:
19952 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12:
19953 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12:
19954 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12:
19955 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12:
19956 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32:
19957 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w64:
19958 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32:
19959 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64:
19960 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w32:
19961 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w64:
19962 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32:
19963 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64:
19964 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32:
19965 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64:
19966 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32:
19967 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64:
19968 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32:
19969 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64:
19970 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32:
19971 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64:
19972 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32:
19973 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64:
19974 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32:
19975 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64:
19976 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32:
19977 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64: {
19978
19979 // These operations perform a matrix multiplication and accumulation of
19980 // the form:
19981 // D = A * B + C
19982 // We need to specify one type for matrices AB and one for matrices CD.
19983    // Sparse matrix operations can have different types for A and B, as well as
19984    // an additional type for the sparsity index. The destination type should be
19985    // put before the types used for the source operands.
19986 SmallVector<unsigned, 2> ArgsForMatchingMatrixTypes;
19987 // On GFX12, the intrinsics with 16-bit accumulator use a packed layout.
19988 // There is no need for the variable opsel argument, so always set it to
19989 // "false".
19990 bool AppendFalseForOpselArg = false;
19991 unsigned BuiltinWMMAOp;
19992
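    // Example (sketch): for wmma_f32_16x16x16_f16, ArgsForMatchingMatrixTypes
    // is {2, 0}, i.e. the intrinsic is overloaded on the type of operand 2
    // (the f32 C/D accumulator) and operand 0 (the f16 A/B inputs); the
    // swmmac variants additionally list the B and sparsity-index operands.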
19993 switch (BuiltinID) {
19994 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32:
19995 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64:
19996 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32_gfx12:
19997 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64_gfx12:
19998 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
19999 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_f16;
20000 break;
20001 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32:
20002 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64:
20003 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32_gfx12:
20004 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64_gfx12:
20005 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
20006 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf16;
20007 break;
20008 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12:
20009 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64_gfx12:
20010 AppendFalseForOpselArg = true;
20011 [[fallthrough]];
20012 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:
20013 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64:
20014 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
20015 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16;
20016 break;
20017 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12:
20018 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64_gfx12:
20019 AppendFalseForOpselArg = true;
20020 [[fallthrough]];
20021 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:
20022 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:
20023 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
20024 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16;
20025 break;
20026 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32:
20027 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64:
20028 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
20029 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16_tied;
20030 break;
20031 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32:
20032 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64:
20033 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
20034 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16_tied;
20035 break;
20036 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32:
20037 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64:
20038 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32_gfx12:
20039 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64_gfx12:
20040 ArgsForMatchingMatrixTypes = {4, 1}; // CD, AB
20041 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu8;
20042 break;
20043 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32:
20044 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64:
20045 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12:
20046 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12:
20047 ArgsForMatchingMatrixTypes = {4, 1}; // CD, AB
20048 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu4;
20049 break;
20050 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12:
20051 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12:
20052 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
20053 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_fp8;
20054 break;
20055 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12:
20056 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12:
20057 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
20058 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_bf8;
20059 break;
20060 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12:
20061 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12:
20062 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
20063 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_fp8;
20064 break;
20065 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12:
20066 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12:
20067 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
20068 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_bf8;
20069 break;
20070 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12:
20071 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12:
20072 ArgsForMatchingMatrixTypes = {4, 1}; // CD, AB
20073 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x32_iu4;
20074 break;
20075 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32:
20076 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w64:
20077 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
20078 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_f16;
20079 break;
20080 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32:
20081 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64:
20082 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
20083 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf16;
20084 break;
20085 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w32:
20086 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w64:
20087 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
20088 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x32_f16;
20089 break;
20090 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32:
20091 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64:
20092 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
20093 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_bf16_16x16x32_bf16;
20094 break;
20095 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32:
20096 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64:
20097 ArgsForMatchingMatrixTypes = {4, 1, 3, 5}; // CD, A, B, Index
20098 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x32_iu8;
20099 break;
20100 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32:
20101 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64:
20102 ArgsForMatchingMatrixTypes = {4, 1, 3, 5}; // CD, A, B, Index
20103 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x32_iu4;
20104 break;
20105 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32:
20106 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64:
20107 ArgsForMatchingMatrixTypes = {4, 1, 3, 5}; // CD, A, B, Index
20108 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x64_iu4;
20109 break;
20110 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32:
20111 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64:
20112 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
20113 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_fp8;
20114 break;
20115 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32:
20116 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64:
20117 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
20118 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_bf8;
20119 break;
20120 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32:
20121 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64:
20122 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
20123 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_fp8;
20124 break;
20125 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32:
20126 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64:
20127 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
20128 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_bf8;
20129 break;
20130 }
20131
20132    SmallVector<Value *, 6> Args;
20133    for (int i = 0, e = E->getNumArgs(); i != e; ++i)
20134 Args.push_back(EmitScalarExpr(E->getArg(i)));
20135 if (AppendFalseForOpselArg)
20136 Args.push_back(Builder.getFalse());
20137
20138    SmallVector<llvm::Type *, 6> ArgTypes;
20139    for (auto ArgIdx : ArgsForMatchingMatrixTypes)
20140 ArgTypes.push_back(Args[ArgIdx]->getType());
20141
20142 Function *F = CGM.getIntrinsic(BuiltinWMMAOp, ArgTypes);
20143 return Builder.CreateCall(F, Args);
20144 }
20145
20146 // amdgcn workitem
20147 case AMDGPU::BI__builtin_amdgcn_workitem_id_x:
20148 return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_x, 0, 1024);
20149 case AMDGPU::BI__builtin_amdgcn_workitem_id_y:
20150 return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_y, 0, 1024);
20151 case AMDGPU::BI__builtin_amdgcn_workitem_id_z:
20152 return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_z, 0, 1024);
20153
20154 // amdgcn workgroup size
20155 case AMDGPU::BI__builtin_amdgcn_workgroup_size_x:
20156 return EmitAMDGPUWorkGroupSize(*this, 0);
20157 case AMDGPU::BI__builtin_amdgcn_workgroup_size_y:
20158 return EmitAMDGPUWorkGroupSize(*this, 1);
20159 case AMDGPU::BI__builtin_amdgcn_workgroup_size_z:
20160 return EmitAMDGPUWorkGroupSize(*this, 2);
20161
20162 // amdgcn grid size
20163 case AMDGPU::BI__builtin_amdgcn_grid_size_x:
20164 return EmitAMDGPUGridSize(*this, 0);
20165 case AMDGPU::BI__builtin_amdgcn_grid_size_y:
20166 return EmitAMDGPUGridSize(*this, 1);
20167 case AMDGPU::BI__builtin_amdgcn_grid_size_z:
20168 return EmitAMDGPUGridSize(*this, 2);
20169
20170 // r600 intrinsics
20171 case AMDGPU::BI__builtin_r600_recipsqrt_ieee:
20172 case AMDGPU::BI__builtin_r600_recipsqrt_ieeef:
20173 return emitBuiltinWithOneOverloadedType<1>(*this, E,
20174 Intrinsic::r600_recipsqrt_ieee);
20175 case AMDGPU::BI__builtin_r600_read_tidig_x:
20176 return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_x, 0, 1024);
20177 case AMDGPU::BI__builtin_r600_read_tidig_y:
20178 return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_y, 0, 1024);
20179 case AMDGPU::BI__builtin_r600_read_tidig_z:
20180 return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_z, 0, 1024);
20181 case AMDGPU::BI__builtin_amdgcn_alignbit: {
20182 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
20183 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
20184 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
20185 Function *F = CGM.getIntrinsic(Intrinsic::fshr, Src0->getType());
20186 return Builder.CreateCall(F, { Src0, Src1, Src2 });
20187 }
20188  case AMDGPU::BI__builtin_amdgcn_fence: {
20189    ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(0)),
20190                            EmitScalarExpr(E->getArg(1)), AO, SSID);
20191 FenceInst *Fence = Builder.CreateFence(AO, SSID);
20192    if (E->getNumArgs() > 2)
20193      AddAMDGPUFenceAddressSpaceMMRA(Fence, E);
20194    return Fence;
20195 }
20196 case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
20197 case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
20198 case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
20199 case AMDGPU::BI__builtin_amdgcn_atomic_dec64:
20200 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
20201 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
20202 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16:
20203 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16:
20204 case AMDGPU::BI__builtin_amdgcn_ds_faddf:
20205 case AMDGPU::BI__builtin_amdgcn_ds_fminf:
20206 case AMDGPU::BI__builtin_amdgcn_ds_fmaxf:
20207 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
20208 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
20209 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
20210 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
20211 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
20212 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
20213 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
20214 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16:
20215 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
20216 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
20217 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
20218 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: {
20219 llvm::AtomicRMWInst::BinOp BinOp;
20220 switch (BuiltinID) {
20221 case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
20222 case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
20223 BinOp = llvm::AtomicRMWInst::UIncWrap;
20224 break;
20225 case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
20226 case AMDGPU::BI__builtin_amdgcn_atomic_dec64:
20227 BinOp = llvm::AtomicRMWInst::UDecWrap;
20228 break;
20229 case AMDGPU::BI__builtin_amdgcn_ds_faddf:
20230 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
20231 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
20232 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16:
20233 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16:
20234 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
20235 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
20236 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
20237 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
20238 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
20239 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
20240 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
20241 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16:
20242 BinOp = llvm::AtomicRMWInst::FAdd;
20243 break;
20244 case AMDGPU::BI__builtin_amdgcn_ds_fminf:
20245 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
20246 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
20247 BinOp = llvm::AtomicRMWInst::FMin;
20248 break;
20249 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
20250 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64:
20251 case AMDGPU::BI__builtin_amdgcn_ds_fmaxf:
20252 BinOp = llvm::AtomicRMWInst::FMax;
20253 break;
20254 }
20255
20256 Address Ptr = CheckAtomicAlignment(*this, E);
20257 Value *Val = EmitScalarExpr(E->getArg(1));
20258 llvm::Type *OrigTy = Val->getType();
20259 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
20260
20261 bool Volatile;
20262
20263 if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_faddf ||
20264 BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_fminf ||
20265 BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_fmaxf) {
20266 // __builtin_amdgcn_ds_faddf/fminf/fmaxf has an explicit volatile argument
20267 Volatile =
20268 cast<ConstantInt>(EmitScalarExpr(E->getArg(4)))->getZExtValue();
20269 } else {
20270      // Infer volatile from the passed type.
20271      Volatile =
20272          PtrTy.castAs<PointerType>()->getPointeeType().isVolatileQualified();
20273    }
20274
20275 if (E->getNumArgs() >= 4) {
20276      // Some of the builtins have explicit ordering and scope arguments.
20277      ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(2)),
20278                              EmitScalarExpr(E->getArg(3)), AO, SSID);
20279 } else {
20280 // Most of the builtins do not have syncscope/order arguments. For DS
20281 // atomics the scope doesn't really matter, as they implicitly operate at
20282 // workgroup scope.
20283 //
20284 // The global/flat cases need to use agent scope to consistently produce
20285 // the native instruction instead of a cmpxchg expansion.
20286 SSID = getLLVMContext().getOrInsertSyncScopeID("agent");
20287 AO = AtomicOrdering::Monotonic;
20288
20289 // The v2bf16 builtin uses i16 instead of a natural bfloat type.
20290 if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16 ||
20291 BuiltinID == AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16 ||
20292 BuiltinID == AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16) {
20293 llvm::Type *V2BF16Ty = FixedVectorType::get(
20294 llvm::Type::getBFloatTy(Builder.getContext()), 2);
20295 Val = Builder.CreateBitCast(Val, V2BF16Ty);
20296 }
20297 }
20298
20299 llvm::AtomicRMWInst *RMW =
20300 Builder.CreateAtomicRMW(BinOp, Ptr, Val, AO, SSID);
20301 if (Volatile)
20302 RMW->setVolatile(true);
20303
20304 unsigned AddrSpace = Ptr.getType()->getAddressSpace();
20305 if (AddrSpace != llvm::AMDGPUAS::LOCAL_ADDRESS) {
20306 // Most targets require "amdgpu.no.fine.grained.memory" to emit the native
20307 // instruction for flat and global operations.
20308 llvm::MDTuple *EmptyMD = MDNode::get(getLLVMContext(), {});
20309 RMW->setMetadata("amdgpu.no.fine.grained.memory", EmptyMD);
20310
20311 // Most targets require "amdgpu.ignore.denormal.mode" to emit the native
20312 // instruction, but this only matters for float fadd.
20313 if (BinOp == llvm::AtomicRMWInst::FAdd && Val->getType()->isFloatTy())
20314 RMW->setMetadata("amdgpu.ignore.denormal.mode", EmptyMD);
20315 }
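    // Example (sketch): a global float fadd from these builtins therefore
    // becomes roughly
    //   atomicrmw fadd ptr addrspace(1) %p, float %v syncscope("agent")
    //       monotonic, !amdgpu.no.fine.grained.memory !0,
    //       !amdgpu.ignore.denormal.mode !0
    // when no explicit ordering/scope arguments are given.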
20316
20317 return Builder.CreateBitCast(RMW, OrigTy);
20318 }
20319 case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtn:
20320 case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtnl: {
20321 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
20322 llvm::Type *ResultType = ConvertType(E->getType());
20323 // s_sendmsg_rtn is mangled using return type only.
20324 Function *F =
20325 CGM.getIntrinsic(Intrinsic::amdgcn_s_sendmsg_rtn, {ResultType});
20326 return Builder.CreateCall(F, {Arg});
20327 }
20328 case AMDGPU::BI__builtin_amdgcn_permlane16_swap:
20329 case AMDGPU::BI__builtin_amdgcn_permlane32_swap: {
20330 // Because builtin types are limited, and the intrinsic uses a struct/pair
20331 // output, marshal the pair-of-i32 to <2 x i32>.
20332 Value *VDstOld = EmitScalarExpr(E->getArg(0));
20333 Value *VSrcOld = EmitScalarExpr(E->getArg(1));
20334 Value *FI = EmitScalarExpr(E->getArg(2));
20335 Value *BoundCtrl = EmitScalarExpr(E->getArg(3));
20336 Function *F =
20337 CGM.getIntrinsic(BuiltinID == AMDGPU::BI__builtin_amdgcn_permlane16_swap
20338 ? Intrinsic::amdgcn_permlane16_swap
20339 : Intrinsic::amdgcn_permlane32_swap);
20340 llvm::CallInst *Call =
20341 Builder.CreateCall(F, {VDstOld, VSrcOld, FI, BoundCtrl});
20342
20343 llvm::Value *Elt0 = Builder.CreateExtractValue(Call, 0);
20344 llvm::Value *Elt1 = Builder.CreateExtractValue(Call, 1);
20345
20346 llvm::Type *ResultType = ConvertType(E->getType());
20347
20348 llvm::Value *Insert0 = Builder.CreateInsertElement(
20349 llvm::PoisonValue::get(ResultType), Elt0, UINT64_C(0));
20350 llvm::Value *AsVector =
20351 Builder.CreateInsertElement(Insert0, Elt1, UINT64_C(1));
20352 return AsVector;
20353 }
20354 case AMDGPU::BI__builtin_amdgcn_bitop3_b32:
20355 case AMDGPU::BI__builtin_amdgcn_bitop3_b16:
20356 return emitBuiltinWithOneOverloadedType<4>(*this, E,
20357 Intrinsic::amdgcn_bitop3);
20358 case AMDGPU::BI__builtin_amdgcn_make_buffer_rsrc:
20359 return emitBuiltinWithOneOverloadedType<4>(
20360 *this, E, Intrinsic::amdgcn_make_buffer_rsrc);
20361 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b8:
20362 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b16:
20363 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b32:
20364 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b64:
20365 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b96:
20366 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b128:
20367 return emitBuiltinWithOneOverloadedType<5>(
20368 *this, E, Intrinsic::amdgcn_raw_ptr_buffer_store);
20369 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b8:
20370 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b16:
20371 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b32:
20372 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b64:
20373 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b96:
20374 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b128: {
20375 llvm::Type *RetTy = nullptr;
20376 switch (BuiltinID) {
20377 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b8:
20378 RetTy = Int8Ty;
20379 break;
20380 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b16:
20381 RetTy = Int16Ty;
20382 break;
20383 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b32:
20384 RetTy = Int32Ty;
20385 break;
20386 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b64:
20387 RetTy = llvm::FixedVectorType::get(Int32Ty, /*NumElements=*/2);
20388 break;
20389 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b96:
20390 RetTy = llvm::FixedVectorType::get(Int32Ty, /*NumElements=*/3);
20391 break;
20392 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b128:
20393 RetTy = llvm::FixedVectorType::get(Int32Ty, /*NumElements=*/4);
20394 break;
20395 }
20396 Function *F =
20397 CGM.getIntrinsic(Intrinsic::amdgcn_raw_ptr_buffer_load, RetTy);
20398 return Builder.CreateCall(
20399 F, {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)),
20400 EmitScalarExpr(E->getArg(2)), EmitScalarExpr(E->getArg(3))});
20401 }
20402 case AMDGPU::BI__builtin_amdgcn_s_prefetch_data:
20403 return emitBuiltinWithOneOverloadedType<2>(
20404 *this, E, Intrinsic::amdgcn_s_prefetch_data);
20405 default:
20406 return nullptr;
20407 }
20408}
20409
20410 Value *CodeGenFunction::EmitSPIRVBuiltinExpr(unsigned BuiltinID,
20411 const CallExpr *E) {
20412 switch (BuiltinID) {
20413 case SPIRV::BI__builtin_spirv_distance: {
20414 Value *X = EmitScalarExpr(E->getArg(0));
20415 Value *Y = EmitScalarExpr(E->getArg(1));
20416 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
20417 E->getArg(1)->getType()->hasFloatingRepresentation() &&
20418 "Distance operands must have a float representation");
20419 assert(E->getArg(0)->getType()->isVectorType() &&
20420 E->getArg(1)->getType()->isVectorType() &&
20421 "Distance operands must be a vector");
20422 return Builder.CreateIntrinsic(
20423 /*ReturnType=*/X->getType()->getScalarType(), Intrinsic::spv_distance,
20424 ArrayRef<Value *>{X, Y}, nullptr, "spv.distance");
20425 }
20426 case SPIRV::BI__builtin_spirv_length: {
20427 Value *X = EmitScalarExpr(E->getArg(0));
20428 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
20429 "length operand must have a float representation");
20430 assert(E->getArg(0)->getType()->isVectorType() &&
20431 "length operand must be a vector");
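// For example (illustrative): with a float4 operand this emits roughly
//   %r = call float @llvm.spv.length.v4f32(<4 x float> %x)
// i.e. the intrinsic is overloaded on the vector operand type and returns the
// scalar element type.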
20432 return Builder.CreateIntrinsic(
20433 /*ReturnType=*/X->getType()->getScalarType(), Intrinsic::spv_length,
20434 ArrayRef<Value *>{X}, nullptr, "spv.length");
20435 }
20436 }
20437 return nullptr;
20438}
20439
20440/// Handle a SystemZ function in which the final argument is a pointer
20441/// to an int that receives the post-instruction CC value. At the LLVM level
20442/// this is represented as a function that returns a {result, cc} pair.
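/// For example (illustrative): __builtin_s390_vceqbs(a, b, &cc) calls an
/// intrinsic returning a {<16 x i8>, i32} pair; element 1 (the CC) is stored
/// through the trailing pointer argument and element 0 becomes the result.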
20443 static Value *EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF,
20444 unsigned IntrinsicID,
20445 const CallExpr *E) {
20446 unsigned NumArgs = E->getNumArgs() - 1;
20447 SmallVector<Value *, 8> Args(NumArgs);
20448 for (unsigned I = 0; I < NumArgs; ++I)
20449 Args[I] = CGF.EmitScalarExpr(E->getArg(I));
20450 Address CCPtr = CGF.EmitPointerWithAlignment(E->getArg(NumArgs));
20451 Function *F = CGF.CGM.getIntrinsic(IntrinsicID);
20452 Value *Call = CGF.Builder.CreateCall(F, Args);
20453 Value *CC = CGF.Builder.CreateExtractValue(Call, 1);
20454 CGF.Builder.CreateStore(CC, CCPtr);
20455 return CGF.Builder.CreateExtractValue(Call, 0);
20456}
20457
20458 Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
20459 const CallExpr *E) {
20460 switch (BuiltinID) {
20461 case SystemZ::BI__builtin_tbegin: {
20462 Value *TDB = EmitScalarExpr(E->getArg(0));
20463 Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
20464 Function *F = CGM.getIntrinsic(Intrinsic::s390_tbegin);
20465 return Builder.CreateCall(F, {TDB, Control});
20466 }
20467 case SystemZ::BI__builtin_tbegin_nofloat: {
20468 Value *TDB = EmitScalarExpr(E->getArg(0));
20469 Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
20470 Function *F = CGM.getIntrinsic(Intrinsic::s390_tbegin_nofloat);
20471 return Builder.CreateCall(F, {TDB, Control});
20472 }
20473 case SystemZ::BI__builtin_tbeginc: {
20474 Value *TDB = llvm::ConstantPointerNull::get(Int8PtrTy);
20475 Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff08);
20476 Function *F = CGM.getIntrinsic(Intrinsic::s390_tbeginc);
20477 return Builder.CreateCall(F, {TDB, Control});
20478 }
20479 case SystemZ::BI__builtin_tabort: {
20480 Value *Data = EmitScalarExpr(E->getArg(0));
20481 Function *F = CGM.getIntrinsic(Intrinsic::s390_tabort);
20482 return Builder.CreateCall(F, Builder.CreateSExt(Data, Int64Ty, "tabort"));
20483 }
20484 case SystemZ::BI__builtin_non_tx_store: {
20485 Value *Address = EmitScalarExpr(E->getArg(0));
20486 Value *Data = EmitScalarExpr(E->getArg(1));
20487 Function *F = CGM.getIntrinsic(Intrinsic::s390_ntstg);
20488 return Builder.CreateCall(F, {Data, Address});
20489 }
20490
20491 // Vector builtins. Note that most vector builtins are mapped automatically
20492 // to target-specific LLVM intrinsics. The ones handled specially here can
20493 // be represented via standard LLVM IR, which is preferable to enable common
20494 // LLVM optimizations.
20495
20496 case SystemZ::BI__builtin_s390_vclzb:
20497 case SystemZ::BI__builtin_s390_vclzh:
20498 case SystemZ::BI__builtin_s390_vclzf:
20499 case SystemZ::BI__builtin_s390_vclzg:
20500 case SystemZ::BI__builtin_s390_vclzq: {
20501 llvm::Type *ResultType = ConvertType(E->getType());
20502 Value *X = EmitScalarExpr(E->getArg(0));
20503 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
20504 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
20505 return Builder.CreateCall(F, {X, Undef});
20506 }
20507
20508 case SystemZ::BI__builtin_s390_vctzb:
20509 case SystemZ::BI__builtin_s390_vctzh:
20510 case SystemZ::BI__builtin_s390_vctzf:
20511 case SystemZ::BI__builtin_s390_vctzg:
20512 case SystemZ::BI__builtin_s390_vctzq: {
20513 llvm::Type *ResultType = ConvertType(E->getType());
20514 Value *X = EmitScalarExpr(E->getArg(0));
20515 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
20516 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
20517 return Builder.CreateCall(F, {X, Undef});
20518 }
20519
20520 case SystemZ::BI__builtin_s390_verllb:
20521 case SystemZ::BI__builtin_s390_verllh:
20522 case SystemZ::BI__builtin_s390_verllf:
20523 case SystemZ::BI__builtin_s390_verllg: {
20524 llvm::Type *ResultType = ConvertType(E->getType());
20525 llvm::Value *Src = EmitScalarExpr(E->getArg(0));
20526 llvm::Value *Amt = EmitScalarExpr(E->getArg(1));
20527 // Splat scalar rotate amount to vector type.
20528 unsigned NumElts = cast<llvm::FixedVectorType>(ResultType)->getNumElements();
20529 Amt = Builder.CreateIntCast(Amt, ResultType->getScalarType(), false);
20530 Amt = Builder.CreateVectorSplat(NumElts, Amt);
20531 Function *F = CGM.getIntrinsic(Intrinsic::fshl, ResultType);
20532 return Builder.CreateCall(F, { Src, Src, Amt });
20533 }
20534
20535 case SystemZ::BI__builtin_s390_verllvb:
20536 case SystemZ::BI__builtin_s390_verllvh:
20537 case SystemZ::BI__builtin_s390_verllvf:
20538 case SystemZ::BI__builtin_s390_verllvg: {
20539 llvm::Type *ResultType = ConvertType(E->getType());
20540 llvm::Value *Src = EmitScalarExpr(E->getArg(0));
20541 llvm::Value *Amt = EmitScalarExpr(E->getArg(1));
20542 Function *F = CGM.getIntrinsic(Intrinsic::fshl, ResultType);
20543 return Builder.CreateCall(F, { Src, Src, Amt });
20544 }
20545
20546 case SystemZ::BI__builtin_s390_vfsqsb:
20547 case SystemZ::BI__builtin_s390_vfsqdb: {
20548 llvm::Type *ResultType = ConvertType(E->getType());
20549 Value *X = EmitScalarExpr(E->getArg(0));
20550 if (Builder.getIsFPConstrained()) {
20551 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt, ResultType);
20552 return Builder.CreateConstrainedFPCall(F, { X });
20553 } else {
20554 Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
20555 return Builder.CreateCall(F, X);
20556 }
20557 }
20558 case SystemZ::BI__builtin_s390_vfmasb:
20559 case SystemZ::BI__builtin_s390_vfmadb: {
20560 llvm::Type *ResultType = ConvertType(E->getType());
20561 Value *X = EmitScalarExpr(E->getArg(0));
20562 Value *Y = EmitScalarExpr(E->getArg(1));
20563 Value *Z = EmitScalarExpr(E->getArg(2));
20564 if (Builder.getIsFPConstrained()) {
20565 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
20566 return Builder.CreateConstrainedFPCall(F, {X, Y, Z});
20567 } else {
20568 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
20569 return Builder.CreateCall(F, {X, Y, Z});
20570 }
20571 }
20572 case SystemZ::BI__builtin_s390_vfmssb:
20573 case SystemZ::BI__builtin_s390_vfmsdb: {
20574 llvm::Type *ResultType = ConvertType(E->getType());
20575 Value *X = EmitScalarExpr(E->getArg(0));
20576 Value *Y = EmitScalarExpr(E->getArg(1));
20577 Value *Z = EmitScalarExpr(E->getArg(2));
20578 if (Builder.getIsFPConstrained()) {
20579 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
20580 return Builder.CreateConstrainedFPCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")});
20581 } else {
20582 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
20583 return Builder.CreateCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")});
20584 }
20585 }
20586 case SystemZ::BI__builtin_s390_vfnmasb:
20587 case SystemZ::BI__builtin_s390_vfnmadb: {
20588 llvm::Type *ResultType = ConvertType(E->getType());
20589 Value *X = EmitScalarExpr(E->getArg(0));
20590 Value *Y = EmitScalarExpr(E->getArg(1));
20591 Value *Z = EmitScalarExpr(E->getArg(2));
20592 if (Builder.getIsFPConstrained()) {
20593 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
20594 return Builder.CreateFNeg(Builder.CreateConstrainedFPCall(F, {X, Y, Z}), "neg");
20595 } else {
20596 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
20597 return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, Z}), "neg");
20598 }
20599 }
20600 case SystemZ::BI__builtin_s390_vfnmssb:
20601 case SystemZ::BI__builtin_s390_vfnmsdb: {
20602 llvm::Type *ResultType = ConvertType(E->getType());
20603 Value *X = EmitScalarExpr(E->getArg(0));
20604 Value *Y = EmitScalarExpr(E->getArg(1));
20605 Value *Z = EmitScalarExpr(E->getArg(2));
20606 if (Builder.getIsFPConstrained()) {
20607 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
20608 Value *NegZ = Builder.CreateFNeg(Z, "neg");
20609 return Builder.CreateFNeg(Builder.CreateConstrainedFPCall(F, {X, Y, NegZ}));
20610 } else {
20611 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
20612 Value *NegZ = Builder.CreateFNeg(Z, "neg");
20613 return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, NegZ}));
20614 }
20615 }
20616 case SystemZ::BI__builtin_s390_vflpsb:
20617 case SystemZ::BI__builtin_s390_vflpdb: {
20618 llvm::Type *ResultType = ConvertType(E->getType());
20619 Value *X = EmitScalarExpr(E->getArg(0));
20620 Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
20621 return Builder.CreateCall(F, X);
20622 }
20623 case SystemZ::BI__builtin_s390_vflnsb:
20624 case SystemZ::BI__builtin_s390_vflndb: {
20625 llvm::Type *ResultType = ConvertType(E->getType());
20626 Value *X = EmitScalarExpr(E->getArg(0));
20627 Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
20628 return Builder.CreateFNeg(Builder.CreateCall(F, X), "neg");
20629 }
20630 case SystemZ::BI__builtin_s390_vfisb:
20631 case SystemZ::BI__builtin_s390_vfidb: {
20632 llvm::Type *ResultType = ConvertType(E->getType());
20633 Value *X = EmitScalarExpr(E->getArg(0));
20634 // Constant-fold the M4 and M5 mask arguments.
20635 llvm::APSInt M4 = *E->getArg(1)->getIntegerConstantExpr(getContext());
20636 llvm::APSInt M5 = *E->getArg(2)->getIntegerConstantExpr(getContext());
20637 // Check whether this instance can be represented via an LLVM standard
20638 // intrinsic. We only support some combinations of M4 and M5.
20639 Intrinsic::ID ID = Intrinsic::not_intrinsic;
20640 Intrinsic::ID CI;
20641 switch (M4.getZExtValue()) {
20642 default: break;
20643 case 0: // IEEE-inexact exception allowed
20644 switch (M5.getZExtValue()) {
20645 default: break;
20646 case 0: ID = Intrinsic::rint;
20647 CI = Intrinsic::experimental_constrained_rint; break;
20648 }
20649 break;
20650 case 4: // IEEE-inexact exception suppressed
20651 switch (M5.getZExtValue()) {
20652 default: break;
20653 case 0: ID = Intrinsic::nearbyint;
20654 CI = Intrinsic::experimental_constrained_nearbyint; break;
20655 case 1: ID = Intrinsic::round;
20656 CI = Intrinsic::experimental_constrained_round; break;
20657 case 5: ID = Intrinsic::trunc;
20658 CI = Intrinsic::experimental_constrained_trunc; break;
20659 case 6: ID = Intrinsic::ceil;
20660 CI = Intrinsic::experimental_constrained_ceil; break;
20661 case 7: ID = Intrinsic::floor;
20662 CI = Intrinsic::experimental_constrained_floor; break;
20663 }
20664 break;
20665 }
20666 if (ID != Intrinsic::not_intrinsic) {
20667 if (Builder.getIsFPConstrained()) {
20668 Function *F = CGM.getIntrinsic(CI, ResultType);
20669 return Builder.CreateConstrainedFPCall(F, X);
20670 } else {
20671 Function *F = CGM.getIntrinsic(ID, ResultType);
20672 return Builder.CreateCall(F, X);
20673 }
20674 }
20675 switch (BuiltinID) { // FIXME: constrained version?
20676 case SystemZ::BI__builtin_s390_vfisb: ID = Intrinsic::s390_vfisb; break;
20677 case SystemZ::BI__builtin_s390_vfidb: ID = Intrinsic::s390_vfidb; break;
20678 default: llvm_unreachable("Unknown BuiltinID");
20679 }
20680 Function *F = CGM.getIntrinsic(ID);
20681 Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
20682 Value *M5Value = llvm::ConstantInt::get(getLLVMContext(), M5);
20683 return Builder.CreateCall(F, {X, M4Value, M5Value});
20684 }
20685 case SystemZ::BI__builtin_s390_vfmaxsb:
20686 case SystemZ::BI__builtin_s390_vfmaxdb: {
20687 llvm::Type *ResultType = ConvertType(E->getType());
20688 Value *X = EmitScalarExpr(E->getArg(0));
20689 Value *Y = EmitScalarExpr(E->getArg(1));
20690 // Constant-fold the M4 mask argument.
20691 llvm::APSInt M4 = *E->getArg(2)->getIntegerConstantExpr(getContext());
20692 // Check whether this instance can be represented via an LLVM standard
20693 // intrinsic. We only support some values of M4.
20694 Intrinsic::ID ID = Intrinsic::not_intrinsic;
20695 Intrinsic::ID CI;
20696 switch (M4.getZExtValue()) {
20697 default: break;
20698 case 4: ID = Intrinsic::maxnum;
20699 CI = Intrinsic::experimental_constrained_maxnum; break;
20700 }
20701 if (ID != Intrinsic::not_intrinsic) {
20702 if (Builder.getIsFPConstrained()) {
20703 Function *F = CGM.getIntrinsic(CI, ResultType);
20704 return Builder.CreateConstrainedFPCall(F, {X, Y});
20705 } else {
20706 Function *F = CGM.getIntrinsic(ID, ResultType);
20707 return Builder.CreateCall(F, {X, Y});
20708 }
20709 }
20710 switch (BuiltinID) {
20711 case SystemZ::BI__builtin_s390_vfmaxsb: ID = Intrinsic::s390_vfmaxsb; break;
20712 case SystemZ::BI__builtin_s390_vfmaxdb: ID = Intrinsic::s390_vfmaxdb; break;
20713 default: llvm_unreachable("Unknown BuiltinID");
20714 }
20715 Function *F = CGM.getIntrinsic(ID);
20716 Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
20717 return Builder.CreateCall(F, {X, Y, M4Value});
20718 }
20719 case SystemZ::BI__builtin_s390_vfminsb:
20720 case SystemZ::BI__builtin_s390_vfmindb: {
20721 llvm::Type *ResultType = ConvertType(E->getType());
20722 Value *X = EmitScalarExpr(E->getArg(0));
20723 Value *Y = EmitScalarExpr(E->getArg(1));
20724 // Constant-fold the M4 mask argument.
20725 llvm::APSInt M4 = *E->getArg(2)->getIntegerConstantExpr(getContext());
20726 // Check whether this instance can be represented via an LLVM standard
20727 // intrinsic. We only support some values of M4.
20728 Intrinsic::ID ID = Intrinsic::not_intrinsic;
20729 Intrinsic::ID CI;
20730 switch (M4.getZExtValue()) {
20731 default: break;
20732 case 4: ID = Intrinsic::minnum;
20733 CI = Intrinsic::experimental_constrained_minnum; break;
20734 }
20735 if (ID != Intrinsic::not_intrinsic) {
20736 if (Builder.getIsFPConstrained()) {
20737 Function *F = CGM.getIntrinsic(CI, ResultType);
20738 return Builder.CreateConstrainedFPCall(F, {X, Y});
20739 } else {
20740 Function *F = CGM.getIntrinsic(ID, ResultType);
20741 return Builder.CreateCall(F, {X, Y});
20742 }
20743 }
20744 switch (BuiltinID) {
20745 case SystemZ::BI__builtin_s390_vfminsb: ID = Intrinsic::s390_vfminsb; break;
20746 case SystemZ::BI__builtin_s390_vfmindb: ID = Intrinsic::s390_vfmindb; break;
20747 default: llvm_unreachable("Unknown BuiltinID");
20748 }
20749 Function *F = CGM.getIntrinsic(ID);
20750 Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
20751 return Builder.CreateCall(F, {X, Y, M4Value});
20752 }
20753
20754 case SystemZ::BI__builtin_s390_vlbrh:
20755 case SystemZ::BI__builtin_s390_vlbrf:
20756 case SystemZ::BI__builtin_s390_vlbrg:
20757 case SystemZ::BI__builtin_s390_vlbrq: {
20758 llvm::Type *ResultType = ConvertType(E->getType());
20759 Value *X = EmitScalarExpr(E->getArg(0));
20760 Function *F = CGM.getIntrinsic(Intrinsic::bswap, ResultType);
20761 return Builder.CreateCall(F, X);
20762 }
20763
20764 // Vector intrinsics that output the post-instruction CC value.
20765
20766#define INTRINSIC_WITH_CC(NAME) \
20767 case SystemZ::BI__builtin_##NAME: \
20768 return EmitSystemZIntrinsicWithCC(*this, Intrinsic::NAME, E)
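// For reference, INTRINSIC_WITH_CC(s390_vceqbs) expands to:
//   case SystemZ::BI__builtin_s390_vceqbs:
//     return EmitSystemZIntrinsicWithCC(*this, Intrinsic::s390_vceqbs, E);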
20769
20770 INTRINSIC_WITH_CC(s390_vpkshs);
20771 INTRINSIC_WITH_CC(s390_vpksfs);
20772 INTRINSIC_WITH_CC(s390_vpksgs);
20773
20774 INTRINSIC_WITH_CC(s390_vpklshs);
20775 INTRINSIC_WITH_CC(s390_vpklsfs);
20776 INTRINSIC_WITH_CC(s390_vpklsgs);
20777
20778 INTRINSIC_WITH_CC(s390_vceqbs);
20779 INTRINSIC_WITH_CC(s390_vceqhs);
20780 INTRINSIC_WITH_CC(s390_vceqfs);
20781 INTRINSIC_WITH_CC(s390_vceqgs);
20782 INTRINSIC_WITH_CC(s390_vceqqs);
20783
20784 INTRINSIC_WITH_CC(s390_vchbs);
20785 INTRINSIC_WITH_CC(s390_vchhs);
20786 INTRINSIC_WITH_CC(s390_vchfs);
20787 INTRINSIC_WITH_CC(s390_vchgs);
20788 INTRINSIC_WITH_CC(s390_vchqs);
20789
20790 INTRINSIC_WITH_CC(s390_vchlbs);
20791 INTRINSIC_WITH_CC(s390_vchlhs);
20792 INTRINSIC_WITH_CC(s390_vchlfs);
20793 INTRINSIC_WITH_CC(s390_vchlgs);
20794 INTRINSIC_WITH_CC(s390_vchlqs);
20795
20796 INTRINSIC_WITH_CC(s390_vfaebs);
20797 INTRINSIC_WITH_CC(s390_vfaehs);
20798 INTRINSIC_WITH_CC(s390_vfaefs);
20799
20800 INTRINSIC_WITH_CC(s390_vfaezbs);
20801 INTRINSIC_WITH_CC(s390_vfaezhs);
20802 INTRINSIC_WITH_CC(s390_vfaezfs);
20803
20804 INTRINSIC_WITH_CC(s390_vfeebs);
20805 INTRINSIC_WITH_CC(s390_vfeehs);
20806 INTRINSIC_WITH_CC(s390_vfeefs);
20807
20808 INTRINSIC_WITH_CC(s390_vfeezbs);
20809 INTRINSIC_WITH_CC(s390_vfeezhs);
20810 INTRINSIC_WITH_CC(s390_vfeezfs);
20811
20812 INTRINSIC_WITH_CC(s390_vfenebs);
20813 INTRINSIC_WITH_CC(s390_vfenehs);
20814 INTRINSIC_WITH_CC(s390_vfenefs);
20815
20816 INTRINSIC_WITH_CC(s390_vfenezbs);
20817 INTRINSIC_WITH_CC(s390_vfenezhs);
20818 INTRINSIC_WITH_CC(s390_vfenezfs);
20819
20820 INTRINSIC_WITH_CC(s390_vistrbs);
20821 INTRINSIC_WITH_CC(s390_vistrhs);
20822 INTRINSIC_WITH_CC(s390_vistrfs);
20823
20824 INTRINSIC_WITH_CC(s390_vstrcbs);
20825 INTRINSIC_WITH_CC(s390_vstrchs);
20826 INTRINSIC_WITH_CC(s390_vstrcfs);
20827
20828 INTRINSIC_WITH_CC(s390_vstrczbs);
20829 INTRINSIC_WITH_CC(s390_vstrczhs);
20830 INTRINSIC_WITH_CC(s390_vstrczfs);
20831
20832 INTRINSIC_WITH_CC(s390_vfcesbs);
20833 INTRINSIC_WITH_CC(s390_vfcedbs);
20834 INTRINSIC_WITH_CC(s390_vfchsbs);
20835 INTRINSIC_WITH_CC(s390_vfchdbs);
20836 INTRINSIC_WITH_CC(s390_vfchesbs);
20837 INTRINSIC_WITH_CC(s390_vfchedbs);
20838
20839 INTRINSIC_WITH_CC(s390_vftcisb);
20840 INTRINSIC_WITH_CC(s390_vftcidb);
20841
20842 INTRINSIC_WITH_CC(s390_vstrsb);
20843 INTRINSIC_WITH_CC(s390_vstrsh);
20844 INTRINSIC_WITH_CC(s390_vstrsf);
20845
20846 INTRINSIC_WITH_CC(s390_vstrszb);
20847 INTRINSIC_WITH_CC(s390_vstrszh);
20848 INTRINSIC_WITH_CC(s390_vstrszf);
20849
20850#undef INTRINSIC_WITH_CC
20851
20852 default:
20853 return nullptr;
20854 }
20855}
20856
20857namespace {
20858 // Helper classes for mapping MMA builtins to a particular LLVM intrinsic variant.
20859struct NVPTXMmaLdstInfo {
20860 unsigned NumResults; // Number of elements to load/store
20861 // Intrinsic IDs for row/col variants. 0 if particular layout is unsupported.
20862 unsigned IID_col;
20863 unsigned IID_row;
20864};
20865
20866#define MMA_INTR(geom_op_type, layout) \
20867 Intrinsic::nvvm_wmma_##geom_op_type##_##layout##_stride
20868#define MMA_LDST(n, geom_op_type) \
20869 { n, MMA_INTR(geom_op_type, col), MMA_INTR(geom_op_type, row) }
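// For reference, MMA_LDST(8, m16n16k16_load_a_f16) expands to
//   { 8, Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_col_stride,
//        Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_row_stride }
// i.e. the number of result elements plus the col- and row-major intrinsic IDs.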
20870
20871static NVPTXMmaLdstInfo getNVPTXMmaLdstInfo(unsigned BuiltinID) {
20872 switch (BuiltinID) {
20873 // FP MMA loads
20874 case NVPTX::BI__hmma_m16n16k16_ld_a:
20875 return MMA_LDST(8, m16n16k16_load_a_f16);
20876 case NVPTX::BI__hmma_m16n16k16_ld_b:
20877 return MMA_LDST(8, m16n16k16_load_b_f16);
20878 case NVPTX::BI__hmma_m16n16k16_ld_c_f16:
20879 return MMA_LDST(4, m16n16k16_load_c_f16);
20880 case NVPTX::BI__hmma_m16n16k16_ld_c_f32:
20881 return MMA_LDST(8, m16n16k16_load_c_f32);
20882 case NVPTX::BI__hmma_m32n8k16_ld_a:
20883 return MMA_LDST(8, m32n8k16_load_a_f16);
20884 case NVPTX::BI__hmma_m32n8k16_ld_b:
20885 return MMA_LDST(8, m32n8k16_load_b_f16);
20886 case NVPTX::BI__hmma_m32n8k16_ld_c_f16:
20887 return MMA_LDST(4, m32n8k16_load_c_f16);
20888 case NVPTX::BI__hmma_m32n8k16_ld_c_f32:
20889 return MMA_LDST(8, m32n8k16_load_c_f32);
20890 case NVPTX::BI__hmma_m8n32k16_ld_a:
20891 return MMA_LDST(8, m8n32k16_load_a_f16);
20892 case NVPTX::BI__hmma_m8n32k16_ld_b:
20893 return MMA_LDST(8, m8n32k16_load_b_f16);
20894 case NVPTX::BI__hmma_m8n32k16_ld_c_f16:
20895 return MMA_LDST(4, m8n32k16_load_c_f16);
20896 case NVPTX::BI__hmma_m8n32k16_ld_c_f32:
20897 return MMA_LDST(8, m8n32k16_load_c_f32);
20898
20899 // Integer MMA loads
20900 case NVPTX::BI__imma_m16n16k16_ld_a_s8:
20901 return MMA_LDST(2, m16n16k16_load_a_s8);
20902 case NVPTX::BI__imma_m16n16k16_ld_a_u8:
20903 return MMA_LDST(2, m16n16k16_load_a_u8);
20904 case NVPTX::BI__imma_m16n16k16_ld_b_s8:
20905 return MMA_LDST(2, m16n16k16_load_b_s8);
20906 case NVPTX::BI__imma_m16n16k16_ld_b_u8:
20907 return MMA_LDST(2, m16n16k16_load_b_u8);
20908 case NVPTX::BI__imma_m16n16k16_ld_c:
20909 return MMA_LDST(8, m16n16k16_load_c_s32);
20910 case NVPTX::BI__imma_m32n8k16_ld_a_s8:
20911 return MMA_LDST(4, m32n8k16_load_a_s8);
20912 case NVPTX::BI__imma_m32n8k16_ld_a_u8:
20913 return MMA_LDST(4, m32n8k16_load_a_u8);
20914 case NVPTX::BI__imma_m32n8k16_ld_b_s8:
20915 return MMA_LDST(1, m32n8k16_load_b_s8);
20916 case NVPTX::BI__imma_m32n8k16_ld_b_u8:
20917 return MMA_LDST(1, m32n8k16_load_b_u8);
20918 case NVPTX::BI__imma_m32n8k16_ld_c:
20919 return MMA_LDST(8, m32n8k16_load_c_s32);
20920 case NVPTX::BI__imma_m8n32k16_ld_a_s8:
20921 return MMA_LDST(1, m8n32k16_load_a_s8);
20922 case NVPTX::BI__imma_m8n32k16_ld_a_u8:
20923 return MMA_LDST(1, m8n32k16_load_a_u8);
20924 case NVPTX::BI__imma_m8n32k16_ld_b_s8:
20925 return MMA_LDST(4, m8n32k16_load_b_s8);
20926 case NVPTX::BI__imma_m8n32k16_ld_b_u8:
20927 return MMA_LDST(4, m8n32k16_load_b_u8);
20928 case NVPTX::BI__imma_m8n32k16_ld_c:
20929 return MMA_LDST(8, m8n32k16_load_c_s32);
20930
20931 // Sub-integer MMA loads.
20932 // Only row/col layout is supported by A/B fragments.
20933 case NVPTX::BI__imma_m8n8k32_ld_a_s4:
20934 return {1, 0, MMA_INTR(m8n8k32_load_a_s4, row)};
20935 case NVPTX::BI__imma_m8n8k32_ld_a_u4:
20936 return {1, 0, MMA_INTR(m8n8k32_load_a_u4, row)};
20937 case NVPTX::BI__imma_m8n8k32_ld_b_s4:
20938 return {1, MMA_INTR(m8n8k32_load_b_s4, col), 0};
20939 case NVPTX::BI__imma_m8n8k32_ld_b_u4:
20940 return {1, MMA_INTR(m8n8k32_load_b_u4, col), 0};
20941 case NVPTX::BI__imma_m8n8k32_ld_c:
20942 return MMA_LDST(2, m8n8k32_load_c_s32);
20943 case NVPTX::BI__bmma_m8n8k128_ld_a_b1:
20944 return {1, 0, MMA_INTR(m8n8k128_load_a_b1, row)};
20945 case NVPTX::BI__bmma_m8n8k128_ld_b_b1:
20946 return {1, MMA_INTR(m8n8k128_load_b_b1, col), 0};
20947 case NVPTX::BI__bmma_m8n8k128_ld_c:
20948 return MMA_LDST(2, m8n8k128_load_c_s32);
20949
20950 // Double MMA loads
20951 case NVPTX::BI__dmma_m8n8k4_ld_a:
20952 return MMA_LDST(1, m8n8k4_load_a_f64);
20953 case NVPTX::BI__dmma_m8n8k4_ld_b:
20954 return MMA_LDST(1, m8n8k4_load_b_f64);
20955 case NVPTX::BI__dmma_m8n8k4_ld_c:
20956 return MMA_LDST(2, m8n8k4_load_c_f64);
20957
20958 // Alternate float MMA loads
20959 case NVPTX::BI__mma_bf16_m16n16k16_ld_a:
20960 return MMA_LDST(4, m16n16k16_load_a_bf16);
20961 case NVPTX::BI__mma_bf16_m16n16k16_ld_b:
20962 return MMA_LDST(4, m16n16k16_load_b_bf16);
20963 case NVPTX::BI__mma_bf16_m8n32k16_ld_a:
20964 return MMA_LDST(2, m8n32k16_load_a_bf16);
20965 case NVPTX::BI__mma_bf16_m8n32k16_ld_b:
20966 return MMA_LDST(8, m8n32k16_load_b_bf16);
20967 case NVPTX::BI__mma_bf16_m32n8k16_ld_a:
20968 return MMA_LDST(8, m32n8k16_load_a_bf16);
20969 case NVPTX::BI__mma_bf16_m32n8k16_ld_b:
20970 return MMA_LDST(2, m32n8k16_load_b_bf16);
20971 case NVPTX::BI__mma_tf32_m16n16k8_ld_a:
20972 return MMA_LDST(4, m16n16k8_load_a_tf32);
20973 case NVPTX::BI__mma_tf32_m16n16k8_ld_b:
20974 return MMA_LDST(4, m16n16k8_load_b_tf32);
20975 case NVPTX::BI__mma_tf32_m16n16k8_ld_c:
20976 return MMA_LDST(8, m16n16k8_load_c_f32);
20977
20978 // NOTE: We need to follow the inconsistent naming scheme used by NVCC. Unlike
20979 // PTX and LLVM IR where stores always use fragment D, NVCC builtins always
20980 // use fragment C for both loads and stores.
20981 // FP MMA stores.
20982 case NVPTX::BI__hmma_m16n16k16_st_c_f16:
20983 return MMA_LDST(4, m16n16k16_store_d_f16);
20984 case NVPTX::BI__hmma_m16n16k16_st_c_f32:
20985 return MMA_LDST(8, m16n16k16_store_d_f32);
20986 case NVPTX::BI__hmma_m32n8k16_st_c_f16:
20987 return MMA_LDST(4, m32n8k16_store_d_f16);
20988 case NVPTX::BI__hmma_m32n8k16_st_c_f32:
20989 return MMA_LDST(8, m32n8k16_store_d_f32);
20990 case NVPTX::BI__hmma_m8n32k16_st_c_f16:
20991 return MMA_LDST(4, m8n32k16_store_d_f16);
20992 case NVPTX::BI__hmma_m8n32k16_st_c_f32:
20993 return MMA_LDST(8, m8n32k16_store_d_f32);
20994
20995 // Integer and sub-integer MMA stores.
20996 // Another naming quirk. Unlike other MMA builtins that use PTX types in the
20997 // name, integer loads/stores use LLVM's i32.
20998 case NVPTX::BI__imma_m16n16k16_st_c_i32:
20999 return MMA_LDST(8, m16n16k16_store_d_s32);
21000 case NVPTX::BI__imma_m32n8k16_st_c_i32:
21001 return MMA_LDST(8, m32n8k16_store_d_s32);
21002 case NVPTX::BI__imma_m8n32k16_st_c_i32:
21003 return MMA_LDST(8, m8n32k16_store_d_s32);
21004 case NVPTX::BI__imma_m8n8k32_st_c_i32:
21005 return MMA_LDST(2, m8n8k32_store_d_s32);
21006 case NVPTX::BI__bmma_m8n8k128_st_c_i32:
21007 return MMA_LDST(2, m8n8k128_store_d_s32);
21008
21009 // Double MMA store
21010 case NVPTX::BI__dmma_m8n8k4_st_c_f64:
21011 return MMA_LDST(2, m8n8k4_store_d_f64);
21012
21013 // Alternate float MMA store
21014 case NVPTX::BI__mma_m16n16k8_st_c_f32:
21015 return MMA_LDST(8, m16n16k8_store_d_f32);
21016
21017 default:
21018 llvm_unreachable("Unknown MMA builtin");
21019 }
21020}
21021#undef MMA_LDST
21022#undef MMA_INTR
21023
21024
21025struct NVPTXMmaInfo {
21026 unsigned NumEltsA;
21027 unsigned NumEltsB;
21028 unsigned NumEltsC;
21029 unsigned NumEltsD;
21030
21031 // Variants are ordered by layout-A/layout-B/satf, where 'row' has priority
21032 // over 'col' for layout. The index of non-satf variants is expected to match
21033 // the undocumented layout constants used by CUDA's mma.hpp.
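// For example (illustrative): Layout = 1 (row-major A, col-major B) with Satf
// set gives Index = 1 + 4 * 1 = 5, which selects the row_col "_satfinite" slot
// in the MMA_SATF_VARIANTS ordering below.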
21034 std::array<unsigned, 8> Variants;
21035
21036 unsigned getMMAIntrinsic(int Layout, bool Satf) {
21037 unsigned Index = Layout + 4 * Satf;
21038 if (Index >= Variants.size())
21039 return 0;
21040 return Variants[Index];
21041 }
21042};
21043
21044 // Returns the NVPTXMmaInfo for a given MMA builtin; its getMMAIntrinsic() then
21045 // yields the intrinsic matching Layout and Satf for valid combinations, 0 otherwise.
21046static NVPTXMmaInfo getNVPTXMmaInfo(unsigned BuiltinID) {
21047 // clang-format off
21048#define MMA_VARIANTS(geom, type) \
21049 Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type, \
21050 Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type, \
21051 Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type, \
21052 Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type
21053#define MMA_SATF_VARIANTS(geom, type) \
21054 MMA_VARIANTS(geom, type), \
21055 Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type##_satfinite, \
21056 Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type##_satfinite, \
21057 Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type##_satfinite, \
21058 Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type##_satfinite
21059// Sub-integer MMA only supports row.col layout.
21060#define MMA_VARIANTS_I4(geom, type) \
21061 0, \
21062 Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type, \
21063 0, \
21064 0, \
21065 0, \
21066 Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type##_satfinite, \
21067 0, \
21068 0
21069// b1 MMA does not support .satfinite.
21070#define MMA_VARIANTS_B1_XOR(geom, type) \
21071 0, \
21072 Intrinsic::nvvm_wmma_##geom##_mma_xor_popc_row_col_##type, \
21073 0, \
21074 0, \
21075 0, \
21076 0, \
21077 0, \
21078 0
21079#define MMA_VARIANTS_B1_AND(geom, type) \
21080 0, \
21081 Intrinsic::nvvm_wmma_##geom##_mma_and_popc_row_col_##type, \
21082 0, \
21083 0, \
21084 0, \
21085 0, \
21086 0, \
21087 0
21088 // clang-format on
21089 switch (BuiltinID) {
21090 // FP MMA
21091 // Note that the 'type' argument of MMA_SATF_VARIANTS uses D_C notation, while
21092 // the NumEltsN fields of the return value are ordered as A,B,C,D.
21093 case NVPTX::BI__hmma_m16n16k16_mma_f16f16:
21094 return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m16n16k16, f16_f16)}}};
21095 case NVPTX::BI__hmma_m16n16k16_mma_f32f16:
21096 return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m16n16k16, f32_f16)}}};
21097 case NVPTX::BI__hmma_m16n16k16_mma_f16f32:
21098 return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m16n16k16, f16_f32)}}};
21099 case NVPTX::BI__hmma_m16n16k16_mma_f32f32:
21100 return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, f32_f32)}}};
21101 case NVPTX::BI__hmma_m32n8k16_mma_f16f16:
21102 return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m32n8k16, f16_f16)}}};
21103 case NVPTX::BI__hmma_m32n8k16_mma_f32f16:
21104 return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m32n8k16, f32_f16)}}};
21105 case NVPTX::BI__hmma_m32n8k16_mma_f16f32:
21106 return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m32n8k16, f16_f32)}}};
21107 case NVPTX::BI__hmma_m32n8k16_mma_f32f32:
21108 return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, f32_f32)}}};
21109 case NVPTX::BI__hmma_m8n32k16_mma_f16f16:
21110 return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m8n32k16, f16_f16)}}};
21111 case NVPTX::BI__hmma_m8n32k16_mma_f32f16:
21112 return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m8n32k16, f32_f16)}}};
21113 case NVPTX::BI__hmma_m8n32k16_mma_f16f32:
21114 return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m8n32k16, f16_f32)}}};
21115 case NVPTX::BI__hmma_m8n32k16_mma_f32f32:
21116 return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, f32_f32)}}};
21117
21118 // Integer MMA
21119 case NVPTX::BI__imma_m16n16k16_mma_s8:
21120 return {2, 2, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, s8)}}};
21121 case NVPTX::BI__imma_m16n16k16_mma_u8:
21122 return {2, 2, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, u8)}}};
21123 case NVPTX::BI__imma_m32n8k16_mma_s8:
21124 return {4, 1, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, s8)}}};
21125 case NVPTX::BI__imma_m32n8k16_mma_u8:
21126 return {4, 1, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, u8)}}};
21127 case NVPTX::BI__imma_m8n32k16_mma_s8:
21128 return {1, 4, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, s8)}}};
21129 case NVPTX::BI__imma_m8n32k16_mma_u8:
21130 return {1, 4, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, u8)}}};
21131
21132 // Sub-integer MMA
21133 case NVPTX::BI__imma_m8n8k32_mma_s4:
21134 return {1, 1, 2, 2, {{MMA_VARIANTS_I4(m8n8k32, s4)}}};
21135 case NVPTX::BI__imma_m8n8k32_mma_u4:
21136 return {1, 1, 2, 2, {{MMA_VARIANTS_I4(m8n8k32, u4)}}};
21137 case NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1:
21138 return {1, 1, 2, 2, {{MMA_VARIANTS_B1_XOR(m8n8k128, b1)}}};
21139 case NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1:
21140 return {1, 1, 2, 2, {{MMA_VARIANTS_B1_AND(m8n8k128, b1)}}};
21141
21142 // Double MMA
21143 case NVPTX::BI__dmma_m8n8k4_mma_f64:
21144 return {1, 1, 2, 2, {{MMA_VARIANTS(m8n8k4, f64)}}};
21145
21146 // Alternate FP MMA
21147 case NVPTX::BI__mma_bf16_m16n16k16_mma_f32:
21148 return {4, 4, 8, 8, {{MMA_VARIANTS(m16n16k16, bf16)}}};
21149 case NVPTX::BI__mma_bf16_m8n32k16_mma_f32:
21150 return {2, 8, 8, 8, {{MMA_VARIANTS(m8n32k16, bf16)}}};
21151 case NVPTX::BI__mma_bf16_m32n8k16_mma_f32:
21152 return {8, 2, 8, 8, {{MMA_VARIANTS(m32n8k16, bf16)}}};
21153 case NVPTX::BI__mma_tf32_m16n16k8_mma_f32:
21154 return {4, 4, 8, 8, {{MMA_VARIANTS(m16n16k8, tf32)}}};
21155 default:
21156 llvm_unreachable("Unexpected builtin ID.");
21157 }
21158#undef MMA_VARIANTS
21159#undef MMA_SATF_VARIANTS
21160#undef MMA_VARIANTS_I4
21161#undef MMA_VARIANTS_B1_AND
21162#undef MMA_VARIANTS_B1_XOR
21163}
21164
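// Helper for the __nvvm_ldu_* builtins. For illustration (a sketch),
// __nvvm_ldu_i(p) becomes roughly
//   call i32 @llvm.nvvm.ldu.global.i.i32.p0(ptr %p, i32 4)
// with the pointee's natural alignment passed as the second operand.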
21165static Value *MakeLdu(unsigned IntrinsicID, CodeGenFunction &CGF,
21166 const CallExpr *E) {
21167 Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
21168 QualType ArgType = E->getArg(0)->getType();
21169 clang::CharUnits Align = CGF.CGM.getNaturalPointeeTypeAlignment(ArgType);
21170 llvm::Type *ElemTy = CGF.ConvertTypeForMem(ArgType->getPointeeType());
21171 return CGF.Builder.CreateCall(
21172 CGF.CGM.getIntrinsic(IntrinsicID, {ElemTy, Ptr->getType()}),
21173 {Ptr, ConstantInt::get(CGF.Builder.getInt32Ty(), Align.getQuantity())});
21174}
21175
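// Helper for the __nvvm_ldg_* builtins. For illustration (a sketch),
// __nvvm_ldg_f(p) becomes roughly
//   %g = addrspacecast ptr %p to ptr addrspace(1)
//   %v = load float, ptr addrspace(1) %g, align 4, !invariant.load !0
// where !0 is an empty metadata node.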
21176static Value *MakeLdg(CodeGenFunction &CGF, const CallExpr *E) {
21177 Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
21178 QualType ArgType = E->getArg(0)->getType();
21179 clang::CharUnits AlignV = CGF.CGM.getNaturalPointeeTypeAlignment(ArgType);
21180 llvm::Type *ElemTy = CGF.ConvertTypeForMem(ArgType->getPointeeType());
21181
21182 // Use addrspace(1) for NVPTX ADDRESS_SPACE_GLOBAL
21183 auto *ASC = CGF.Builder.CreateAddrSpaceCast(Ptr, CGF.Builder.getPtrTy(1));
21184 auto *LD = CGF.Builder.CreateAlignedLoad(ElemTy, ASC, AlignV.getAsAlign());
21185 MDNode *MD = MDNode::get(CGF.Builder.getContext(), {});
21186 LD->setMetadata(LLVMContext::MD_invariant_load, MD);
21187
21188 return LD;
21189}
21190
21191static Value *MakeScopedAtomic(unsigned IntrinsicID, CodeGenFunction &CGF,
21192 const CallExpr *E) {
21193 Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
21194 llvm::Type *ElemTy =
21195 CGF.ConvertTypeForMem(E->getArg(0)->getType()->getPointeeType());
21196 return CGF.Builder.CreateCall(
21197 CGF.CGM.getIntrinsic(IntrinsicID, {ElemTy, Ptr->getType()}),
21198 {Ptr, CGF.EmitScalarExpr(E->getArg(1))});
21199}
21200
21201static Value *MakeCpAsync(unsigned IntrinsicID, unsigned IntrinsicIDS,
21202 CodeGenFunction &CGF, const CallExpr *E,
21203 int SrcSize) {
21204 return E->getNumArgs() == 3
21205 ? CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IntrinsicIDS),
21206 {CGF.EmitScalarExpr(E->getArg(0)),
21207 CGF.EmitScalarExpr(E->getArg(1)),
21208 CGF.EmitScalarExpr(E->getArg(2))})
21209 : CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IntrinsicID),
21210 {CGF.EmitScalarExpr(E->getArg(0)),
21211 CGF.EmitScalarExpr(E->getArg(1))});
21212}
21213
21214static Value *MakeHalfType(unsigned IntrinsicID, unsigned BuiltinID,
21215 const CallExpr *E, CodeGenFunction &CGF) {
21216 auto &C = CGF.CGM.getContext();
21217 if (!(C.getLangOpts().NativeHalfType ||
21218 !C.getTargetInfo().useFP16ConversionIntrinsics())) {
21219 CGF.CGM.Error(E->getExprLoc(), C.BuiltinInfo.getName(BuiltinID).str() +
21220 " requires native half type support.");
21221 return nullptr;
21222 }
21223
21224 if (BuiltinID == NVPTX::BI__nvvm_ldg_h || BuiltinID == NVPTX::BI__nvvm_ldg_h2)
21225 return MakeLdg(CGF, E);
21226
21227 if (IntrinsicID == Intrinsic::nvvm_ldu_global_f)
21228 return MakeLdu(IntrinsicID, CGF, E);
21229
21230 SmallVector<Value *, 16> Args;
21231 auto *F = CGF.CGM.getIntrinsic(IntrinsicID);
21232 auto *FTy = F->getFunctionType();
21233 unsigned ICEArguments = 0;
21234 ASTContext::GetBuiltinTypeError Error;
21235 C.GetBuiltinType(BuiltinID, Error, &ICEArguments);
21236 assert(Error == ASTContext::GE_None && "Should not codegen an error");
21237 for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
21238 assert((ICEArguments & (1 << i)) == 0);
21239 auto *ArgValue = CGF.EmitScalarExpr(E->getArg(i));
21240 auto *PTy = FTy->getParamType(i);
21241 if (PTy != ArgValue->getType())
21242 ArgValue = CGF.Builder.CreateBitCast(ArgValue, PTy);
21243 Args.push_back(ArgValue);
21244 }
21245
21246 return CGF.Builder.CreateCall(F, Args);
21247}
21248} // namespace
21249
21250 Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
21251 const CallExpr *E) {
21252 switch (BuiltinID) {
21253 case NVPTX::BI__nvvm_atom_add_gen_i:
21254 case NVPTX::BI__nvvm_atom_add_gen_l:
21255 case NVPTX::BI__nvvm_atom_add_gen_ll:
21256 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Add, E);
21257
21258 case NVPTX::BI__nvvm_atom_sub_gen_i:
21259 case NVPTX::BI__nvvm_atom_sub_gen_l:
21260 case NVPTX::BI__nvvm_atom_sub_gen_ll:
21261 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Sub, E);
21262
21263 case NVPTX::BI__nvvm_atom_and_gen_i:
21264 case NVPTX::BI__nvvm_atom_and_gen_l:
21265 case NVPTX::BI__nvvm_atom_and_gen_ll:
21266 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::And, E);
21267
21268 case NVPTX::BI__nvvm_atom_or_gen_i:
21269 case NVPTX::BI__nvvm_atom_or_gen_l:
21270 case NVPTX::BI__nvvm_atom_or_gen_ll:
21271 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Or, E);
21272
21273 case NVPTX::BI__nvvm_atom_xor_gen_i:
21274 case NVPTX::BI__nvvm_atom_xor_gen_l:
21275 case NVPTX::BI__nvvm_atom_xor_gen_ll:
21276 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xor, E);
21277
21278 case NVPTX::BI__nvvm_atom_xchg_gen_i:
21279 case NVPTX::BI__nvvm_atom_xchg_gen_l:
21280 case NVPTX::BI__nvvm_atom_xchg_gen_ll:
21281 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xchg, E);
21282
21283 case NVPTX::BI__nvvm_atom_max_gen_i:
21284 case NVPTX::BI__nvvm_atom_max_gen_l:
21285 case NVPTX::BI__nvvm_atom_max_gen_ll:
21286 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Max, E);
21287
21288 case NVPTX::BI__nvvm_atom_max_gen_ui:
21289 case NVPTX::BI__nvvm_atom_max_gen_ul:
21290 case NVPTX::BI__nvvm_atom_max_gen_ull:
21291 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMax, E);
21292
21293 case NVPTX::BI__nvvm_atom_min_gen_i:
21294 case NVPTX::BI__nvvm_atom_min_gen_l:
21295 case NVPTX::BI__nvvm_atom_min_gen_ll:
21296 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Min, E);
21297
21298 case NVPTX::BI__nvvm_atom_min_gen_ui:
21299 case NVPTX::BI__nvvm_atom_min_gen_ul:
21300 case NVPTX::BI__nvvm_atom_min_gen_ull:
21301 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMin, E);
21302
21303 case NVPTX::BI__nvvm_atom_cas_gen_us:
21304 case NVPTX::BI__nvvm_atom_cas_gen_i:
21305 case NVPTX::BI__nvvm_atom_cas_gen_l:
21306 case NVPTX::BI__nvvm_atom_cas_gen_ll:
21307 // __nvvm_atom_cas_gen_* should return the old value rather than the
21308 // success flag.
21309 return MakeAtomicCmpXchgValue(*this, E, /*ReturnBool=*/false);
21310
21311 case NVPTX::BI__nvvm_atom_add_gen_f:
21312 case NVPTX::BI__nvvm_atom_add_gen_d: {
21313 Address DestAddr = EmitPointerWithAlignment(E->getArg(0));
21314 Value *Val = EmitScalarExpr(E->getArg(1));
21315
21316 return Builder.CreateAtomicRMW(llvm::AtomicRMWInst::FAdd, DestAddr, Val,
21317 AtomicOrdering::SequentiallyConsistent);
21318 }
21319
21320 case NVPTX::BI__nvvm_atom_inc_gen_ui: {
21321 Value *Ptr = EmitScalarExpr(E->getArg(0));
21322 Value *Val = EmitScalarExpr(E->getArg(1));
21323 Function *FnALI32 =
21324 CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_inc_32, Ptr->getType());
21325 return Builder.CreateCall(FnALI32, {Ptr, Val});
21326 }
21327
21328 case NVPTX::BI__nvvm_atom_dec_gen_ui: {
21329 Value *Ptr = EmitScalarExpr(E->getArg(0));
21330 Value *Val = EmitScalarExpr(E->getArg(1));
21331 Function *FnALD32 =
21332 CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_dec_32, Ptr->getType());
21333 return Builder.CreateCall(FnALD32, {Ptr, Val});
21334 }
21335
21336 case NVPTX::BI__nvvm_ldg_c:
21337 case NVPTX::BI__nvvm_ldg_sc:
21338 case NVPTX::BI__nvvm_ldg_c2:
21339 case NVPTX::BI__nvvm_ldg_sc2:
21340 case NVPTX::BI__nvvm_ldg_c4:
21341 case NVPTX::BI__nvvm_ldg_sc4:
21342 case NVPTX::BI__nvvm_ldg_s:
21343 case NVPTX::BI__nvvm_ldg_s2:
21344 case NVPTX::BI__nvvm_ldg_s4:
21345 case NVPTX::BI__nvvm_ldg_i:
21346 case NVPTX::BI__nvvm_ldg_i2:
21347 case NVPTX::BI__nvvm_ldg_i4:
21348 case NVPTX::BI__nvvm_ldg_l:
21349 case NVPTX::BI__nvvm_ldg_l2:
21350 case NVPTX::BI__nvvm_ldg_ll:
21351 case NVPTX::BI__nvvm_ldg_ll2:
21352 case NVPTX::BI__nvvm_ldg_uc:
21353 case NVPTX::BI__nvvm_ldg_uc2:
21354 case NVPTX::BI__nvvm_ldg_uc4:
21355 case NVPTX::BI__nvvm_ldg_us:
21356 case NVPTX::BI__nvvm_ldg_us2:
21357 case NVPTX::BI__nvvm_ldg_us4:
21358 case NVPTX::BI__nvvm_ldg_ui:
21359 case NVPTX::BI__nvvm_ldg_ui2:
21360 case NVPTX::BI__nvvm_ldg_ui4:
21361 case NVPTX::BI__nvvm_ldg_ul:
21362 case NVPTX::BI__nvvm_ldg_ul2:
21363 case NVPTX::BI__nvvm_ldg_ull:
21364 case NVPTX::BI__nvvm_ldg_ull2:
21365 case NVPTX::BI__nvvm_ldg_f:
21366 case NVPTX::BI__nvvm_ldg_f2:
21367 case NVPTX::BI__nvvm_ldg_f4:
21368 case NVPTX::BI__nvvm_ldg_d:
21369 case NVPTX::BI__nvvm_ldg_d2:
21370 // PTX Interoperability section 2.2: "For a vector with an even number of
21371 // elements, its alignment is set to number of elements times the alignment
21372 // of its member: n*alignof(t)."
21373 return MakeLdg(*this, E);
21374
21375 case NVPTX::BI__nvvm_ldu_c:
21376 case NVPTX::BI__nvvm_ldu_sc:
21377 case NVPTX::BI__nvvm_ldu_c2:
21378 case NVPTX::BI__nvvm_ldu_sc2:
21379 case NVPTX::BI__nvvm_ldu_c4:
21380 case NVPTX::BI__nvvm_ldu_sc4:
21381 case NVPTX::BI__nvvm_ldu_s:
21382 case NVPTX::BI__nvvm_ldu_s2:
21383 case NVPTX::BI__nvvm_ldu_s4:
21384 case NVPTX::BI__nvvm_ldu_i:
21385 case NVPTX::BI__nvvm_ldu_i2:
21386 case NVPTX::BI__nvvm_ldu_i4:
21387 case NVPTX::BI__nvvm_ldu_l:
21388 case NVPTX::BI__nvvm_ldu_l2:
21389 case NVPTX::BI__nvvm_ldu_ll:
21390 case NVPTX::BI__nvvm_ldu_ll2:
21391 case NVPTX::BI__nvvm_ldu_uc:
21392 case NVPTX::BI__nvvm_ldu_uc2:
21393 case NVPTX::BI__nvvm_ldu_uc4:
21394 case NVPTX::BI__nvvm_ldu_us:
21395 case NVPTX::BI__nvvm_ldu_us2:
21396 case NVPTX::BI__nvvm_ldu_us4:
21397 case NVPTX::BI__nvvm_ldu_ui:
21398 case NVPTX::BI__nvvm_ldu_ui2:
21399 case NVPTX::BI__nvvm_ldu_ui4:
21400 case NVPTX::BI__nvvm_ldu_ul:
21401 case NVPTX::BI__nvvm_ldu_ul2:
21402 case NVPTX::BI__nvvm_ldu_ull:
21403 case NVPTX::BI__nvvm_ldu_ull2:
21404 return MakeLdu(Intrinsic::nvvm_ldu_global_i, *this, E);
21405 case NVPTX::BI__nvvm_ldu_f:
21406 case NVPTX::BI__nvvm_ldu_f2:
21407 case NVPTX::BI__nvvm_ldu_f4:
21408 case NVPTX::BI__nvvm_ldu_d:
21409 case NVPTX::BI__nvvm_ldu_d2:
21410 return MakeLdu(Intrinsic::nvvm_ldu_global_f, *this, E);
21411
21412 case NVPTX::BI__nvvm_atom_cta_add_gen_i:
21413 case NVPTX::BI__nvvm_atom_cta_add_gen_l:
21414 case NVPTX::BI__nvvm_atom_cta_add_gen_ll:
21415 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_cta, *this, E);
21416 case NVPTX::BI__nvvm_atom_sys_add_gen_i:
21417 case NVPTX::BI__nvvm_atom_sys_add_gen_l:
21418 case NVPTX::BI__nvvm_atom_sys_add_gen_ll:
21419 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_sys, *this, E);
21420 case NVPTX::BI__nvvm_atom_cta_add_gen_f:
21421 case NVPTX::BI__nvvm_atom_cta_add_gen_d:
21422 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_cta, *this, E);
21423 case NVPTX::BI__nvvm_atom_sys_add_gen_f:
21424 case NVPTX::BI__nvvm_atom_sys_add_gen_d:
21425 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_sys, *this, E);
21426 case NVPTX::BI__nvvm_atom_cta_xchg_gen_i:
21427 case NVPTX::BI__nvvm_atom_cta_xchg_gen_l:
21428 case NVPTX::BI__nvvm_atom_cta_xchg_gen_ll:
21429 return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_cta, *this, E);
21430 case NVPTX::BI__nvvm_atom_sys_xchg_gen_i:
21431 case NVPTX::BI__nvvm_atom_sys_xchg_gen_l:
21432 case NVPTX::BI__nvvm_atom_sys_xchg_gen_ll:
21433 return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_sys, *this, E);
21434 case NVPTX::BI__nvvm_atom_cta_max_gen_i:
21435 case NVPTX::BI__nvvm_atom_cta_max_gen_ui:
21436 case NVPTX::BI__nvvm_atom_cta_max_gen_l:
21437 case NVPTX::BI__nvvm_atom_cta_max_gen_ul:
21438 case NVPTX::BI__nvvm_atom_cta_max_gen_ll:
21439 case NVPTX::BI__nvvm_atom_cta_max_gen_ull:
21440 return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_cta, *this, E);
21441 case NVPTX::BI__nvvm_atom_sys_max_gen_i:
21442 case NVPTX::BI__nvvm_atom_sys_max_gen_ui:
21443 case NVPTX::BI__nvvm_atom_sys_max_gen_l:
21444 case NVPTX::BI__nvvm_atom_sys_max_gen_ul:
21445 case NVPTX::BI__nvvm_atom_sys_max_gen_ll:
21446 case NVPTX::BI__nvvm_atom_sys_max_gen_ull:
21447 return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_sys, *this, E);
21448 case NVPTX::BI__nvvm_atom_cta_min_gen_i:
21449 case NVPTX::BI__nvvm_atom_cta_min_gen_ui:
21450 case NVPTX::BI__nvvm_atom_cta_min_gen_l:
21451 case NVPTX::BI__nvvm_atom_cta_min_gen_ul:
21452 case NVPTX::BI__nvvm_atom_cta_min_gen_ll:
21453 case NVPTX::BI__nvvm_atom_cta_min_gen_ull:
21454 return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_cta, *this, E);
21455 case NVPTX::BI__nvvm_atom_sys_min_gen_i:
21456 case NVPTX::BI__nvvm_atom_sys_min_gen_ui:
21457 case NVPTX::BI__nvvm_atom_sys_min_gen_l:
21458 case NVPTX::BI__nvvm_atom_sys_min_gen_ul:
21459 case NVPTX::BI__nvvm_atom_sys_min_gen_ll:
21460 case NVPTX::BI__nvvm_atom_sys_min_gen_ull:
21461 return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_sys, *this, E);
21462 case NVPTX::BI__nvvm_atom_cta_inc_gen_ui:
21463 return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_cta, *this, E);
21464 case NVPTX::BI__nvvm_atom_cta_dec_gen_ui:
21465 return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_cta, *this, E);
21466 case NVPTX::BI__nvvm_atom_sys_inc_gen_ui:
21467 return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_sys, *this, E);
21468 case NVPTX::BI__nvvm_atom_sys_dec_gen_ui:
21469 return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_sys, *this, E);
21470 case NVPTX::BI__nvvm_atom_cta_and_gen_i:
21471 case NVPTX::BI__nvvm_atom_cta_and_gen_l:
21472 case NVPTX::BI__nvvm_atom_cta_and_gen_ll:
21473 return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_cta, *this, E);
21474 case NVPTX::BI__nvvm_atom_sys_and_gen_i:
21475 case NVPTX::BI__nvvm_atom_sys_and_gen_l:
21476 case NVPTX::BI__nvvm_atom_sys_and_gen_ll:
21477 return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_sys, *this, E);
21478 case NVPTX::BI__nvvm_atom_cta_or_gen_i:
21479 case NVPTX::BI__nvvm_atom_cta_or_gen_l:
21480 case NVPTX::BI__nvvm_atom_cta_or_gen_ll:
21481 return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_cta, *this, E);
21482 case NVPTX::BI__nvvm_atom_sys_or_gen_i:
21483 case NVPTX::BI__nvvm_atom_sys_or_gen_l:
21484 case NVPTX::BI__nvvm_atom_sys_or_gen_ll:
21485 return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_sys, *this, E);
21486 case NVPTX::BI__nvvm_atom_cta_xor_gen_i:
21487 case NVPTX::BI__nvvm_atom_cta_xor_gen_l:
21488 case NVPTX::BI__nvvm_atom_cta_xor_gen_ll:
21489 return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_cta, *this, E);
21490 case NVPTX::BI__nvvm_atom_sys_xor_gen_i:
21491 case NVPTX::BI__nvvm_atom_sys_xor_gen_l:
21492 case NVPTX::BI__nvvm_atom_sys_xor_gen_ll:
21493 return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_sys, *this, E);
21494 case NVPTX::BI__nvvm_atom_cta_cas_gen_us:
21495 case NVPTX::BI__nvvm_atom_cta_cas_gen_i:
21496 case NVPTX::BI__nvvm_atom_cta_cas_gen_l:
21497 case NVPTX::BI__nvvm_atom_cta_cas_gen_ll: {
21498 Value *Ptr = EmitScalarExpr(E->getArg(0));
21499 llvm::Type *ElemTy =
21500 ConvertTypeForMem(E->getArg(0)->getType()->getPointeeType());
21501 return Builder.CreateCall(
21502 CGM.getIntrinsic(
21503 Intrinsic::nvvm_atomic_cas_gen_i_cta, {ElemTy, Ptr->getType()}),
21504 {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
21505 }
21506 case NVPTX::BI__nvvm_atom_sys_cas_gen_us:
21507 case NVPTX::BI__nvvm_atom_sys_cas_gen_i:
21508 case NVPTX::BI__nvvm_atom_sys_cas_gen_l:
21509 case NVPTX::BI__nvvm_atom_sys_cas_gen_ll: {
21510 Value *Ptr = EmitScalarExpr(E->getArg(0));
21511 llvm::Type *ElemTy =
21512 ConvertTypeForMem(E->getArg(0)->getType()->getPointeeType());
21513 return Builder.CreateCall(
21514 CGM.getIntrinsic(
21515 Intrinsic::nvvm_atomic_cas_gen_i_sys, {ElemTy, Ptr->getType()}),
21516 {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
21517 }
21518 case NVPTX::BI__nvvm_match_all_sync_i32p:
21519 case NVPTX::BI__nvvm_match_all_sync_i64p: {
21520 Value *Mask = EmitScalarExpr(E->getArg(0));
21521 Value *Val = EmitScalarExpr(E->getArg(1));
21522 Address PredOutPtr = EmitPointerWithAlignment(E->getArg(2));
21523 Value *ResultPair = Builder.CreateCall(
21524 CGM.getIntrinsic(BuiltinID == NVPTX::BI__nvvm_match_all_sync_i32p
21525 ? Intrinsic::nvvm_match_all_sync_i32p
21526 : Intrinsic::nvvm_match_all_sync_i64p),
21527 {Mask, Val});
21528 Value *Pred = Builder.CreateZExt(Builder.CreateExtractValue(ResultPair, 1),
21529 PredOutPtr.getElementType());
21530 Builder.CreateStore(Pred, PredOutPtr);
21531 return Builder.CreateExtractValue(ResultPair, 0);
21532 }
21533
21534 // FP MMA loads
21535 case NVPTX::BI__hmma_m16n16k16_ld_a:
21536 case NVPTX::BI__hmma_m16n16k16_ld_b:
21537 case NVPTX::BI__hmma_m16n16k16_ld_c_f16:
21538 case NVPTX::BI__hmma_m16n16k16_ld_c_f32:
21539 case NVPTX::BI__hmma_m32n8k16_ld_a:
21540 case NVPTX::BI__hmma_m32n8k16_ld_b:
21541 case NVPTX::BI__hmma_m32n8k16_ld_c_f16:
21542 case NVPTX::BI__hmma_m32n8k16_ld_c_f32:
21543 case NVPTX::BI__hmma_m8n32k16_ld_a:
21544 case NVPTX::BI__hmma_m8n32k16_ld_b:
21545 case NVPTX::BI__hmma_m8n32k16_ld_c_f16:
21546 case NVPTX::BI__hmma_m8n32k16_ld_c_f32:
21547 // Integer MMA loads.
21548 case NVPTX::BI__imma_m16n16k16_ld_a_s8:
21549 case NVPTX::BI__imma_m16n16k16_ld_a_u8:
21550 case NVPTX::BI__imma_m16n16k16_ld_b_s8:
21551 case NVPTX::BI__imma_m16n16k16_ld_b_u8:
21552 case NVPTX::BI__imma_m16n16k16_ld_c:
21553 case NVPTX::BI__imma_m32n8k16_ld_a_s8:
21554 case NVPTX::BI__imma_m32n8k16_ld_a_u8:
21555 case NVPTX::BI__imma_m32n8k16_ld_b_s8:
21556 case NVPTX::BI__imma_m32n8k16_ld_b_u8:
21557 case NVPTX::BI__imma_m32n8k16_ld_c:
21558 case NVPTX::BI__imma_m8n32k16_ld_a_s8:
21559 case NVPTX::BI__imma_m8n32k16_ld_a_u8:
21560 case NVPTX::BI__imma_m8n32k16_ld_b_s8:
21561 case NVPTX::BI__imma_m8n32k16_ld_b_u8:
21562 case NVPTX::BI__imma_m8n32k16_ld_c:
21563 // Sub-integer MMA loads.
21564 case NVPTX::BI__imma_m8n8k32_ld_a_s4:
21565 case NVPTX::BI__imma_m8n8k32_ld_a_u4:
21566 case NVPTX::BI__imma_m8n8k32_ld_b_s4:
21567 case NVPTX::BI__imma_m8n8k32_ld_b_u4:
21568 case NVPTX::BI__imma_m8n8k32_ld_c:
21569 case NVPTX::BI__bmma_m8n8k128_ld_a_b1:
21570 case NVPTX::BI__bmma_m8n8k128_ld_b_b1:
21571 case NVPTX::BI__bmma_m8n8k128_ld_c:
21572 // Double MMA loads.
21573 case NVPTX::BI__dmma_m8n8k4_ld_a:
21574 case NVPTX::BI__dmma_m8n8k4_ld_b:
21575 case NVPTX::BI__dmma_m8n8k4_ld_c:
21576 // Alternate float MMA loads.
21577 case NVPTX::BI__mma_bf16_m16n16k16_ld_a:
21578 case NVPTX::BI__mma_bf16_m16n16k16_ld_b:
21579 case NVPTX::BI__mma_bf16_m8n32k16_ld_a:
21580 case NVPTX::BI__mma_bf16_m8n32k16_ld_b:
21581 case NVPTX::BI__mma_bf16_m32n8k16_ld_a:
21582 case NVPTX::BI__mma_bf16_m32n8k16_ld_b:
21583 case NVPTX::BI__mma_tf32_m16n16k8_ld_a:
21584 case NVPTX::BI__mma_tf32_m16n16k8_ld_b:
21585 case NVPTX::BI__mma_tf32_m16n16k8_ld_c: {
21586 Address Dst = EmitPointerWithAlignment(E->getArg(0));
21587 Value *Src = EmitScalarExpr(E->getArg(1));
21588 Value *Ldm = EmitScalarExpr(E->getArg(2));
21589 std::optional<llvm::APSInt> isColMajorArg =
21590 E->getArg(3)->getIntegerConstantExpr(getContext());
21591 if (!isColMajorArg)
21592 return nullptr;
21593 bool isColMajor = isColMajorArg->getSExtValue();
21594 NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID);
21595 unsigned IID = isColMajor ? II.IID_col : II.IID_row;
21596 if (IID == 0)
21597 return nullptr;
21598
21599 Value *Result =
21600 Builder.CreateCall(CGM.getIntrinsic(IID, Src->getType()), {Src, Ldm});
21601
21602 // Save returned values.
21603 assert(II.NumResults);
21604 if (II.NumResults == 1) {
21605 Builder.CreateAlignedStore(Result, Dst.emitRawPointer(*this),
21606 CharUnits::fromQuantity(4));
21607 } else {
21608 for (unsigned i = 0; i < II.NumResults; ++i) {
21609 Builder.CreateAlignedStore(
21610 Builder.CreateBitCast(Builder.CreateExtractValue(Result, i),
21611 Dst.getElementType()),
21612 Builder.CreateGEP(Dst.getElementType(), Dst.emitRawPointer(*this),
21613 llvm::ConstantInt::get(IntTy, i)),
21614 CharUnits::fromQuantity(4));
21615 }
21616 }
21617 return Result;
21618 }
21619
21620 case NVPTX::BI__hmma_m16n16k16_st_c_f16:
21621 case NVPTX::BI__hmma_m16n16k16_st_c_f32:
21622 case NVPTX::BI__hmma_m32n8k16_st_c_f16:
21623 case NVPTX::BI__hmma_m32n8k16_st_c_f32:
21624 case NVPTX::BI__hmma_m8n32k16_st_c_f16:
21625 case NVPTX::BI__hmma_m8n32k16_st_c_f32:
21626 case NVPTX::BI__imma_m16n16k16_st_c_i32:
21627 case NVPTX::BI__imma_m32n8k16_st_c_i32:
21628 case NVPTX::BI__imma_m8n32k16_st_c_i32:
21629 case NVPTX::BI__imma_m8n8k32_st_c_i32:
21630 case NVPTX::BI__bmma_m8n8k128_st_c_i32:
21631 case NVPTX::BI__dmma_m8n8k4_st_c_f64:
21632 case NVPTX::BI__mma_m16n16k8_st_c_f32: {
21633 Value *Dst = EmitScalarExpr(E->getArg(0));
21634 Address Src = EmitPointerWithAlignment(E->getArg(1));
21635 Value *Ldm = EmitScalarExpr(E->getArg(2));
21636 std::optional<llvm::APSInt> isColMajorArg =
21637 E->getArg(3)->getIntegerConstantExpr(getContext());
21638 if (!isColMajorArg)
21639 return nullptr;
21640 bool isColMajor = isColMajorArg->getSExtValue();
21641 NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID);
21642 unsigned IID = isColMajor ? II.IID_col : II.IID_row;
21643 if (IID == 0)
21644 return nullptr;
21645 Function *Intrinsic =
21646 CGM.getIntrinsic(IID, Dst->getType());
21647 llvm::Type *ParamType = Intrinsic->getFunctionType()->getParamType(1);
21648 SmallVector<Value *, 10> Values = {Dst};
21649 for (unsigned i = 0; i < II.NumResults; ++i) {
21650 Value *V = Builder.CreateAlignedLoad(
21651 Src.getElementType(),
21652 Builder.CreateGEP(Src.getElementType(), Src.emitRawPointer(*this),
21653 llvm::ConstantInt::get(IntTy, i)),
21654 CharUnits::fromQuantity(4));
21655 Values.push_back(Builder.CreateBitCast(V, ParamType));
21656 }
21657 Values.push_back(Ldm);
21658 Value *Result = Builder.CreateCall(Intrinsic, Values);
21659 return Result;
21660 }
21661
21662 // BI__hmma_m16n16k16_mma_<Dtype><CType>(d, a, b, c, layout, satf) -->
21663 // Intrinsic::nvvm_wmma_m16n16k16_mma_sync<layout A,B><DType><CType><Satf>
21664 case NVPTX::BI__hmma_m16n16k16_mma_f16f16:
21665 case NVPTX::BI__hmma_m16n16k16_mma_f32f16:
21666 case NVPTX::BI__hmma_m16n16k16_mma_f32f32:
21667 case NVPTX::BI__hmma_m16n16k16_mma_f16f32:
21668 case NVPTX::BI__hmma_m32n8k16_mma_f16f16:
21669 case NVPTX::BI__hmma_m32n8k16_mma_f32f16:
21670 case NVPTX::BI__hmma_m32n8k16_mma_f32f32:
21671 case NVPTX::BI__hmma_m32n8k16_mma_f16f32:
21672 case NVPTX::BI__hmma_m8n32k16_mma_f16f16:
21673 case NVPTX::BI__hmma_m8n32k16_mma_f32f16:
21674 case NVPTX::BI__hmma_m8n32k16_mma_f32f32:
21675 case NVPTX::BI__hmma_m8n32k16_mma_f16f32:
21676 case NVPTX::BI__imma_m16n16k16_mma_s8:
21677 case NVPTX::BI__imma_m16n16k16_mma_u8:
21678 case NVPTX::BI__imma_m32n8k16_mma_s8:
21679 case NVPTX::BI__imma_m32n8k16_mma_u8:
21680 case NVPTX::BI__imma_m8n32k16_mma_s8:
21681 case NVPTX::BI__imma_m8n32k16_mma_u8:
21682 case NVPTX::BI__imma_m8n8k32_mma_s4:
21683 case NVPTX::BI__imma_m8n8k32_mma_u4:
21684 case NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1:
21685 case NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1:
21686 case NVPTX::BI__dmma_m8n8k4_mma_f64:
21687 case NVPTX::BI__mma_bf16_m16n16k16_mma_f32:
21688 case NVPTX::BI__mma_bf16_m8n32k16_mma_f32:
21689 case NVPTX::BI__mma_bf16_m32n8k16_mma_f32:
21690 case NVPTX::BI__mma_tf32_m16n16k8_mma_f32: {
21691 Address Dst = EmitPointerWithAlignment(E->getArg(0));
21692 Address SrcA = EmitPointerWithAlignment(E->getArg(1));
21693 Address SrcB = EmitPointerWithAlignment(E->getArg(2));
21694 Address SrcC = EmitPointerWithAlignment(E->getArg(3));
21695 std::optional<llvm::APSInt> LayoutArg =
21696 E->getArg(4)->getIntegerConstantExpr(getContext());
21697 if (!LayoutArg)
21698 return nullptr;
21699 int Layout = LayoutArg->getSExtValue();
21700 if (Layout < 0 || Layout > 3)
21701 return nullptr;
21702 llvm::APSInt SatfArg;
21703 if (BuiltinID == NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1 ||
21704 BuiltinID == NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1)
21705 SatfArg = 0; // .b1 does not have satf argument.
21706 else if (std::optional<llvm::APSInt> OptSatfArg =
21707 E->getArg(5)->getIntegerConstantExpr(getContext()))
21708 SatfArg = *OptSatfArg;
21709 else
21710 return nullptr;
21711 bool Satf = SatfArg.getSExtValue();
21712 NVPTXMmaInfo MI = getNVPTXMmaInfo(BuiltinID);
21713 unsigned IID = MI.getMMAIntrinsic(Layout, Satf);
21714 if (IID == 0) // Unsupported combination of Layout/Satf.
21715 return nullptr;
21716
21717 SmallVector<Value *, 24> Values;
21718 Function *Intrinsic = CGM.getIntrinsic(IID);
21719 llvm::Type *AType = Intrinsic->getFunctionType()->getParamType(0);
21720 // Load A
21721 for (unsigned i = 0; i < MI.NumEltsA; ++i) {
21722 Value *V = Builder.CreateAlignedLoad(
21723 SrcA.getElementType(),
21724 Builder.CreateGEP(SrcA.getElementType(), SrcA.emitRawPointer(*this),
21725 llvm::ConstantInt::get(IntTy, i)),
21726 CharUnits::fromQuantity(4));
21727 Values.push_back(Builder.CreateBitCast(V, AType));
21728 }
21729 // Load B
21730 llvm::Type *BType = Intrinsic->getFunctionType()->getParamType(MI.NumEltsA);
21731 for (unsigned i = 0; i < MI.NumEltsB; ++i) {
21732 Value *V = Builder.CreateAlignedLoad(
21733 SrcB.getElementType(),
21734 Builder.CreateGEP(SrcB.getElementType(), SrcB.emitRawPointer(*this),
21735 llvm::ConstantInt::get(IntTy, i)),
21736 CharUnits::fromQuantity(4));
21737 Values.push_back(Builder.CreateBitCast(V, BType));
21738 }
21739 // Load C
21740 llvm::Type *CType =
21741 Intrinsic->getFunctionType()->getParamType(MI.NumEltsA + MI.NumEltsB);
21742 for (unsigned i = 0; i < MI.NumEltsC; ++i) {
21743 Value *V = Builder.CreateAlignedLoad(
21744 SrcC.getElementType(),
21745 Builder.CreateGEP(SrcC.getElementType(), SrcC.emitRawPointer(*this),
21746 llvm::ConstantInt::get(IntTy, i)),
21747 CharUnits::fromQuantity(4));
21748 Values.push_back(Builder.CreateBitCast(V, CType));
21749 }
21750 Value *Result = Builder.CreateCall(Intrinsic, Values);
21751 llvm::Type *DType = Dst.getElementType();
21752 for (unsigned i = 0; i < MI.NumEltsD; ++i)
21753 Builder.CreateAlignedStore(
21754 Builder.CreateBitCast(Builder.CreateExtractValue(Result, i), DType),
21755 Builder.CreateGEP(Dst.getElementType(), Dst.emitRawPointer(*this),
21756 llvm::ConstantInt::get(IntTy, i)),
21757 CharUnits::fromQuantity(4));
21758 return Result;
21759 }
21760 // The following builtins require half type support
21761 case NVPTX::BI__nvvm_ex2_approx_f16:
21762 return MakeHalfType(Intrinsic::nvvm_ex2_approx_f16, BuiltinID, E, *this);
21763 case NVPTX::BI__nvvm_ex2_approx_f16x2:
21764 return MakeHalfType(Intrinsic::nvvm_ex2_approx_f16x2, BuiltinID, E, *this);
21765 case NVPTX::BI__nvvm_ff2f16x2_rn:
21766 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rn, BuiltinID, E, *this);
21767 case NVPTX::BI__nvvm_ff2f16x2_rn_relu:
21768 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rn_relu, BuiltinID, E, *this);
21769 case NVPTX::BI__nvvm_ff2f16x2_rz:
21770 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rz, BuiltinID, E, *this);
21771 case NVPTX::BI__nvvm_ff2f16x2_rz_relu:
21772 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rz_relu, BuiltinID, E, *this);
21773 case NVPTX::BI__nvvm_fma_rn_f16:
21774 return MakeHalfType(Intrinsic::nvvm_fma_rn_f16, BuiltinID, E, *this);
21775 case NVPTX::BI__nvvm_fma_rn_f16x2:
21776 return MakeHalfType(Intrinsic::nvvm_fma_rn_f16x2, BuiltinID, E, *this);
21777 case NVPTX::BI__nvvm_fma_rn_ftz_f16:
21778 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_f16, BuiltinID, E, *this);
21779 case NVPTX::BI__nvvm_fma_rn_ftz_f16x2:
21780 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_f16x2, BuiltinID, E, *this);
21781 case NVPTX::BI__nvvm_fma_rn_ftz_relu_f16:
21782 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_relu_f16, BuiltinID, E,
21783 *this);
21784 case NVPTX::BI__nvvm_fma_rn_ftz_relu_f16x2:
21785 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_relu_f16x2, BuiltinID, E,
21786 *this);
21787 case NVPTX::BI__nvvm_fma_rn_ftz_sat_f16:
21788 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_sat_f16, BuiltinID, E,
21789 *this);
21790 case NVPTX::BI__nvvm_fma_rn_ftz_sat_f16x2:
21791 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_sat_f16x2, BuiltinID, E,
21792 *this);
21793 case NVPTX::BI__nvvm_fma_rn_relu_f16:
21794 return MakeHalfType(Intrinsic::nvvm_fma_rn_relu_f16, BuiltinID, E, *this);
21795 case NVPTX::BI__nvvm_fma_rn_relu_f16x2:
21796 return MakeHalfType(Intrinsic::nvvm_fma_rn_relu_f16x2, BuiltinID, E, *this);
21797 case NVPTX::BI__nvvm_fma_rn_sat_f16:
21798 return MakeHalfType(Intrinsic::nvvm_fma_rn_sat_f16, BuiltinID, E, *this);
21799 case NVPTX::BI__nvvm_fma_rn_sat_f16x2:
21800 return MakeHalfType(Intrinsic::nvvm_fma_rn_sat_f16x2, BuiltinID, E, *this);
21801 case NVPTX::BI__nvvm_fmax_f16:
21802 return MakeHalfType(Intrinsic::nvvm_fmax_f16, BuiltinID, E, *this);
21803 case NVPTX::BI__nvvm_fmax_f16x2:
21804 return MakeHalfType(Intrinsic::nvvm_fmax_f16x2, BuiltinID, E, *this);
21805 case NVPTX::BI__nvvm_fmax_ftz_f16:
21806 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_f16, BuiltinID, E, *this);
21807 case NVPTX::BI__nvvm_fmax_ftz_f16x2:
21808 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_f16x2, BuiltinID, E, *this);
21809 case NVPTX::BI__nvvm_fmax_ftz_nan_f16:
21810 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_f16, BuiltinID, E, *this);
21811 case NVPTX::BI__nvvm_fmax_ftz_nan_f16x2:
21812 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_f16x2, BuiltinID, E,
21813 *this);
21814 case NVPTX::BI__nvvm_fmax_ftz_nan_xorsign_abs_f16:
21815 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f16, BuiltinID,
21816 E, *this);
21817 case NVPTX::BI__nvvm_fmax_ftz_nan_xorsign_abs_f16x2:
21818 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f16x2,
21819 BuiltinID, E, *this);
21820 case NVPTX::BI__nvvm_fmax_ftz_xorsign_abs_f16:
21821 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_xorsign_abs_f16, BuiltinID, E,
21822 *this);
21823 case NVPTX::BI__nvvm_fmax_ftz_xorsign_abs_f16x2:
21824 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_xorsign_abs_f16x2, BuiltinID,
21825 E, *this);
21826 case NVPTX::BI__nvvm_fmax_nan_f16:
21827 return MakeHalfType(Intrinsic::nvvm_fmax_nan_f16, BuiltinID, E, *this);
21828 case NVPTX::BI__nvvm_fmax_nan_f16x2:
21829 return MakeHalfType(Intrinsic::nvvm_fmax_nan_f16x2, BuiltinID, E, *this);
21830 case NVPTX::BI__nvvm_fmax_nan_xorsign_abs_f16:
21831 return MakeHalfType(Intrinsic::nvvm_fmax_nan_xorsign_abs_f16, BuiltinID, E,
21832 *this);
21833 case NVPTX::BI__nvvm_fmax_nan_xorsign_abs_f16x2:
21834 return MakeHalfType(Intrinsic::nvvm_fmax_nan_xorsign_abs_f16x2, BuiltinID,
21835 E, *this);
21836 case NVPTX::BI__nvvm_fmax_xorsign_abs_f16:
21837 return MakeHalfType(Intrinsic::nvvm_fmax_xorsign_abs_f16, BuiltinID, E,
21838 *this);
21839 case NVPTX::BI__nvvm_fmax_xorsign_abs_f16x2:
21840 return MakeHalfType(Intrinsic::nvvm_fmax_xorsign_abs_f16x2, BuiltinID, E,
21841 *this);
21842 case NVPTX::BI__nvvm_fmin_f16:
21843 return MakeHalfType(Intrinsic::nvvm_fmin_f16, BuiltinID, E, *this);
21844 case NVPTX::BI__nvvm_fmin_f16x2:
21845 return MakeHalfType(Intrinsic::nvvm_fmin_f16x2, BuiltinID, E, *this);
21846 case NVPTX::BI__nvvm_fmin_ftz_f16:
21847 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_f16, BuiltinID, E, *this);
21848 case NVPTX::BI__nvvm_fmin_ftz_f16x2:
21849 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_f16x2, BuiltinID, E, *this);
21850 case NVPTX::BI__nvvm_fmin_ftz_nan_f16:
21851 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_f16, BuiltinID, E, *this);
21852 case NVPTX::BI__nvvm_fmin_ftz_nan_f16x2:
21853 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_f16x2, BuiltinID, E,
21854 *this);
21855 case NVPTX::BI__nvvm_fmin_ftz_nan_xorsign_abs_f16:
21856 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f16, BuiltinID,
21857 E, *this);
21858 case NVPTX::BI__nvvm_fmin_ftz_nan_xorsign_abs_f16x2:
21859 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f16x2,
21860 BuiltinID, E, *this);
21861 case NVPTX::BI__nvvm_fmin_ftz_xorsign_abs_f16:
21862 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_xorsign_abs_f16, BuiltinID, E,
21863 *this);
21864 case NVPTX::BI__nvvm_fmin_ftz_xorsign_abs_f16x2:
21865 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_xorsign_abs_f16x2, BuiltinID,
21866 E, *this);
21867 case NVPTX::BI__nvvm_fmin_nan_f16:
21868 return MakeHalfType(Intrinsic::nvvm_fmin_nan_f16, BuiltinID, E, *this);
21869 case NVPTX::BI__nvvm_fmin_nan_f16x2:
21870 return MakeHalfType(Intrinsic::nvvm_fmin_nan_f16x2, BuiltinID, E, *this);
21871 case NVPTX::BI__nvvm_fmin_nan_xorsign_abs_f16:
21872 return MakeHalfType(Intrinsic::nvvm_fmin_nan_xorsign_abs_f16, BuiltinID, E,
21873 *this);
21874 case NVPTX::BI__nvvm_fmin_nan_xorsign_abs_f16x2:
21875 return MakeHalfType(Intrinsic::nvvm_fmin_nan_xorsign_abs_f16x2, BuiltinID,
21876 E, *this);
21877 case NVPTX::BI__nvvm_fmin_xorsign_abs_f16:
21878 return MakeHalfType(Intrinsic::nvvm_fmin_xorsign_abs_f16, BuiltinID, E,
21879 *this);
21880 case NVPTX::BI__nvvm_fmin_xorsign_abs_f16x2:
21881 return MakeHalfType(Intrinsic::nvvm_fmin_xorsign_abs_f16x2, BuiltinID, E,
21882 *this);
21883 case NVPTX::BI__nvvm_ldg_h:
21884 case NVPTX::BI__nvvm_ldg_h2:
21885 return MakeHalfType(Intrinsic::not_intrinsic, BuiltinID, E, *this);
21886 case NVPTX::BI__nvvm_ldu_h:
21887 case NVPTX::BI__nvvm_ldu_h2:
21888 return MakeHalfType(Intrinsic::nvvm_ldu_global_f, BuiltinID, E, *this);
21889 case NVPTX::BI__nvvm_cp_async_ca_shared_global_4:
21890 return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_4,
21891 Intrinsic::nvvm_cp_async_ca_shared_global_4_s, *this, E,
21892 4);
21893 case NVPTX::BI__nvvm_cp_async_ca_shared_global_8:
21894 return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_8,
21895 Intrinsic::nvvm_cp_async_ca_shared_global_8_s, *this, E,
21896 8);
21897 case NVPTX::BI__nvvm_cp_async_ca_shared_global_16:
21898 return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_16,
21899 Intrinsic::nvvm_cp_async_ca_shared_global_16_s, *this, E,
21900 16);
21901 case NVPTX::BI__nvvm_cp_async_cg_shared_global_16:
21902 return MakeCpAsync(Intrinsic::nvvm_cp_async_cg_shared_global_16,
21903 Intrinsic::nvvm_cp_async_cg_shared_global_16_s, *this, E,
21904 16);
21905 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_x:
21906 return Builder.CreateCall(
21907 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_x));
21908 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_y:
21909 return Builder.CreateCall(
21910 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_y));
21911 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_z:
21912 return Builder.CreateCall(
21913 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_z));
21914 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_w:
21915 return Builder.CreateCall(
21916 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_w));
21917 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_x:
21918 return Builder.CreateCall(
21919 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_x));
21920 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_y:
21921 return Builder.CreateCall(
21922 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_y));
21923 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_z:
21924 return Builder.CreateCall(
21925 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_z));
21926 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_w:
21927 return Builder.CreateCall(
21928 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_w));
21929 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_x:
21930 return Builder.CreateCall(
21931 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_x));
21932 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_y:
21933 return Builder.CreateCall(
21934 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_y));
21935 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_z:
21936 return Builder.CreateCall(
21937 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_z));
21938 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_w:
21939 return Builder.CreateCall(
21940 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_w));
21941 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_x:
21942 return Builder.CreateCall(
21943 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_x));
21944 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_y:
21945 return Builder.CreateCall(
21946 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_y));
21947 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_z:
21948 return Builder.CreateCall(
21949 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_z));
21950 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_w:
21951 return Builder.CreateCall(
21952 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_w));
21953 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctarank:
21954 return Builder.CreateCall(
21955 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctarank));
21956 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctarank:
21957 return Builder.CreateCall(
21958 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctarank));
21959 case NVPTX::BI__nvvm_is_explicit_cluster:
21960 return Builder.CreateCall(
21961 CGM.getIntrinsic(Intrinsic::nvvm_is_explicit_cluster));
21962 case NVPTX::BI__nvvm_isspacep_shared_cluster:
21963 return Builder.CreateCall(
21964 CGM.getIntrinsic(Intrinsic::nvvm_isspacep_shared_cluster),
21965 EmitScalarExpr(E->getArg(0)));
21966 case NVPTX::BI__nvvm_mapa:
21967 return Builder.CreateCall(
21968 CGM.getIntrinsic(Intrinsic::nvvm_mapa),
21969 {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});
21970 case NVPTX::BI__nvvm_mapa_shared_cluster:
21971 return Builder.CreateCall(
21972 CGM.getIntrinsic(Intrinsic::nvvm_mapa_shared_cluster),
21973 {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});
21974 case NVPTX::BI__nvvm_getctarank:
21975 return Builder.CreateCall(
21976 CGM.getIntrinsic(Intrinsic::nvvm_getctarank),
21977 EmitScalarExpr(E->getArg(0)));
21978 case NVPTX::BI__nvvm_getctarank_shared_cluster:
21979 return Builder.CreateCall(
21980 CGM.getIntrinsic(Intrinsic::nvvm_getctarank_shared_cluster),
21981 EmitScalarExpr(E->getArg(0)));
21982 case NVPTX::BI__nvvm_barrier_cluster_arrive:
21983 return Builder.CreateCall(
21984 CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_arrive));
21985 case NVPTX::BI__nvvm_barrier_cluster_arrive_relaxed:
21986 return Builder.CreateCall(
21987 CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_arrive_relaxed));
21988 case NVPTX::BI__nvvm_barrier_cluster_wait:
21989 return Builder.CreateCall(
21990 CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_wait));
21991 case NVPTX::BI__nvvm_fence_sc_cluster:
21992 return Builder.CreateCall(
21993 CGM.getIntrinsic(Intrinsic::nvvm_fence_sc_cluster));
21994 default:
21995 return nullptr;
21996 }
21997}
21998
21999namespace {
22000struct BuiltinAlignArgs {
22001 llvm::Value *Src = nullptr;
22002 llvm::Type *SrcType = nullptr;
22003 llvm::Value *Alignment = nullptr;
22004 llvm::Value *Mask = nullptr;
22005 llvm::IntegerType *IntType = nullptr;
22006
22007 BuiltinAlignArgs(const CallExpr *E, CodeGenFunction &CGF) {
22008 QualType AstType = E->getArg(0)->getType();
22009 if (AstType->isArrayType())
22010 Src = CGF.EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(CGF);
22011 else
22012 Src = CGF.EmitScalarExpr(E->getArg(0));
22013 SrcType = Src->getType();
22014 if (SrcType->isPointerTy()) {
22015 IntType = IntegerType::get(
22016 CGF.getLLVMContext(),
22017 CGF.CGM.getDataLayout().getIndexTypeSizeInBits(SrcType));
22018 } else {
22019 assert(SrcType->isIntegerTy());
22020 IntType = cast<llvm::IntegerType>(SrcType);
22021 }
22022 Alignment = CGF.EmitScalarExpr(E->getArg(1));
22023 Alignment = CGF.Builder.CreateZExtOrTrunc(Alignment, IntType, "alignment");
22024 auto *One = llvm::ConstantInt::get(IntType, 1);
22025 Mask = CGF.Builder.CreateSub(Alignment, One, "mask");
22026 }
22027};
22028} // namespace
22029
22030/// Generate (x & (y-1)) == 0.
22031 RValue CodeGenFunction::EmitBuiltinIsAligned(const CallExpr *E) {
22032 BuiltinAlignArgs Args(E, *this);
22033 llvm::Value *SrcAddress = Args.Src;
22034 if (Args.SrcType->isPointerTy())
22035 SrcAddress =
22036 Builder.CreateBitOrPointerCast(Args.Src, Args.IntType, "src_addr");
22037 return RValue::get(Builder.CreateICmpEQ(
22038 Builder.CreateAnd(SrcAddress, Args.Mask, "set_bits"),
22039 llvm::Constant::getNullValue(Args.IntType), "is_aligned"));
22040}
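Editorial aside (not part of the source listing): the check above reduces alignment testing to a single mask-and-compare. For alignment 16 the mask is 15 (0xF), so an address is aligned exactly when its low four bits are clear. A tiny stand-alone C++ sketch of the same arithmetic:

    // Same (x & (align - 1)) == 0 test that EmitBuiltinIsAligned emits,
    // spelled out on a raw address. Align must be a power of two.
    #include <cstdint>

    constexpr bool is_aligned(std::uintptr_t Addr, std::uintptr_t Align) {
      return (Addr & (Align - 1)) == 0;
    }

    static_assert(is_aligned(0x1000, 16));   // low bits are 0x0
    static_assert(!is_aligned(0x1003, 16));  // low bits are 0x3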
22041
22042/// Generate (x & ~(y-1)) to align down or ((x+(y-1)) & ~(y-1)) to align up.
22043/// Note: For pointer types we can avoid ptrtoint/inttoptr pairs by using the
22044/// llvm.ptrmask intrinsic (with a GEP before in the align_up case).
22045 RValue CodeGenFunction::EmitBuiltinAlignTo(const CallExpr *E, bool AlignUp) {
22046 BuiltinAlignArgs Args(E, *this);
22047 llvm::Value *SrcForMask = Args.Src;
22048 if (AlignUp) {
22049 // When aligning up we have to first add the mask to ensure we go over the
22050 // next alignment value and then align down to the next valid multiple.
22051 // By adding the mask, we ensure that align_up on an already aligned
22052 // value will not change the value.
22053 if (Args.Src->getType()->isPointerTy()) {
22054 if (getLangOpts().isSignedOverflowDefined())
22055 SrcForMask =
22056 Builder.CreateGEP(Int8Ty, SrcForMask, Args.Mask, "over_boundary");
22057 else
22058 SrcForMask = EmitCheckedInBoundsGEP(Int8Ty, SrcForMask, Args.Mask,
22059 /*SignedIndices=*/true,
22060 /*isSubtraction=*/false,
22061 E->getExprLoc(), "over_boundary");
22062 } else {
22063 SrcForMask = Builder.CreateAdd(SrcForMask, Args.Mask, "over_boundary");
22064 }
22065 }
22066 // Invert the mask to only clear the lower bits.
22067 llvm::Value *InvertedMask = Builder.CreateNot(Args.Mask, "inverted_mask");
22068 llvm::Value *Result = nullptr;
22069 if (Args.Src->getType()->isPointerTy()) {
22070 Result = Builder.CreateIntrinsic(
22071 Intrinsic::ptrmask, {Args.SrcType, Args.IntType},
22072 {SrcForMask, InvertedMask}, nullptr, "aligned_result");
22073 } else {
22074 Result = Builder.CreateAnd(SrcForMask, InvertedMask, "aligned_result");
22075 }
22076 assert(Result->getType() == Args.SrcType);
22077 return RValue::get(Result);
22078}
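For usage-level context, a hedged sketch (not from this file) exercising the three builtins lowered by EmitBuiltinIsAligned and EmitBuiltinAlignTo; on pointers, align_up becomes a GEP over the mask followed by llvm.ptrmask, and align_down is a plain mask. Buffer name and values are illustrative only:

    #include <cstdio>

    int main() {
      alignas(32) char Buf[64];
      char *P = Buf + 3;                                  // deliberately misaligned
      std::printf("%d\n", __builtin_is_aligned(P, 16));   // 0
      char *Up = __builtin_align_up(P, 16);               // next 16-byte boundary
      char *Down = __builtin_align_down(P, 16);           // previous 16-byte boundary
      std::printf("%d %d\n", __builtin_is_aligned(Up, 16),
                  __builtin_is_aligned(Down, 16));        // 1 1
      return 0;
    }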
22079
22080 Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
22081 const CallExpr *E) {
22082 switch (BuiltinID) {
22083 case WebAssembly::BI__builtin_wasm_memory_size: {
22084 llvm::Type *ResultType = ConvertType(E->getType());
22085 Value *I = EmitScalarExpr(E->getArg(0));
22086 Function *Callee =
22087 CGM.getIntrinsic(Intrinsic::wasm_memory_size, ResultType);
22088 return Builder.CreateCall(Callee, I);
22089 }
22090 case WebAssembly::BI__builtin_wasm_memory_grow: {
22091 llvm::Type *ResultType = ConvertType(E->getType());
22092 Value *Args[] = {EmitScalarExpr(E->getArg(0)),
22093 EmitScalarExpr(E->getArg(1))};
22094 Function *Callee =
22095 CGM.getIntrinsic(Intrinsic::wasm_memory_grow, ResultType);
22096 return Builder.CreateCall(Callee, Args);
22097 }
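Editorial sketch (not part of the source listing): the two cases above map __builtin_wasm_memory_size and __builtin_wasm_memory_grow one-to-one onto the corresponding wasm intrinsics. Assuming a wasm32 target, a minimal use looks like the following; sizes are in 64KiB pages, and memory index 0 (a required constant) is the default linear memory:

    // Grows the default linear memory by one page and reports whether it worked.
    extern "C" unsigned long grow_by_one_page() {
      unsigned long Before = __builtin_wasm_memory_size(0);
      __builtin_wasm_memory_grow(0, 1);               // request one more page
      return __builtin_wasm_memory_size(0) - Before;  // 1 on success, 0 if grow failed
    }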
22098 case WebAssembly::BI__builtin_wasm_tls_size: {
22099 llvm::Type *ResultType = ConvertType(E->getType());
22100 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_size, ResultType);
22101 return Builder.CreateCall(Callee);
22102 }
22103 case WebAssembly::BI__builtin_wasm_tls_align: {
22104 llvm::Type *ResultType = ConvertType(E->getType());
22105 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_align, ResultType);
22106 return Builder.CreateCall(Callee);
22107 }
22108 case WebAssembly::BI__builtin_wasm_tls_base: {
22109 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_base);
22110 return Builder.CreateCall(Callee);
22111 }
22112 case WebAssembly::BI__builtin_wasm_throw: {
22113 Value *Tag = EmitScalarExpr(E->getArg(0));
22114 Value *Obj = EmitScalarExpr(E->getArg(1));
22115 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_throw);
22116 return Builder.CreateCall(Callee, {Tag, Obj});
22117 }
22118 case WebAssembly::BI__builtin_wasm_rethrow: {
22119 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_rethrow);
22120 return Builder.CreateCall(Callee);
22121 }
22122 case WebAssembly::BI__builtin_wasm_memory_atomic_wait32: {
22123 Value *Addr = EmitScalarExpr(E->getArg(0));
22124 Value *Expected = EmitScalarExpr(E->getArg(1));
22125 Value *Timeout = EmitScalarExpr(E->getArg(2));
22126 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_wait32);
22127 return Builder.CreateCall(Callee, {Addr, Expected, Timeout});
22128 }
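Editorial sketch (not part of the source listing): the wait32 builtin above is the futex-style wait of the wasm threads proposal. Assuming a wasm32 target built with atomics and a shared memory, a use might look like:

    // Blocks until *Addr changes from Expected (or the wait is woken).
    // Per the wasm spec the result is 0 = woken, 1 = value was not Expected,
    // 2 = timed out; -1 as the i64 timeout means "wait forever".
    extern "C" int wait_for_change(int *Addr, int Expected) {
      return __builtin_wasm_memory_atomic_wait32(Addr, Expected, -1);
    }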
22129 case WebAssembly::BI__builtin_wasm_memory_atomic_wait64: {
22130 Value *Addr = EmitScalarExpr(E->getArg(0));
22131 Value *Expected = EmitScalarExpr(E->getArg(1));
22132 Value *Timeout = EmitScalarExpr(E->getArg(2));
22133 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_wait64);
22134 return Builder.CreateCall(Callee, {Addr, Expected, Timeout});
22135 }
22136 case WebAssembly::BI__builtin_wasm_memory_atomic_notify: {
22137 Value *Addr = EmitScalarExpr(E->getArg(0));
22138 Value *Count = EmitScalarExpr(E->getArg(1));
22139 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_notify);
22140 return Builder.CreateCall(Callee, {Addr, Count});
22141 }
22142 case WebAssembly::BI__builtin_wasm_trunc_s_i32_f32:
22143 case WebAssembly::BI__builtin_wasm_trunc_s_i32_f64:
22144 case WebAssembly::BI__builtin_wasm_trunc_s_i64_f32:
22145 case WebAssembly::BI__builtin_wasm_trunc_s_i64_f64: {
22146 Value *Src = EmitScalarExpr(E->getArg(0));
22147 llvm::Type *ResT = ConvertType(E->getType());
22148 Function *Callee =
22149 CGM.getIntrinsic(Intrinsic::wasm_trunc_signed, {ResT, Src->getType()});
22150 return Builder.CreateCall(Callee, {Src});
22151 }
22152 case WebAssembly::BI__builtin_wasm_trunc_u_i32_f32:
22153 case WebAssembly::BI__builtin_wasm_trunc_u_i32_f64:
22154 case WebAssembly::BI__builtin_wasm_trunc_u_i64_f32:
22155 case WebAssembly::BI__builtin_wasm_trunc_u_i64_f64: {
22156 Value *Src = EmitScalarExpr(E->getArg(0));
22157 llvm::Type *ResT = ConvertType(E->getType());
22158 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_trunc_unsigned,
22159 {ResT, Src->getType()});
22160 return Builder.CreateCall(Callee, {Src});
22161 }
22162 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f32:
22163 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f64:
22164 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f32:
22165 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f64:
22166 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i16x8_f16x8:
22167 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32x4_f32x4: {
22168 Value *Src = EmitScalarExpr(E->getArg(0));
22169 llvm::Type *ResT = ConvertType(E->getType());
22170 Function *Callee =
22171 CGM.getIntrinsic(Intrinsic::fptosi_sat, {ResT, Src->getType()});
22172 return Builder.CreateCall(Callee, {Src});
22173 }
22174 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f32:
22175 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f64:
22176 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f32:
22177 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f64:
22178 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i16x8_f16x8:
22179 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32x4_f32x4: {
22180 Value *Src = EmitScalarExpr(E->getArg(0));
22181 llvm::Type *ResT = ConvertType(E->getType());
22182 Function *Callee =
22183 CGM.getIntrinsic(Intrinsic::fptoui_sat, {ResT, Src->getType()});
22184 return Builder.CreateCall(Callee, {Src});
22185 }
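Editorial note (not part of the source listing): the two groups of cases above differ only in overflow behavior; the plain trunc builtins lower to the trapping wasm conversions, while the saturate forms lower to llvm.fptosi.sat / llvm.fptoui.sat and clamp (NaN becomes 0). A small sketch of both, assuming a wasm32 target:

    // Trapping vs. saturating float-to-int conversion builtins.
    extern "C" int to_int_trapping(float F) {
      return __builtin_wasm_trunc_s_i32_f32(F);           // traps on NaN or out-of-range
    }
    extern "C" int to_int_saturating(float F) {
      return __builtin_wasm_trunc_saturate_s_i32_f32(F);  // clamps; NaN -> 0
    }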
22186 case WebAssembly::BI__builtin_wasm_min_f32:
22187 case WebAssembly::BI__builtin_wasm_min_f64:
22188 case WebAssembly::BI__builtin_wasm_min_f16x8:
22189 case WebAssembly::BI__builtin_wasm_min_f32x4:
22190 case WebAssembly::BI__builtin_wasm_min_f64x2: {
22191 Value *LHS = EmitScalarExpr(E->getArg(0));
22192 Value *RHS = EmitScalarExpr(E->getArg(1));
22193 Function *Callee =
22194 CGM.getIntrinsic(Intrinsic::minimum, ConvertType(E->getType()));
22195 return Builder.CreateCall(Callee, {LHS, RHS});
22196 }
22197 case WebAssembly::BI__builtin_wasm_max_f32:
22198 case WebAssembly::BI__builtin_wasm_max_f64:
22199 case WebAssembly::BI__builtin_wasm_max_f16x8:
22200 case WebAssembly::BI__builtin_wasm_max_f32x4:
22201 case WebAssembly::BI__builtin_wasm_max_f64x2: {
22202 Value *LHS = EmitScalarExpr(E->getArg(0));
22203 Value *RHS = EmitScalarExpr(E->getArg(1));
22204 Function *Callee =
22205 CGM.getIntrinsic(Intrinsic::maximum, ConvertType(E->getType()));
22206 return Builder.CreateCall(Callee, {LHS, RHS});
22207 }
22208 case WebAssembly::BI__builtin_wasm_pmin_f16x8:
22209 case WebAssembly::BI__builtin_wasm_pmin_f32x4:
22210 case WebAssembly::BI__builtin_wasm_pmin_f64x2: {
22211 Value *LHS = EmitScalarExpr(E->getArg(0));
22212 Value *RHS = EmitScalarExpr(E->getArg(1));
22213 Function *Callee =
22214 CGM.getIntrinsic(Intrinsic::wasm_pmin, ConvertType(E->getType()));
22215 return Builder.CreateCall(Callee, {LHS, RHS});
22216 }
22217 case WebAssembly::BI__builtin_wasm_pmax_f16x8:
22218 case WebAssembly::BI__builtin_wasm_pmax_f32x4:
22219 case WebAssembly::BI__builtin_wasm_pmax_f64x2: {
22220 Value *LHS = EmitScalarExpr(E->getArg(0));
22221 Value *RHS = EmitScalarExpr(E->getArg(1));
22222 Function *Callee =
22223 CGM.getIntrinsic(Intrinsic::wasm_pmax, ConvertType(E->getType()));
22224 return Builder.CreateCall(Callee, {LHS, RHS});
22225 }
22226 case WebAssembly::BI__builtin_wasm_ceil_f16x8:
22227 case WebAssembly::BI__builtin_wasm_floor_f16x8:
22228 case WebAssembly::BI__builtin_wasm_trunc_f16x8:
22229 case WebAssembly::BI__builtin_wasm_nearest_f16x8:
22230 case WebAssembly::BI__builtin_wasm_ceil_f32x4:
22231 case WebAssembly::BI__builtin_wasm_floor_f32x4:
22232 case WebAssembly::BI__builtin_wasm_trunc_f32x4:
22233 case WebAssembly::BI__builtin_wasm_nearest_f32x4:
22234 case WebAssembly::BI__builtin_wasm_ceil_f64x2:
22235 case WebAssembly::BI__builtin_wasm_floor_f64x2:
22236 case WebAssembly::BI__builtin_wasm_trunc_f64x2:
22237 case WebAssembly::BI__builtin_wasm_nearest_f64x2: {
22238 unsigned IntNo;
22239 switch (BuiltinID) {
22240 case WebAssembly::BI__builtin_wasm_ceil_f16x8:
22241 case WebAssembly::BI__builtin_wasm_ceil_f32x4:
22242 case WebAssembly::BI__builtin_wasm_ceil_f64x2:
22243 IntNo = Intrinsic::ceil;
22244 break;
22245 case WebAssembly::BI__builtin_wasm_floor_f16x8:
22246 case WebAssembly::BI__builtin_wasm_floor_f32x4:
22247 case WebAssembly::BI__builtin_wasm_floor_f64x2:
22248 IntNo = Intrinsic::floor;
22249 break;
22250 case WebAssembly::BI__builtin_wasm_trunc_f16x8:
22251 case WebAssembly::BI__builtin_wasm_trunc_f32x4:
22252 case WebAssembly::BI__builtin_wasm_trunc_f64x2:
22253 IntNo = Intrinsic::trunc;
22254 break;
22255 case WebAssembly::BI__builtin_wasm_nearest_f16x8:
22256 case WebAssembly::BI__builtin_wasm_nearest_f32x4:
22257 case WebAssembly::BI__builtin_wasm_nearest_f64x2:
22258 IntNo = Intrinsic::nearbyint;
22259 break;
22260 default:
22261 llvm_unreachable("unexpected builtin ID");
22262 }
22263 Value *Value = EmitScalarExpr(E->getArg(0));
22264 Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType()));
22265 return Builder.CreateCall(Callee, Value);
22266 }
22267 case WebAssembly::BI__builtin_wasm_ref_null_extern: {
22268 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_ref_null_extern);
22269 return Builder.CreateCall(Callee);
22270 }
22271 case WebAssembly::BI__builtin_wasm_ref_null_func: {
22272 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_ref_null_func);
22273 return Builder.CreateCall(Callee);
22274 }
22275 case WebAssembly::BI__builtin_wasm_swizzle_i8x16: {
22276 Value *Src = EmitScalarExpr(E->getArg(0));
22277 Value *Indices = EmitScalarExpr(E->getArg(1));
22278 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_swizzle);
22279 return Builder.CreateCall(Callee, {Src, Indices});
22280 }
22281 case WebAssembly::BI__builtin_wasm_abs_i8x16:
22282 case WebAssembly::BI__builtin_wasm_abs_i16x8:
22283 case WebAssembly::BI__builtin_wasm_abs_i32x4:
22284 case WebAssembly::BI__builtin_wasm_abs_i64x2: {
22285 Value *Vec = EmitScalarExpr(E->getArg(0));
22286 Value *Neg = Builder.CreateNeg(Vec, "neg");
22287 Constant *Zero = llvm::Constant::getNullValue(Vec->getType());
22288 Value *ICmp = Builder.CreateICmpSLT(Vec, Zero, "abscond");
22289 return Builder.CreateSelect(ICmp, Neg, Vec, "abs");
22290 }
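Editorial aside (not part of the source listing): the integer abs cases above are emitted as the classic compare-and-select pattern rather than a target intrinsic. A scalar illustration of the per-lane logic:

    // select(x < 0, -x, x), the same pattern the lowering above applies lane-wise.
    #include <cstdint>

    constexpr std::int8_t lane_abs(std::int8_t X) {
      // Like wasm i8x16.abs, INT8_MIN wraps back to INT8_MIN.
      return X < 0 ? static_cast<std::int8_t>(-X) : X;
    }

    static_assert(lane_abs(-5) == 5);
    static_assert(lane_abs(7) == 7);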
22291 case WebAssembly::BI__builtin_wasm_avgr_u_i8x16:
22292 case WebAssembly::BI__builtin_wasm_avgr_u_i16x8: {
22293 Value *LHS = EmitScalarExpr(E->getArg(0));
22294 Value *RHS = EmitScalarExpr(E->getArg(1));
22295 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_avgr_unsigned,
22296 ConvertType(E->getType()));
22297 return Builder.CreateCall(Callee, {LHS, RHS});
22298 }
22299 case WebAssembly::BI__builtin_wasm_q15mulr_sat_s_i16x8: {
22300 Value *LHS = EmitScalarExpr(E->getArg(0));
22301 Value *RHS = EmitScalarExpr(E->getArg(1));
22302 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_q15mulr_sat_signed);
22303 return Builder.CreateCall(Callee, {LHS, RHS});
22304 }
22305 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_s_i16x8:
22306 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_u_i16x8:
22307 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_s_i32x4:
22308 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_u_i32x4: {
22309 Value *Vec = EmitScalarExpr(E->getArg(0));
22310 unsigned IntNo;
22311 switch (BuiltinID) {
22312 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_s_i16x8:
22313 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_s_i32x4:
22314 IntNo = Intrinsic::wasm_extadd_pairwise_signed;
22315 break;
22316 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_u_i16x8:
22317 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_u_i32x4:
22318 IntNo = Intrinsic::wasm_extadd_pairwise_unsigned;
22319 break;
22320 default:
22321 llvm_unreachable("unexpected builtin ID");
22322 }
22323
22324 Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType()));
22325 return Builder.CreateCall(Callee, Vec);
22326 }
22327 case WebAssembly::BI__builtin_wasm_bitselect: {
22328 Value *V1 = EmitScalarExpr(E->getArg(0));
22329 Value *V2 = EmitScalarExpr(E->getArg(1));
22330 Value *C = EmitScalarExpr(E->getArg(2));
22331 Function *Callee =
22332 CGM.getIntrinsic(Intrinsic::wasm_bitselect, ConvertType(E->getType()));
22333 return Builder.CreateCall(Callee, {V1, V2, C});
22334 }
22335 case WebAssembly::BI__builtin_wasm_dot_s_i32x4_i16x8: {
22336 Value *LHS = EmitScalarExpr(E->getArg(0));
22337 Value *RHS = EmitScalarExpr(E->getArg(1));
22338 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_dot);
22339 return Builder.CreateCall(Callee, {LHS, RHS});
22340 }
22341 case WebAssembly::BI__builtin_wasm_any_true_v128:
22342 case WebAssembly::BI__builtin_wasm_all_true_i8x16:
22343 case WebAssembly::BI__builtin_wasm_all_true_i16x8:
22344 case WebAssembly::BI__builtin_wasm_all_true_i32x4:
22345 case WebAssembly::BI__builtin_wasm_all_true_i64x2: {
22346 unsigned IntNo;
22347 switch (BuiltinID) {
22348 case WebAssembly::BI__builtin_wasm_any_true_v128:
22349 IntNo = Intrinsic::wasm_anytrue;
22350 break;
22351 case WebAssembly::BI__builtin_wasm_all_true_i8x16:
22352 case WebAssembly::BI__builtin_wasm_all_true_i16x8:
22353 case WebAssembly::BI__builtin_wasm_all_true_i32x4:
22354 case WebAssembly::BI__builtin_wasm_all_true_i64x2:
22355 IntNo = Intrinsic::wasm_alltrue;
22356 break;
22357 default:
22358 llvm_unreachable("unexpected builtin ID");
22359 }
22360 Value *Vec = EmitScalarExpr(E->getArg(0));
22361 Function *Callee = CGM.getIntrinsic(IntNo, Vec->getType());
22362 return Builder.CreateCall(Callee, {Vec});
22363 }
22364 case WebAssembly::BI__builtin_wasm_bitmask_i8x16:
22365 case WebAssembly::BI__builtin_wasm_bitmask_i16x8:
22366 case WebAssembly::BI__builtin_wasm_bitmask_i32x4:
22367 case WebAssembly::BI__builtin_wasm_bitmask_i64x2: {
22368 Value *Vec = EmitScalarExpr(E->getArg(0));
22369 Function *Callee =
22370 CGM.getIntrinsic(Intrinsic::wasm_bitmask, Vec->getType());
22371 return Builder.CreateCall(Callee, {Vec});
22372 }
22373 case WebAssembly::BI__builtin_wasm_abs_f16x8:
22374 case WebAssembly::BI__builtin_wasm_abs_f32x4:
22375 case WebAssembly::BI__builtin_wasm_abs_f64x2: {
22376 Value *Vec = EmitScalarExpr(E->getArg(0));
22377 Function *Callee = CGM.getIntrinsic(Intrinsic::fabs, Vec->getType());
22378 return Builder.CreateCall(Callee, {Vec});
22379 }
22380 case WebAssembly::BI__builtin_wasm_sqrt_f16x8:
22381 case WebAssembly::BI__builtin_wasm_sqrt_f32x4:
22382 case WebAssembly::BI__builtin_wasm_sqrt_f64x2: {
22383 Value *Vec = EmitScalarExpr(E->getArg(0));
22384 Function *Callee = CGM.getIntrinsic(Intrinsic::sqrt, Vec->getType());
22385 return Builder.CreateCall(Callee, {Vec});
22386 }
22387 case WebAssembly::BI__builtin_wasm_narrow_s_i8x16_i16x8:
22388 case WebAssembly::BI__builtin_wasm_narrow_u_i8x16_i16x8:
22389 case WebAssembly::BI__builtin_wasm_narrow_s_i16x8_i32x4:
22390 case WebAssembly::BI__builtin_wasm_narrow_u_i16x8_i32x4: {
22391 Value *Low = EmitScalarExpr(E->getArg(0));
22392 Value *High = EmitScalarExpr(E->getArg(1));
22393 unsigned IntNo;
22394 switch (BuiltinID) {
22395 case WebAssembly::BI__builtin_wasm_narrow_s_i8x16_i16x8:
22396 case WebAssembly::BI__builtin_wasm_narrow_s_i16x8_i32x4:
22397 IntNo = Intrinsic::wasm_narrow_signed;
22398 break;
22399 case WebAssembly::BI__builtin_wasm_narrow_u_i8x16_i16x8:
22400 case WebAssembly::BI__builtin_wasm_narrow_u_i16x8_i32x4:
22401 IntNo = Intrinsic::wasm_narrow_unsigned;
22402 break;
22403 default:
22404 llvm_unreachable("unexpected builtin ID");
22405 }
22406 Function *Callee =
22407 CGM.getIntrinsic(IntNo, {ConvertType(E->getType()), Low->getType()});
22408 return Builder.CreateCall(Callee, {Low, High});
22409 }
22410 case WebAssembly::BI__builtin_wasm_trunc_sat_s_zero_f64x2_i32x4:
22411 case WebAssembly::BI__builtin_wasm_trunc_sat_u_zero_f64x2_i32x4: {
22412 Value *Vec = EmitScalarExpr(E->getArg(0));
22413 unsigned IntNo;
22414 switch (BuiltinID) {
22415 case WebAssembly::BI__builtin_wasm_trunc_sat_s_zero_f64x2_i32x4:
22416 IntNo = Intrinsic::fptosi_sat;
22417 break;
22418 case WebAssembly::BI__builtin_wasm_trunc_sat_u_zero_f64x2_i32x4:
22419 IntNo = Intrinsic::fptoui_sat;
22420 break;
22421 default:
22422 llvm_unreachable("unexpected builtin ID");
22423 }
22424 llvm::Type *SrcT = Vec->getType();
22425 llvm::Type *TruncT = SrcT->getWithNewType(Builder.getInt32Ty());
22426 Function *Callee = CGM.getIntrinsic(IntNo, {TruncT, SrcT});
22427 Value *Trunc = Builder.CreateCall(Callee, Vec);
22428 Value *Splat = Constant::getNullValue(TruncT);
22429 return Builder.CreateShuffleVector(Trunc, Splat, ArrayRef<int>{0, 1, 2, 3});
22430 }
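Editorial note (not part of the source listing): the shufflevector with a zero splat above implements the "zero" variants of the f64x2-to-i32x4 conversion, i.e. the two converted lanes land in the low half of the result and the upper two lanes are zeroed. A hedged sketch using the wasm_simd128.h wrapper (assuming -msimd128; the wrapper expands to the builtin handled here):

    // Saturating f64x2 -> i32x4 conversion; lanes 2 and 3 of the result are zero.
    #include <wasm_simd128.h>

    extern "C" v128_t trunc_sat_zero(v128_t Doubles) {
      return wasm_i32x4_trunc_sat_f64x2_zero(Doubles);
    }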
22431 case WebAssembly::BI__builtin_wasm_shuffle_i8x16: {
22432 Value *Ops[18];
22433 size_t OpIdx = 0;
22434 Ops[OpIdx++] = EmitScalarExpr(E->getArg(0));
22435 Ops[OpIdx++] = EmitScalarExpr(E->getArg(1));
22436 while (OpIdx < 18) {
22437 std::optional<llvm::APSInt> LaneConst =
22438 E->getArg(OpIdx)->getIntegerConstantExpr(getContext());
22439 assert(LaneConst && "Constant arg isn't actually constant?");
22440 Ops[OpIdx++] = llvm::ConstantInt::get(getLLVMContext(), *LaneConst);
22441 }
22442 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_shuffle);
22443 return Builder.CreateCall(Callee, Ops);
22444 }
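Editorial note (not part of the source listing): as the assert above indicates, all 16 lane indices of the shuffle builtin must be integer constant expressions; each index picks a byte from the 32-byte concatenation of the two operands. A hedged sketch via the wasm_simd128.h wrapper (which expands to __builtin_wasm_shuffle_i8x16), assuming -msimd128:

    // Byte-reverse a v128 with a constant shuffle pattern.
    #include <wasm_simd128.h>

    extern "C" v128_t reverse_bytes(v128_t V) {
      return wasm_i8x16_shuffle(V, V, 15, 14, 13, 12, 11, 10, 9, 8,
                                7, 6, 5, 4, 3, 2, 1, 0);
    }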
22445 case WebAssembly::BI__builtin_wasm_relaxed_madd_f16x8:
22446 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f16x8:
22447 case WebAssembly::BI__builtin_wasm_relaxed_madd_f32x4:
22448 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f32x4:
22449 case WebAssembly::BI__builtin_wasm_relaxed_madd_f64x2:
22450 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f64x2: {
22451 Value *A = EmitScalarExpr(E->getArg(0));
22452 Value *B = EmitScalarExpr(E->getArg(1));
22453 Value *C = EmitScalarExpr(E->getArg(2));
22454 unsigned IntNo;
22455 switch (BuiltinID) {
22456 case WebAssembly::BI__builtin_wasm_relaxed_madd_f16x8:
22457 case WebAssembly::BI__builtin_wasm_relaxed_madd_f32x4:
22458 case WebAssembly::BI__builtin_wasm_relaxed_madd_f64x2:
22459 IntNo = Intrinsic::wasm_relaxed_madd;
22460 break;
22461 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f16x8:
22462 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f32x4:
22463 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f64x2:
22464 IntNo = Intrinsic::wasm_relaxed_nmadd;
22465 break;
22466 default:
22467 llvm_unreachable("unexpected builtin ID");
22468 }
22469 Function *Callee = CGM.getIntrinsic(IntNo, A->getType());
22470 return Builder.CreateCall(Callee, {A, B, C});
22471 }
22472 case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i8x16:
22473 case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i16x8:
22474 case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i32x4:
22475 case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i64x2: {
22476 Value *A = EmitScalarExpr(E->getArg(0));
22477 Value *B = EmitScalarExpr(E->getArg(1));
22478 Value *C = EmitScalarExpr(E->getArg(2));
22479 Function *Callee =
22480 CGM.getIntrinsic(Intrinsic::wasm_relaxed_laneselect, A->getType());
22481 return Builder.CreateCall(Callee, {A, B, C});
22482 }
22483 case WebAssembly::BI__builtin_wasm_relaxed_swizzle_i8x16: {
22484 Value *Src = EmitScalarExpr(E->getArg(0));
22485 Value *Indices = EmitScalarExpr(E->getArg(1));
22486 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_relaxed_swizzle);
22487 return Builder.CreateCall(Callee, {Src, Indices});
22488 }
22489 case WebAssembly::BI__builtin_wasm_relaxed_min_f32x4:
22490 case WebAssembly::BI__builtin_wasm_relaxed_max_f32x4:
22491 case WebAssembly::BI__builtin_wasm_relaxed_min_f64x2:
22492 case WebAssembly::BI__builtin_wasm_relaxed_max_f64x2: {
22493 Value *LHS = EmitScalarExpr(E->getArg(0));
22494 Value *RHS = EmitScalarExpr(E->getArg(1));
22495 unsigned IntNo;
22496 switch (BuiltinID) {
22497 case WebAssembly::BI__builtin_wasm_relaxed_min_f32x4:
22498 case WebAssembly::BI__builtin_wasm_relaxed_min_f64x2:
22499 IntNo = Intrinsic::wasm_relaxed_min;
22500 break;
22501 case WebAssembly::BI__builtin_wasm_relaxed_max_f32x4:
22502 case WebAssembly::BI__builtin_wasm_relaxed_max_f64x2:
22503 IntNo = Intrinsic::wasm_relaxed_max;
22504 break;
22505 default:
22506 llvm_unreachable("unexpected builtin ID");
22507 }
22508 Function *Callee = CGM.getIntrinsic(IntNo, LHS->getType());
22509 return Builder.CreateCall(Callee, {LHS, RHS});
22510 }
22511 case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_i32x4_f32x4:
22512 case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_i32x4_f32x4:
22513 case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_zero_i32x4_f64x2:
22514 case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_zero_i32x4_f64x2: {
22515 Value *Vec = EmitScalarExpr(E->getArg(0));
22516 unsigned IntNo;
22517 switch (BuiltinID) {
22518 case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_i32x4_f32x4:
22519 IntNo = Intrinsic::wasm_relaxed_trunc_signed;
22520 break;
22521 case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_i32x4_f32x4:
22522 IntNo = Intrinsic::wasm_relaxed_trunc_unsigned;
22523 break;
22524 case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_zero_i32x4_f64x2:
22525 IntNo = Intrinsic::wasm_relaxed_trunc_signed_zero;
22526 break;
22527 case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_zero_i32x4_f64x2:
22528 IntNo = Intrinsic::wasm_relaxed_trunc_unsigned_zero;
22529 break;
22530 default:
22531 llvm_unreachable("unexpected builtin ID");
22532 }
22533 Function *Callee = CGM.getIntrinsic(IntNo);
22534 return Builder.CreateCall(Callee, {Vec});
22535 }
22536 case WebAssembly::BI__builtin_wasm_relaxed_q15mulr_s_i16x8: {
22537 Value *LHS = EmitScalarExpr(E->getArg(0));
22538 Value *RHS = EmitScalarExpr(E->getArg(1));
22539 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_relaxed_q15mulr_signed);
22540 return Builder.CreateCall(Callee, {LHS, RHS});
22541 }
22542 case WebAssembly::BI__builtin_wasm_relaxed_dot_i8x16_i7x16_s_i16x8: {
22543 Value *LHS = EmitScalarExpr(E->getArg(0));
22544 Value *RHS = EmitScalarExpr(E->getArg(1));
22545 Function *Callee =
22546 CGM.getIntrinsic(Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed);
22547 return Builder.CreateCall(Callee, {LHS, RHS});
22548 }
22549 case WebAssembly::BI__builtin_wasm_relaxed_dot_i8x16_i7x16_add_s_i32x4: {
22550 Value *LHS = EmitScalarExpr(E->getArg(0));
22551 Value *RHS = EmitScalarExpr(E->getArg(1));
22552 Value *Acc = EmitScalarExpr(E->getArg(2));
22553 Function *Callee =
22554 CGM.getIntrinsic(Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed);
22555 return Builder.CreateCall(Callee, {LHS, RHS, Acc});
22556 }
22557 case WebAssembly::BI__builtin_wasm_relaxed_dot_bf16x8_add_f32_f32x4: {
22558 Value *LHS = EmitScalarExpr(E->getArg(0));
22559 Value *RHS = EmitScalarExpr(E->getArg(1));
22560 Value *Acc = EmitScalarExpr(E->getArg(2));
22561 Function *Callee =
22562 CGM.getIntrinsic(Intrinsic::wasm_relaxed_dot_bf16x8_add_f32);
22563 return Builder.CreateCall(Callee, {LHS, RHS, Acc});
22564 }
22565 case WebAssembly::BI__builtin_wasm_loadf16_f32: {
22566 Value *Addr = EmitScalarExpr(E->getArg(0));
22567 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_loadf16_f32);
22568 return Builder.CreateCall(Callee, {Addr});
22569 }
22570 case WebAssembly::BI__builtin_wasm_storef16_f32: {
22571 Value *Val = EmitScalarExpr(E->getArg(0));
22572 Value *Addr = EmitScalarExpr(E->getArg(1));
22573 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_storef16_f32);
22574 return Builder.CreateCall(Callee, {Val, Addr});
22575 }
22576 case WebAssembly::BI__builtin_wasm_splat_f16x8: {
22577 Value *Val = EmitScalarExpr(E->getArg(0));
22578 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_splat_f16x8);
22579 return Builder.CreateCall(Callee, {Val});
22580 }
22581 case WebAssembly::BI__builtin_wasm_extract_lane_f16x8: {
22582 Value *Vector = EmitScalarExpr(E->getArg(0));
22583 Value *Index = EmitScalarExpr(E->getArg(1));
22584 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_extract_lane_f16x8);
22585 return Builder.CreateCall(Callee, {Vector, Index});
22586 }
22587 case WebAssembly::BI__builtin_wasm_replace_lane_f16x8: {
22588 Value *Vector = EmitScalarExpr(E->getArg(0));
22589 Value *Index = EmitScalarExpr(E->getArg(1));
22590 Value *Val = EmitScalarExpr(E->getArg(2));
22591 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_replace_lane_f16x8);
22592 return Builder.CreateCall(Callee, {Vector, Index, Val});
22593 }
22594 case WebAssembly::BI__builtin_wasm_table_get: {
22595 assert(E->getArg(0)->getType()->isArrayType());
22596 Value *Table = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
22597 Value *Index = EmitScalarExpr(E->getArg(1));
22598 Function *Callee;
22599 if (E->getType().isWebAssemblyExternrefType())
22600 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_get_externref);
22601 else if (E->getType().isWebAssemblyFuncrefType())
22602 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_get_funcref);
22603 else
22604 llvm_unreachable(
22605 "Unexpected reference type for __builtin_wasm_table_get");
22606 return Builder.CreateCall(Callee, {Table, Index});
22607 }
22608 case WebAssembly::BI__builtin_wasm_table_set: {
22609 assert(E->getArg(0)->getType()->isArrayType());
22610 Value *Table = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
22611 Value *Index = EmitScalarExpr(E->getArg(1));
22612 Value *Val = EmitScalarExpr(E->getArg(2));
22613 Function *Callee;
22614 if (E->getArg(2)->getType().isWebAssemblyExternrefType())
22615 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_set_externref);
22616 else if (E->getArg(2)->getType().isWebAssemblyFuncrefType())
22617 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_set_funcref);
22618 else
22619 llvm_unreachable(
22620 "Unexpected reference type for __builtin_wasm_table_set");
22621 return Builder.CreateCall(Callee, {Table, Index, Val});
22622 }
22623 case WebAssembly::BI__builtin_wasm_table_size: {
22624 assert(E->getArg(0)->getType()->isArrayType());
22625 Value *Value = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
22626 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_table_size);
22627 return Builder.CreateCall(Callee, Value);
22628 }
22629 case WebAssembly::BI__builtin_wasm_table_grow: {
22630 assert(E->getArg(0)->getType()->isArrayType());
22631 Value *Table = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
22632 Value *Val = EmitScalarExpr(E->getArg(1));
22633 Value *NElems = EmitScalarExpr(E->getArg(2));
22634
22635 Function *Callee;
22636 if (E->getArg(1)->getType().isWebAssemblyExternrefType())
22637 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_grow_externref);
22638 else if (E->getArg(2)->getType().isWebAssemblyFuncrefType())
22639 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_fill_funcref);
22640 else
22641 llvm_unreachable(
22642 "Unexpected reference type for __builtin_wasm_table_grow");
22643
22644 return Builder.CreateCall(Callee, {Table, Val, NElems});
22645 }
22646 case WebAssembly::BI__builtin_wasm_table_fill: {
22647 assert(E->getArg(0)->getType()->isArrayType());
22648 Value *Table = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
22649 Value *Index = EmitScalarExpr(E->getArg(1));
22650 Value *Val = EmitScalarExpr(E->getArg(2));
22651 Value *NElems = EmitScalarExpr(E->getArg(3));
22652
22653 Function *Callee;
22654 if (E->getArg(2)->getType().isWebAssemblyExternrefType())
22655 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_fill_externref);
22656 else if (E->getArg(2)->getType().isWebAssemblyFuncrefType())
22657 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_fill_funcref);
22658 else
22659 llvm_unreachable(
22660 "Unexpected reference type for __builtin_wasm_table_fill");
22661
22662 return Builder.CreateCall(Callee, {Table, Index, Val, NElems});
22663 }
22664 case WebAssembly::BI__builtin_wasm_table_copy: {
22665 assert(E->getArg(0)->getType()->isArrayType());
22666 Value *TableX = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
22667 Value *TableY = EmitArrayToPointerDecay(E->getArg(1)).emitRawPointer(*this);
22668 Value *DstIdx = EmitScalarExpr(E->getArg(2));
22669 Value *SrcIdx = EmitScalarExpr(E->getArg(3));
22670 Value *NElems = EmitScalarExpr(E->getArg(4));
22671
22672 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_table_copy);
22673
22674 return Builder.CreateCall(Callee, {TableX, TableY, SrcIdx, DstIdx, NElems});
22675 }
22676 default:
22677 return nullptr;
22678 }
22679}
22680
22681static std::pair<Intrinsic::ID, unsigned>
22682 getIntrinsicForHexagonNonClangBuiltin(unsigned BuiltinID) {
22683 struct Info {
22684 unsigned BuiltinID;
22685 Intrinsic::ID IntrinsicID;
22686 unsigned VecLen;
22687 };
22688 static Info Infos[] = {
22689#define CUSTOM_BUILTIN_MAPPING(x,s) \
22690 { Hexagon::BI__builtin_HEXAGON_##x, Intrinsic::hexagon_##x, s },
22691 CUSTOM_BUILTIN_MAPPING(L2_loadrub_pci, 0)
22692 CUSTOM_BUILTIN_MAPPING(L2_loadrb_pci, 0)
22693 CUSTOM_BUILTIN_MAPPING(L2_loadruh_pci, 0)
22694 CUSTOM_BUILTIN_MAPPING(L2_loadrh_pci, 0)
22695 CUSTOM_BUILTIN_MAPPING(L2_loadri_pci, 0)
22696 CUSTOM_BUILTIN_MAPPING(L2_loadrd_pci, 0)
22697 CUSTOM_BUILTIN_MAPPING(L2_loadrub_pcr, 0)
22698 CUSTOM_BUILTIN_MAPPING(L2_loadrb_pcr, 0)
22699 CUSTOM_BUILTIN_MAPPING(L2_loadruh_pcr, 0)
22700 CUSTOM_BUILTIN_MAPPING(L2_loadrh_pcr, 0)
22701 CUSTOM_BUILTIN_MAPPING(L2_loadri_pcr, 0)
22702 CUSTOM_BUILTIN_MAPPING(L2_loadrd_pcr, 0)
22703 CUSTOM_BUILTIN_MAPPING(S2_storerb_pci, 0)
22704 CUSTOM_BUILTIN_MAPPING(S2_storerh_pci, 0)
22705 CUSTOM_BUILTIN_MAPPING(S2_storerf_pci, 0)
22706 CUSTOM_BUILTIN_MAPPING(S2_storeri_pci, 0)
22707 CUSTOM_BUILTIN_MAPPING(S2_storerd_pci, 0)
22708 CUSTOM_BUILTIN_MAPPING(S2_storerb_pcr, 0)
22709 CUSTOM_BUILTIN_MAPPING(S2_storerh_pcr, 0)
22710 CUSTOM_BUILTIN_MAPPING(S2_storerf_pcr, 0)
22711 CUSTOM_BUILTIN_MAPPING(S2_storeri_pcr, 0)
22712 CUSTOM_BUILTIN_MAPPING(S2_storerd_pcr, 0)
22713 // Legacy builtins that take a vector in place of a vector predicate.
22714 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstoreq, 64)
22715 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorenq, 64)
22716 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentq, 64)
22717 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentnq, 64)
22718 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstoreq_128B, 128)
22719 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorenq_128B, 128)
22720 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentq_128B, 128)
22721 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentnq_128B, 128)
22722#include "clang/Basic/BuiltinsHexagonMapCustomDep.def"
22723#undef CUSTOM_BUILTIN_MAPPING
22724 };
22725
22726 auto CmpInfo = [] (Info A, Info B) { return A.BuiltinID < B.BuiltinID; };
22727 static const bool SortOnce = (llvm::sort(Infos, CmpInfo), true);
22728 (void)SortOnce;
22729
22730 const Info *F = llvm::lower_bound(Infos, Info{BuiltinID, 0, 0}, CmpInfo);
22731 if (F == std::end(Infos) || F->BuiltinID != BuiltinID)
22732 return {Intrinsic::not_intrinsic, 0};
22733
22734 return {F->IntrinsicID, F->VecLen};
22735}
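Editorial aside (not part of the source listing): the mapping above uses a sort-once static table plus a binary search keyed on the builtin ID. A stand-alone C++ sketch of the same idiom, with hypothetical names and the standard library in place of LLVM's wrappers:

    // Sort-once table + lower_bound lookup, as in getIntrinsicForHexagonNonClangBuiltin.
    #include <algorithm>
    #include <array>

    struct Mapping {
      unsigned Key;    // stands in for the builtin ID
      unsigned Value;  // stands in for the {intrinsic, vector length} payload
    };

    static unsigned lookup(unsigned Key) {
      static const std::array<Mapping, 3> Table = [] {
        std::array<Mapping, 3> T{{{30, 300}, {10, 100}, {20, 200}}};
        std::sort(T.begin(), T.end(),
                  [](Mapping A, Mapping B) { return A.Key < B.Key; });
        return T;
      }();
      auto It = std::lower_bound(Table.begin(), Table.end(), Mapping{Key, 0},
                                 [](Mapping A, Mapping B) { return A.Key < B.Key; });
      if (It == Table.end() || It->Key != Key)
        return 0;  // analogous to {Intrinsic::not_intrinsic, 0}
      return It->Value;
    }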
22736
22737 Value *CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID,
22738 const CallExpr *E) {
22739 Intrinsic::ID ID;
22740 unsigned VecLen;
22741 std::tie(ID, VecLen) = getIntrinsicForHexagonNonClangBuiltin(BuiltinID);
22742
22743 auto MakeCircOp = [this, E](unsigned IntID, bool IsLoad) {
22744 // The base pointer is passed by address, so it needs to be loaded.
22745 Address A = EmitPointerWithAlignment(E->getArg(0));
22746 Address BP = Address(A.emitRawPointer(*this), Int8PtrTy, A.getAlignment());
22747 llvm::Value *Base = Builder.CreateLoad(BP);
22748 // The treatment of both loads and stores is the same: the arguments for
22749 // the builtin are the same as the arguments for the intrinsic.
22750 // Load:
22751 // builtin(Base, Inc, Mod, Start) -> intr(Base, Inc, Mod, Start)
22752 // builtin(Base, Mod, Start) -> intr(Base, Mod, Start)
22753 // Store:
22754 // builtin(Base, Inc, Mod, Val, Start) -> intr(Base, Inc, Mod, Val, Start)
22755 // builtin(Base, Mod, Val, Start) -> intr(Base, Mod, Val, Start)
22756 SmallVector<llvm::Value*,5> Ops = { Base };
22757 for (unsigned i = 1, e = E->getNumArgs(); i != e; ++i)
22758 Ops.push_back(EmitScalarExpr(E->getArg(i)));
22759
22760 llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(IntID), Ops);
22761 // The load intrinsics generate two results (Value, NewBase), stores
22762 // generate one (NewBase). The new base address needs to be stored.
22763 llvm::Value *NewBase = IsLoad ? Builder.CreateExtractValue(Result, 1)
22764 : Result;
22765 llvm::Value *LV = EmitScalarExpr(E->getArg(0));
22766 Address Dest = EmitPointerWithAlignment(E->getArg(0));
22767 llvm::Value *RetVal =
22768 Builder.CreateAlignedStore(NewBase, LV, Dest.getAlignment());
22769 if (IsLoad)
22770 RetVal = Builder.CreateExtractValue(Result, 0);
22771 return RetVal;
22772 };
22773
22774 // Handle the conversion of bit-reverse load intrinsics to bit code.
22775 // The intrinsic call after this function only reads from memory and the
22776 // write to memory is handled by the store instruction.
22777 auto MakeBrevLd = [this, E](unsigned IntID, llvm::Type *DestTy) {
22778 // The intrinsic generates one result, which is the new value for the base
22779 // pointer. It needs to be returned. The result of the load instruction is
22780 // passed to intrinsic by address, so the value needs to be stored.
22781 llvm::Value *BaseAddress = EmitScalarExpr(E->getArg(0));
22782
22783 // Expressions like &(*pt++) will be incremented per evaluation.
22784 // EmitPointerWithAlignment and EmitScalarExpr evaluate the expression
22785 // per call.
22786 Address DestAddr = EmitPointerWithAlignment(E->getArg(1));
22787 DestAddr = DestAddr.withElementType(Int8Ty);
22788 llvm::Value *DestAddress = DestAddr.emitRawPointer(*this);
22789
22790 // Operands are Base, Dest, Modifier.
22791 // The intrinsic format in LLVM IR is defined as
22792 // { ValueType, i8* } (i8*, i32).
22793 llvm::Value *Result = Builder.CreateCall(
22794 CGM.getIntrinsic(IntID), {BaseAddress, EmitScalarExpr(E->getArg(2))});
22795
22796 // The value needs to be stored as the variable is passed by reference.
22797 llvm::Value *DestVal = Builder.CreateExtractValue(Result, 0);
22798
22799 // The store needs to be truncated to fit the destination type.
22800 // While i32 and i64 are natively supported on Hexagon, i8 and i16 need
22801 // to be handled with stores of the respective destination type.
22802 DestVal = Builder.CreateTrunc(DestVal, DestTy);
22803
22804 Builder.CreateAlignedStore(DestVal, DestAddress, DestAddr.getAlignment());
22805 // The updated value of the base pointer is returned.
22806 return Builder.CreateExtractValue(Result, 1);
22807 };
22808
22809 auto V2Q = [this, VecLen] (llvm::Value *Vec) {
22810 Intrinsic::ID ID = VecLen == 128 ? Intrinsic::hexagon_V6_vandvrt_128B
22811 : Intrinsic::hexagon_V6_vandvrt;
22812 return Builder.CreateCall(CGM.getIntrinsic(ID),
22813 {Vec, Builder.getInt32(-1)});
22814 };
22815 auto Q2V = [this, VecLen] (llvm::Value *Pred) {
22816 Intrinsic::ID ID = VecLen == 128 ? Intrinsic::hexagon_V6_vandqrt_128B
22817 : Intrinsic::hexagon_V6_vandqrt;
22818 return Builder.CreateCall(CGM.getIntrinsic(ID),
22819 {Pred, Builder.getInt32(-1)});
22820 };
22821
22822 switch (BuiltinID) {
22823 // These intrinsics return a tuple {Vector, VectorPred} in LLVM IR,
22824 // and the corresponding C/C++ builtins use loads/stores to update
22825 // the predicate.
22826 case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry:
22827 case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry_128B:
22828 case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry:
22829 case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry_128B: {
22830 // Get the type from the 0-th argument.
22831 llvm::Type *VecType = ConvertType(E->getArg(0)->getType());
22832 Address PredAddr =
22833 EmitPointerWithAlignment(E->getArg(2)).withElementType(VecType);
22834 llvm::Value *PredIn = V2Q(Builder.CreateLoad(PredAddr));
22835 llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID),
22836 {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)), PredIn});
22837
22838 llvm::Value *PredOut = Builder.CreateExtractValue(Result, 1);
22839 Builder.CreateAlignedStore(Q2V(PredOut), PredAddr.emitRawPointer(*this),
22840 PredAddr.getAlignment());
22841 return Builder.CreateExtractValue(Result, 0);
22842 }
22843 // These are identical to the builtins above, except they don't consume
22844 // input carry, only generate carry-out. Since they still produce two
22845 // outputs, generate the store of the predicate, but no load.
22846 case Hexagon::BI__builtin_HEXAGON_V6_vaddcarryo:
22847 case Hexagon::BI__builtin_HEXAGON_V6_vaddcarryo_128B:
22848 case Hexagon::BI__builtin_HEXAGON_V6_vsubcarryo:
22849 case Hexagon::BI__builtin_HEXAGON_V6_vsubcarryo_128B: {
22850 // Get the type from the 0-th argument.
22851 llvm::Type *VecType = ConvertType(E->getArg(0)->getType());
22852 Address PredAddr =
22853 EmitPointerWithAlignment(E->getArg(2)).withElementType(VecType);
22854 llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID),
22855 {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});
22856
22857 llvm::Value *PredOut = Builder.CreateExtractValue(Result, 1);
22858 Builder.CreateAlignedStore(Q2V(PredOut), PredAddr.emitRawPointer(*this),
22859 PredAddr.getAlignment());
22860 return Builder.CreateExtractValue(Result, 0);
22861 }
22862
22863 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstoreq:
22864 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorenq:
22865 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentq:
22866 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentnq:
22867 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstoreq_128B:
22868 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorenq_128B:
22869 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentq_128B:
22870 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentnq_128B: {
22871 SmallVector<llvm::Value*,4> Ops;
22872 const Expr *PredOp = E->getArg(0);
22873 // There will be an implicit cast to a boolean vector. Strip it.
22874 if (auto *Cast = dyn_cast<ImplicitCastExpr>(PredOp)) {
22875 if (Cast->getCastKind() == CK_BitCast)
22876 PredOp = Cast->getSubExpr();
22877 Ops.push_back(V2Q(EmitScalarExpr(PredOp)));
22878 }
22879 for (int i = 1, e = E->getNumArgs(); i != e; ++i)
22880 Ops.push_back(EmitScalarExpr(E->getArg(i)));
22881 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
22882 }
22883
22884 case Hexagon::BI__builtin_HEXAGON_L2_loadrub_pci:
22885 case Hexagon::BI__builtin_HEXAGON_L2_loadrb_pci:
22886 case Hexagon::BI__builtin_HEXAGON_L2_loadruh_pci:
22887 case Hexagon::BI__builtin_HEXAGON_L2_loadrh_pci:
22888 case Hexagon::BI__builtin_HEXAGON_L2_loadri_pci:
22889 case Hexagon::BI__builtin_HEXAGON_L2_loadrd_pci:
22890 case Hexagon::BI__builtin_HEXAGON_L2_loadrub_pcr:
22891 case Hexagon::BI__builtin_HEXAGON_L2_loadrb_pcr:
22892 case Hexagon::BI__builtin_HEXAGON_L2_loadruh_pcr:
22893 case Hexagon::BI__builtin_HEXAGON_L2_loadrh_pcr:
22894 case Hexagon::BI__builtin_HEXAGON_L2_loadri_pcr:
22895 case Hexagon::BI__builtin_HEXAGON_L2_loadrd_pcr:
22896 return MakeCircOp(ID, /*IsLoad=*/true);
22897 case Hexagon::BI__builtin_HEXAGON_S2_storerb_pci:
22898 case Hexagon::BI__builtin_HEXAGON_S2_storerh_pci:
22899 case Hexagon::BI__builtin_HEXAGON_S2_storerf_pci:
22900 case Hexagon::BI__builtin_HEXAGON_S2_storeri_pci:
22901 case Hexagon::BI__builtin_HEXAGON_S2_storerd_pci:
22902 case Hexagon::BI__builtin_HEXAGON_S2_storerb_pcr:
22903 case Hexagon::BI__builtin_HEXAGON_S2_storerh_pcr:
22904 case Hexagon::BI__builtin_HEXAGON_S2_storerf_pcr:
22905 case Hexagon::BI__builtin_HEXAGON_S2_storeri_pcr:
22906 case Hexagon::BI__builtin_HEXAGON_S2_storerd_pcr:
22907 return MakeCircOp(ID, /*IsLoad=*/false);
22908 case Hexagon::BI__builtin_brev_ldub:
22909 return MakeBrevLd(Intrinsic::hexagon_L2_loadrub_pbr, Int8Ty);
22910 case Hexagon::BI__builtin_brev_ldb:
22911 return MakeBrevLd(Intrinsic::hexagon_L2_loadrb_pbr, Int8Ty);
22912 case Hexagon::BI__builtin_brev_lduh:
22913 return MakeBrevLd(Intrinsic::hexagon_L2_loadruh_pbr, Int16Ty);
22914 case Hexagon::BI__builtin_brev_ldh:
22915 return MakeBrevLd(Intrinsic::hexagon_L2_loadrh_pbr, Int16Ty);
22916 case Hexagon::BI__builtin_brev_ldw:
22917 return MakeBrevLd(Intrinsic::hexagon_L2_loadri_pbr, Int32Ty);
22918 case Hexagon::BI__builtin_brev_ldd:
22919 return MakeBrevLd(Intrinsic::hexagon_L2_loadrd_pbr, Int64Ty);
22920 } // switch
22921
22922 return nullptr;
22923}
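// --- Illustrative sketch, not part of CGBuiltin.cpp ------------------------------------
// A minimal distillation of the pattern used by the vaddcarryo/vsubcarryo cases above:
// call an intrinsic that returns a {vector, predicate} pair, store the predicate half
// through the caller-supplied address, and hand the vector half back as the builtin's
// value. The helper name and parameter list are invented for illustration and it assumes
// the includes/using-declarations of this file; only the Builder/Address calls mirror the
// real code (which additionally converts the predicate with Q2V before storing it).
static llvm::Value *emitPairResultBuiltinSketch(clang::CodeGen::CodeGenFunction &CGF,
                                                llvm::Function *Intrin, llvm::Value *A,
                                                llvm::Value *B,
                                                clang::CodeGen::Address PredAddr) {
  llvm::Value *Pair = CGF.Builder.CreateCall(Intrin, {A, B});
  llvm::Value *Pred = CGF.Builder.CreateExtractValue(Pair, 1);
  CGF.Builder.CreateAlignedStore(Pred, PredAddr.emitRawPointer(CGF),
                                 PredAddr.getAlignment());
  return CGF.Builder.CreateExtractValue(Pair, 0);
}
// ----------------------------------------------------------------------------------------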
22924
22925Value *CodeGenFunction::EmitRISCVCpuIs(const CallExpr *E) {
22926 const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
22927 StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
22928 return EmitRISCVCpuIs(CPUStr);
22929}
22930
22931Value *CodeGenFunction::EmitRISCVCpuIs(StringRef CPUStr) {
22932 llvm::Type *Int32Ty = Builder.getInt32Ty();
22933 llvm::Type *Int64Ty = Builder.getInt64Ty();
22934 llvm::StructType *StructTy = llvm::StructType::get(Int32Ty, Int64Ty, Int64Ty);
22935 llvm::Constant *RISCVCPUModel =
22936 CGM.CreateRuntimeVariable(StructTy, "__riscv_cpu_model");
22937 cast<llvm::GlobalValue>(RISCVCPUModel)->setDSOLocal(true);
22938
22939 auto loadRISCVCPUID = [&](unsigned Index) {
22940 Value *Ptr = Builder.CreateStructGEP(StructTy, RISCVCPUModel, Index);
22941 Value *CPUID = Builder.CreateAlignedLoad(StructTy->getTypeAtIndex(Index),
22942 Ptr, llvm::MaybeAlign());
22943 return CPUID;
22944 };
22945
22946 const llvm::RISCV::CPUModel Model = llvm::RISCV::getCPUModel(CPUStr);
22947
22948 // Compare mvendorid.
22949 Value *VendorID = loadRISCVCPUID(0);
22950 Value *Result =
22951 Builder.CreateICmpEQ(VendorID, Builder.getInt32(Model.MVendorID));
22952
22953 // Compare marchid.
22954 Value *ArchID = loadRISCVCPUID(1);
22955 Result = Builder.CreateAnd(
22956 Result, Builder.CreateICmpEQ(ArchID, Builder.getInt64(Model.MArchID)));
22957
22958 // Compare mimpid.
22959 Value *ImpID = loadRISCVCPUID(2);
22960 Result = Builder.CreateAnd(
22961 Result, Builder.CreateICmpEQ(ImpID, Builder.getInt64(Model.MImpID)));
22962
22963 return Result;
22964}
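// --- Illustrative sketch, not part of CGBuiltin.cpp ------------------------------------
// How the lowering above is reached from user code when compiling for a RISC-V target.
// The CPU name is just an example string (valid names are whatever
// llvm::RISCV::getCPUModel() recognizes); the call expands to the three comparisons
// against __riscv_cpu_model's mvendorid/marchid/mimpid fields emitted above.
static int runningOnExampleCpuSketch(void) {
  __builtin_cpu_init();                   // handled above via EmitRISCVCpuInit()
  return __builtin_cpu_is("sifive-u74");  // example CPU name only
}
// ----------------------------------------------------------------------------------------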
22965
22966Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
22967 const CallExpr *E,
22968 ReturnValueSlot ReturnValue) {
22969
22970 if (BuiltinID == Builtin::BI__builtin_cpu_supports)
22971 return EmitRISCVCpuSupports(E);
22972 if (BuiltinID == Builtin::BI__builtin_cpu_init)
22973 return EmitRISCVCpuInit();
22974 if (BuiltinID == Builtin::BI__builtin_cpu_is)
22975 return EmitRISCVCpuIs(E);
22976
22977 SmallVector<Value *, 4> Ops;
22978 llvm::Type *ResultType = ConvertType(E->getType());
22979
22980 // Find out if any arguments are required to be integer constant expressions.
22981 unsigned ICEArguments = 0;
22982 ASTContext::GetBuiltinTypeError Error;
22983 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
22984 if (Error == ASTContext::GE_Missing_type) {
22985 // Vector intrinsics don't have a type string.
22986 assert(BuiltinID >= clang::RISCV::FirstRVVBuiltin &&
22987 BuiltinID <= clang::RISCV::LastRVVBuiltin);
22988 ICEArguments = 0;
22989 if (BuiltinID == RISCVVector::BI__builtin_rvv_vget_v ||
22990 BuiltinID == RISCVVector::BI__builtin_rvv_vset_v)
22991 ICEArguments = 1 << 1;
22992 } else {
22993 assert(Error == ASTContext::GE_None && "Unexpected error");
22994 }
22995
22996 if (BuiltinID == RISCV::BI__builtin_riscv_ntl_load)
22997 ICEArguments |= (1 << 1);
22998 if (BuiltinID == RISCV::BI__builtin_riscv_ntl_store)
22999 ICEArguments |= (1 << 2);
23000
23001 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
23002 // Handle aggregate argument, namely RVV tuple types in segment load/store
23003 if (hasAggregateEvaluationKind(E->getArg(i)->getType())) {
23004 LValue L = EmitAggExprToLValue(E->getArg(i));
23005 llvm::Value *AggValue = Builder.CreateLoad(L.getAddress());
23006 Ops.push_back(AggValue);
23007 continue;
23008 }
23009 Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
23010 }
23011
23012 Intrinsic::ID ID = Intrinsic::not_intrinsic;
23013 // The 0th bit simulates the `vta` of RVV
23014 // The 1st bit simulates the `vma` of RVV
23015 constexpr unsigned RVV_VTA = 0x1;
23016 constexpr unsigned RVV_VMA = 0x2;
23017 int PolicyAttrs = 0;
23018 bool IsMasked = false;
23019 // This is used by segment load/store to determine its LLVM type.
23020 unsigned SegInstSEW = 8;
23021
23022 // Required for overloaded intrinsics.
23023 llvm::SmallVector<llvm::Type *, 2> IntrinsicTypes;
23024 switch (BuiltinID) {
23025 default: llvm_unreachable("unexpected builtin ID");
23026 case RISCV::BI__builtin_riscv_orc_b_32:
23027 case RISCV::BI__builtin_riscv_orc_b_64:
23028 case RISCV::BI__builtin_riscv_clmul_32:
23029 case RISCV::BI__builtin_riscv_clmul_64:
23030 case RISCV::BI__builtin_riscv_clmulh_32:
23031 case RISCV::BI__builtin_riscv_clmulh_64:
23032 case RISCV::BI__builtin_riscv_clmulr_32:
23033 case RISCV::BI__builtin_riscv_clmulr_64:
23034 case RISCV::BI__builtin_riscv_xperm4_32:
23035 case RISCV::BI__builtin_riscv_xperm4_64:
23036 case RISCV::BI__builtin_riscv_xperm8_32:
23037 case RISCV::BI__builtin_riscv_xperm8_64:
23038 case RISCV::BI__builtin_riscv_brev8_32:
23039 case RISCV::BI__builtin_riscv_brev8_64:
23040 case RISCV::BI__builtin_riscv_zip_32:
23041 case RISCV::BI__builtin_riscv_unzip_32: {
23042 switch (BuiltinID) {
23043 default: llvm_unreachable("unexpected builtin ID");
23044 // Zbb
23045 case RISCV::BI__builtin_riscv_orc_b_32:
23046 case RISCV::BI__builtin_riscv_orc_b_64:
23047 ID = Intrinsic::riscv_orc_b;
23048 break;
23049
23050 // Zbc
23051 case RISCV::BI__builtin_riscv_clmul_32:
23052 case RISCV::BI__builtin_riscv_clmul_64:
23053 ID = Intrinsic::riscv_clmul;
23054 break;
23055 case RISCV::BI__builtin_riscv_clmulh_32:
23056 case RISCV::BI__builtin_riscv_clmulh_64:
23057 ID = Intrinsic::riscv_clmulh;
23058 break;
23059 case RISCV::BI__builtin_riscv_clmulr_32:
23060 case RISCV::BI__builtin_riscv_clmulr_64:
23061 ID = Intrinsic::riscv_clmulr;
23062 break;
23063
23064 // Zbkx
23065 case RISCV::BI__builtin_riscv_xperm8_32:
23066 case RISCV::BI__builtin_riscv_xperm8_64:
23067 ID = Intrinsic::riscv_xperm8;
23068 break;
23069 case RISCV::BI__builtin_riscv_xperm4_32:
23070 case RISCV::BI__builtin_riscv_xperm4_64:
23071 ID = Intrinsic::riscv_xperm4;
23072 break;
23073
23074 // Zbkb
23075 case RISCV::BI__builtin_riscv_brev8_32:
23076 case RISCV::BI__builtin_riscv_brev8_64:
23077 ID = Intrinsic::riscv_brev8;
23078 break;
23079 case RISCV::BI__builtin_riscv_zip_32:
23080 ID = Intrinsic::riscv_zip;
23081 break;
23082 case RISCV::BI__builtin_riscv_unzip_32:
23083 ID = Intrinsic::riscv_unzip;
23084 break;
23085 }
23086
23087 IntrinsicTypes = {ResultType};
23088 break;
23089 }
23090
23091 // Zk builtins
23092
23093 // Zknh
23094 case RISCV::BI__builtin_riscv_sha256sig0:
23095 ID = Intrinsic::riscv_sha256sig0;
23096 break;
23097 case RISCV::BI__builtin_riscv_sha256sig1:
23098 ID = Intrinsic::riscv_sha256sig1;
23099 break;
23100 case RISCV::BI__builtin_riscv_sha256sum0:
23101 ID = Intrinsic::riscv_sha256sum0;
23102 break;
23103 case RISCV::BI__builtin_riscv_sha256sum1:
23104 ID = Intrinsic::riscv_sha256sum1;
23105 break;
23106
23107 // Zksed
23108 case RISCV::BI__builtin_riscv_sm4ks:
23109 ID = Intrinsic::riscv_sm4ks;
23110 break;
23111 case RISCV::BI__builtin_riscv_sm4ed:
23112 ID = Intrinsic::riscv_sm4ed;
23113 break;
23114
23115 // Zksh
23116 case RISCV::BI__builtin_riscv_sm3p0:
23117 ID = Intrinsic::riscv_sm3p0;
23118 break;
23119 case RISCV::BI__builtin_riscv_sm3p1:
23120 ID = Intrinsic::riscv_sm3p1;
23121 break;
23122
23123 case RISCV::BI__builtin_riscv_clz_32:
23124 case RISCV::BI__builtin_riscv_clz_64: {
23125 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
23126 Value *Result = Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
23127 if (Result->getType() != ResultType)
23128 Result =
23129 Builder.CreateIntCast(Result, ResultType, /*isSigned*/ false, "cast");
23130 return Result;
23131 }
23132 case RISCV::BI__builtin_riscv_ctz_32:
23133 case RISCV::BI__builtin_riscv_ctz_64: {
23134 Function *F = CGM.getIntrinsic(Intrinsic::cttz, Ops[0]->getType());
23135 Value *Result = Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
23136 if (Result->getType() != ResultType)
23137 Result =
23138 Builder.CreateIntCast(Result, ResultType, /*isSigned*/ false, "cast");
23139 return Result;
23140 }
23141
23142 // Zihintntl
23143 case RISCV::BI__builtin_riscv_ntl_load: {
23144 llvm::Type *ResTy = ConvertType(E->getType());
23145 unsigned DomainVal = 5; // Default __RISCV_NTLH_ALL
23146 if (Ops.size() == 2)
23147 DomainVal = cast<ConstantInt>(Ops[1])->getZExtValue();
23148
23149 llvm::MDNode *RISCVDomainNode = llvm::MDNode::get(
23150 getLLVMContext(),
23151 llvm::ConstantAsMetadata::get(Builder.getInt32(DomainVal)));
23152 llvm::MDNode *NontemporalNode = llvm::MDNode::get(
23153 getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
23154
23155 int Width;
23156 if (ResTy->isScalableTy()) {
23157 const ScalableVectorType *SVTy = cast<ScalableVectorType>(ResTy);
23158 llvm::Type *ScalarTy = ResTy->getScalarType();
23159 Width = ScalarTy->getPrimitiveSizeInBits() *
23160 SVTy->getElementCount().getKnownMinValue();
23161 } else
23162 Width = ResTy->getPrimitiveSizeInBits();
23163 LoadInst *Load = Builder.CreateLoad(
23164 Address(Ops[0], ResTy, CharUnits::fromQuantity(Width / 8)));
23165
23166 Load->setMetadata(llvm::LLVMContext::MD_nontemporal, NontemporalNode);
23167 Load->setMetadata(CGM.getModule().getMDKindID("riscv-nontemporal-domain"),
23168 RISCVDomainNode);
23169
23170 return Load;
23171 }
23172 case RISCV::BI__builtin_riscv_ntl_store: {
23173 unsigned DomainVal = 5; // Default __RISCV_NTLH_ALL
23174 if (Ops.size() == 3)
23175 DomainVal = cast<ConstantInt>(Ops[2])->getZExtValue();
23176
23177 llvm::MDNode *RISCVDomainNode = llvm::MDNode::get(
23178 getLLVMContext(),
23179 llvm::ConstantAsMetadata::get(Builder.getInt32(DomainVal)));
23180 llvm::MDNode *NontemporalNode = llvm::MDNode::get(
23181 getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
23182
23183 StoreInst *Store = Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
23184 Store->setMetadata(llvm::LLVMContext::MD_nontemporal, NontemporalNode);
23185 Store->setMetadata(CGM.getModule().getMDKindID("riscv-nontemporal-domain"),
23186 RISCVDomainNode);
23187
23188 return Store;
23189 }
23190 // XCValu
23191 case RISCV::BI__builtin_riscv_cv_alu_addN:
23192 ID = Intrinsic::riscv_cv_alu_addN;
23193 break;
23194 case RISCV::BI__builtin_riscv_cv_alu_addRN:
23195 ID = Intrinsic::riscv_cv_alu_addRN;
23196 break;
23197 case RISCV::BI__builtin_riscv_cv_alu_adduN:
23198 ID = Intrinsic::riscv_cv_alu_adduN;
23199 break;
23200 case RISCV::BI__builtin_riscv_cv_alu_adduRN:
23201 ID = Intrinsic::riscv_cv_alu_adduRN;
23202 break;
23203 case RISCV::BI__builtin_riscv_cv_alu_clip:
23204 ID = Intrinsic::riscv_cv_alu_clip;
23205 break;
23206 case RISCV::BI__builtin_riscv_cv_alu_clipu:
23207 ID = Intrinsic::riscv_cv_alu_clipu;
23208 break;
23209 case RISCV::BI__builtin_riscv_cv_alu_extbs:
23210 return Builder.CreateSExt(Builder.CreateTrunc(Ops[0], Int8Ty), Int32Ty,
23211 "extbs");
23212 case RISCV::BI__builtin_riscv_cv_alu_extbz:
23213 return Builder.CreateZExt(Builder.CreateTrunc(Ops[0], Int8Ty), Int32Ty,
23214 "extbz");
23215 case RISCV::BI__builtin_riscv_cv_alu_exths:
23216 return Builder.CreateSExt(Builder.CreateTrunc(Ops[0], Int16Ty), Int32Ty,
23217 "exths");
23218 case RISCV::BI__builtin_riscv_cv_alu_exthz:
23219 return Builder.CreateZExt(Builder.CreateTrunc(Ops[0], Int16Ty), Int32Ty,
23220 "exthz");
23221 case RISCV::BI__builtin_riscv_cv_alu_slet:
23222 return Builder.CreateZExt(Builder.CreateICmpSLE(Ops[0], Ops[1]), Int32Ty,
23223 "sle");
23224 case RISCV::BI__builtin_riscv_cv_alu_sletu:
23225 return Builder.CreateZExt(Builder.CreateICmpULE(Ops[0], Ops[1]), Int32Ty,
23226 "sleu");
23227 case RISCV::BI__builtin_riscv_cv_alu_subN:
23228 ID = Intrinsic::riscv_cv_alu_subN;
23229 break;
23230 case RISCV::BI__builtin_riscv_cv_alu_subRN:
23231 ID = Intrinsic::riscv_cv_alu_subRN;
23232 break;
23233 case RISCV::BI__builtin_riscv_cv_alu_subuN:
23234 ID = Intrinsic::riscv_cv_alu_subuN;
23235 break;
23236 case RISCV::BI__builtin_riscv_cv_alu_subuRN:
23237 ID = Intrinsic::riscv_cv_alu_subuRN;
23238 break;
23239
23240 // Vector builtins are handled from here.
23241#include "clang/Basic/riscv_vector_builtin_cg.inc"
23242
23243 // SiFive Vector builtins are handled from here.
23244#include "clang/Basic/riscv_sifive_vector_builtin_cg.inc"
23245 }
23246
23247 assert(ID != Intrinsic::not_intrinsic);
23248
23249 llvm::Function *F = CGM.getIntrinsic(ID, IntrinsicTypes);
23250 return Builder.CreateCall(F, Ops, "");
23251}
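// --- Illustrative sketch, not part of CGBuiltin.cpp ------------------------------------
// User-level forms of two of the scalar builtins handled in the switch above, assuming a
// RISC-V target with the relevant extensions enabled (Zihintntl for the non-temporal
// hints, XCValu for the CORE-V ALU ops). The literal 5 is the __RISCV_NTLH_ALL domain,
// the same default the code above uses when the argument is omitted.
static int copyOneNontemporalSketch(int *Dst, const int *Src) {
  int V = __builtin_riscv_ntl_load(Src, 5);  // load carrying !nontemporal + domain metadata
  __builtin_riscv_ntl_store(Dst, V, 5);      // store carrying the same metadata
  return V;
}
static int signExtendByteSketch(int X) {
  // Lowered above to trunc-to-i8 + sext, i.e. the same IR as (int)(signed char)X.
  return __builtin_riscv_cv_alu_extbs(X);
}
// ----------------------------------------------------------------------------------------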
Defines the clang::ASTContext interface.
#define V(N, I)
Definition: ASTContext.h:3460
DynTypedNode Node
StringRef P
#define PPC_LNX_FEATURE(NAME, DESC, ENUMNAME, ENUMVAL, HWCAPN)
static constexpr SparcCPUInfo CPUInfo[]
Definition: Sparc.cpp:67
#define X86_CPU_SUBTYPE(ENUM, STR)
#define X86_CPU_SUBTYPE_ALIAS(ENUM, ALIAS)
#define X86_VENDOR(ENUM, STRING)
#define X86_CPU_TYPE_ALIAS(ENUM, ALIAS)
#define X86_CPU_TYPE(ENUM, STR)
static constexpr Builtin::Info BuiltinInfo[]
Definition: Builtins.cpp:32
static void Accumulate(SMap &SM, CFGBlock *B)
Definition: CFGStmtMap.cpp:49
static Value * EmitSpecialRegisterBuiltin(CodeGenFunction &CGF, const CallExpr *E, llvm::Type *RegisterType, llvm::Type *ValueType, SpecialRegisterAccessKind AccessKind, StringRef SysReg="")
Definition: CGBuiltin.cpp:8819
static llvm::Value * ARMMVEVectorReinterpret(CGBuilderTy &Builder, CodeGenFunction *CGF, llvm::Value *V, llvm::Type *DestType)
Definition: CGBuiltin.cpp:9655
static Value * MakeBinaryAtomicValue(CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E, AtomicOrdering Ordering=AtomicOrdering::SequentiallyConsistent)
Utility to insert an atomic instruction based on Intrinsic::ID and the expression node.
Definition: CGBuiltin.cpp:377
static char bitActionToX86BTCode(BitTest::ActionKind A)
Definition: CGBuiltin.cpp:1414
#define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier)
Definition: CGBuiltin.cpp:6845
static Intrinsic::ID getWaveActiveSumIntrinsic(llvm::Triple::ArchType Arch, CGHLSLRuntime &RT, QualType QT)
static Value * EmitAtomicCmpXchg128ForMSIntrin(CodeGenFunction &CGF, const CallExpr *E, AtomicOrdering SuccessOrdering)
Definition: CGBuiltin.cpp:569
#define INTRINSIC_X86_XSAVE_ID(NAME)
static CanQualType getOSLogArgType(ASTContext &C, int Size)
Get the argument type for arguments to os_log_helper.
Definition: CGBuiltin.cpp:2307
static Value * EmitOverflowCheckedAbs(CodeGenFunction &CGF, const CallExpr *E, bool SanitizeOverflow)
Definition: CGBuiltin.cpp:2273
static llvm::VectorType * GetFloatNeonType(CodeGenFunction *CGF, NeonTypeFlags IntTypeFlags)
Definition: CGBuiltin.cpp:6714
static Value * tryUseTestFPKind(CodeGenFunction &CGF, unsigned BuiltinID, Value *V)
Definition: CGBuiltin.cpp:2764
static llvm::Value * MVEImmediateShr(CGBuilderTy &Builder, llvm::Value *V, uint32_t Shift, bool Unsigned)
Definition: CGBuiltin.cpp:9625
static bool areBOSTypesCompatible(int From, int To)
Checks if using the result of __builtin_object_size(p, From) in place of __builtin_object_size(p,...
Definition: CGBuiltin.cpp:1029
static llvm::Value * SignOrZeroExtend(CGBuilderTy &Builder, llvm::Value *V, llvm::Type *T, bool Unsigned)
Definition: CGBuiltin.cpp:9618
static void InsertExplicitZeroOperand(CGBuilderTy &Builder, llvm::Type *Ty, SmallVectorImpl< Value * > &Ops)
static const ARMVectorIntrinsicInfo AArch64SMEIntrinsicMap[]
Definition: CGBuiltin.cpp:7860
static Value * EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID, const CallExpr *E, SmallVectorImpl< Value * > &Ops, llvm::Triple::ArchType Arch)
Definition: CGBuiltin.cpp:9845
#define MMA_VARIANTS_B1_AND(geom, type)
static void swapCommutativeSMEOperands(unsigned BuiltinID, SmallVectorImpl< Value * > &Ops)
static bool AArch64SISDIntrinsicsProvenSorted
Definition: CGBuiltin.cpp:7872
static Value * EmitX86CompressExpand(CodeGenFunction &CGF, ArrayRef< Value * > Ops, bool IsCompress)
static const ARMVectorIntrinsicInfo AArch64SVEIntrinsicMap[]
Definition: CGBuiltin.cpp:7842
static bool HasExtraNeonArgument(unsigned BuiltinID)
Return true if BuiltinID is an overloaded Neon intrinsic with an extra argument that specifies the ve...
Definition: CGBuiltin.cpp:8887
static bool TypeRequiresBuiltinLaunderImp(const ASTContext &Ctx, QualType Ty, llvm::SmallPtrSetImpl< const Decl * > &Seen)
Definition: CGBuiltin.cpp:2642
static Value * EmitAtomicIncrementValue(CodeGenFunction &CGF, const CallExpr *E, AtomicOrdering Ordering=AtomicOrdering::SequentiallyConsistent)
Definition: CGBuiltin.cpp:622
static std::pair< Intrinsic::ID, unsigned > getIntrinsicForHexagonNonClangBuiltin(unsigned BuiltinID)
static Value * emitRangedBuiltin(CodeGenFunction &CGF, unsigned IntrinsicID, int low, int high)
Definition: CGBuiltin.cpp:967
#define MMA_INTR(geom_op_type, layout)
static Value * EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC, bool Signed, ArrayRef< Value * > Ops)
static Value * emitUnaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID, unsigned ConstrainedIntrinsicID)
Definition: CGBuiltin.cpp:672
static Intrinsic::ID getDotProductIntrinsic(CGHLSLRuntime &RT, QualType QT)
#define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier)
Definition: CGBuiltin.cpp:6841
static bool AArch64SVEIntrinsicsProvenSorted
Definition: CGBuiltin.cpp:7873
static RValue EmitMSVCRTSetJmp(CodeGenFunction &CGF, MSVCSetJmpKind SJKind, const CallExpr *E)
MSVC handles setjmp a bit differently on different platforms.
Definition: CGBuiltin.cpp:1611
static const ARMVectorIntrinsicInfo * findARMVectorIntrinsicInMap(ArrayRef< ARMVectorIntrinsicInfo > IntrinsicMap, unsigned BuiltinID, bool &MapProvenSorted)
Definition: CGBuiltin.cpp:7877
static Value * EmitScalarFMAExpr(CodeGenFunction &CGF, const CallExpr *E, MutableArrayRef< Value * > Ops, Value *Upper, bool ZeroMask=false, unsigned PTIdx=0, bool NegAcc=false)
static Value * loadRISCVFeatureBits(unsigned Index, CGBuilderTy &Builder, CodeGenModule &CGM)
#define MUTATE_LDBL(func)
static Value * EmitX86ExpandLoad(CodeGenFunction &CGF, ArrayRef< Value * > Ops)
static unsigned CountCountedByAttrs(const RecordDecl *RD)
Definition: CGBuiltin.cpp:1089
static Value * emitMaybeConstrainedFPToIntRoundBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID, unsigned ConstrainedIntrinsicID)
Definition: CGBuiltin.cpp:791
static Value * EmitX86MaskedLoad(CodeGenFunction &CGF, ArrayRef< Value * > Ops, Align Alignment)
static bool TypeRequiresBuiltinLaunder(CodeGenModule &CGM, QualType Ty)
Determine if the specified type requires laundering by checking if it is a dynamic class type or cont...
Definition: CGBuiltin.cpp:2670
static void InsertExplicitUndefOperand(CGBuilderTy &Builder, llvm::Type *Ty, SmallVectorImpl< Value * > &Ops)
static Value * EmitISOVolatileLoad(CodeGenFunction &CGF, const CallExpr *E)
Definition: CGBuiltin.cpp:646
static struct WidthAndSignedness EncompassingIntegerType(ArrayRef< struct WidthAndSignedness > Types)
Definition: CGBuiltin.cpp:997
static Integer GetIntegerConstantValue(const Expr *E, ASTContext &Context)
Definition: CGBuiltin.cpp:9614
#define MMA_VARIANTS(geom, type)
static bool AArch64SMEIntrinsicsProvenSorted
Definition: CGBuiltin.cpp:7874
static llvm::Value * VectorZip(CGBuilderTy &Builder, llvm::Value *V0, llvm::Value *V1)
Definition: CGBuiltin.cpp:9692
static Value * EmitTargetArchBuiltinExpr(CodeGenFunction *CGF, unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue, llvm::Triple::ArchType Arch)
Definition: CGBuiltin.cpp:6597
constexpr unsigned SVEBitsPerBlock
static std::optional< CodeGenFunction::MSVCIntrin > translateX86ToMsvcIntrin(unsigned BuiltinID)
Definition: CGBuiltin.cpp:2007
static const std::pair< unsigned, unsigned > NEONEquivalentIntrinsicMap[]
Definition: CGBuiltin.cpp:7684
#define NEONMAP0(NameBase)
Definition: CGBuiltin.cpp:6838
static Value * EmitX86MaskedStore(CodeGenFunction &CGF, ArrayRef< Value * > Ops, Align Alignment)
static Value * handleHlslSplitdouble(const CallExpr *E, CodeGenFunction *CGF)
Definition: CGBuiltin.cpp:143
static Value * emitBinaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID, unsigned ConstrainedIntrinsicID)
Definition: CGBuiltin.cpp:689
static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E, Instruction::BinaryOps Op, bool Invert=false)
Utility to insert an atomic instruction based on Intrinsic::ID and the expression node,...
Definition: CGBuiltin.cpp:429
static bool HasNoIndirectArgumentsOrResults(CGFunctionInfo const &FnInfo)
Checks no arguments or results are passed indirectly in the ABI (i.e.
Definition: CGBuiltin.cpp:903
static Value * EmitX86Muldq(CodeGenFunction &CGF, bool IsSigned, ArrayRef< Value * > Ops)
Value * readX18AsPtr(CodeGenFunction &CGF)
Helper for the read/write/add/inc X18 builtins: read the X18 register and return it as an i8 pointer.
Definition: CGBuiltin.cpp:250
static llvm::Value * ARMMVEVectorElementReverse(CGBuilderTy &Builder, llvm::Value *V, unsigned ReverseWidth)
Definition: CGBuiltin.cpp:9719
#define MMA_SATF_VARIANTS(geom, type)
static std::optional< CodeGenFunction::MSVCIntrin > translateAarch64ToMsvcIntrin(unsigned BuiltinID)
Definition: CGBuiltin.cpp:1848
static std::optional< CodeGenFunction::MSVCIntrin > translateArmToMsvcIntrin(unsigned BuiltinID)
Definition: CGBuiltin.cpp:1697
static llvm::Value * EmitBitTestIntrinsic(CodeGenFunction &CGF, unsigned BuiltinID, const CallExpr *E)
Emit a _bittest* intrinsic.
Definition: CGBuiltin.cpp:1475
static Value * emitBuiltinWithOneOverloadedType(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID, llvm::StringRef Name="")
Definition: CGBuiltin.cpp:766
static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap[]
Definition: CGBuiltin.cpp:6850
@ UnsignedAlts
Definition: CGBuiltin.cpp:6808
@ Vectorize1ArgType
Definition: CGBuiltin.cpp:6813
@ FpCmpzModifiers
Definition: CGBuiltin.cpp:6817
@ Use64BitVectors
Definition: CGBuiltin.cpp:6810
@ VectorizeArgTypes
Definition: CGBuiltin.cpp:6805
@ VectorRetGetArgs01
Definition: CGBuiltin.cpp:6815
@ InventFloatType
Definition: CGBuiltin.cpp:6807
@ AddRetType
Definition: CGBuiltin.cpp:6800
@ Add2ArgTypes
Definition: CGBuiltin.cpp:6802
@ VectorizeRetType
Definition: CGBuiltin.cpp:6804
@ VectorRet
Definition: CGBuiltin.cpp:6814
@ Add1ArgType
Definition: CGBuiltin.cpp:6801
@ Use128BitVectors
Definition: CGBuiltin.cpp:6811
static Value * EmitSignBit(CodeGenFunction &CGF, Value *V)
Emit the computation of the sign bit for a floating point value.
Definition: CGBuiltin.cpp:869
static Value * EmitFAbs(CodeGenFunction &CGF, Value *V)
EmitFAbs - Emit a call to @llvm.fabs().
Definition: CGBuiltin.cpp:860
#define CUSTOM_BUILTIN_MAPPING(x, s)
static Value * EmitX86CvtF16ToFloatExpr(CodeGenFunction &CGF, ArrayRef< Value * > Ops, llvm::Type *DstTy)
static bool isSpecialUnsignedMultiplySignedResult(unsigned BuiltinID, WidthAndSignedness Op1Info, WidthAndSignedness Op2Info, WidthAndSignedness ResultInfo)
Definition: CGBuiltin.cpp:2500
static llvm::Value * getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType)
Definition: CGBuiltin.cpp:1037
static llvm::Value * emitPPCLoadReserveIntrinsic(CodeGenFunction &CGF, unsigned BuiltinID, const CallExpr *E)
Definition: CGBuiltin.cpp:1550
static llvm::Value * VectorUnzip(CGBuilderTy &Builder, llvm::Value *V, bool Odd)
Definition: CGBuiltin.cpp:9681
static Value * EmitX86SExtMask(CodeGenFunction &CGF, Value *Op, llvm::Type *DstTy)
static Value * emitTernaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID, unsigned ConstrainedIntrinsicID)
Definition: CGBuiltin.cpp:726
static WidthAndSignedness getIntegerWidthAndSignedness(const clang::ASTContext &context, const clang::QualType Type)
Definition: CGBuiltin.cpp:985
static Value * EmitX86FunnelShift(CodeGenFunction &CGF, Value *Op0, Value *Op1, Value *Amt, bool IsRight)
static RValue EmitCheckedMixedSignMultiply(CodeGenFunction &CGF, const clang::Expr *Op1, WidthAndSignedness Op1Info, const clang::Expr *Op2, WidthAndSignedness Op2Info, const clang::Expr *ResultArg, QualType ResultQTy, WidthAndSignedness ResultInfo)
Emit a checked mixed-sign multiply.
Definition: CGBuiltin.cpp:2554
static llvm::ScalableVectorType * getSVEVectorForElementType(llvm::Type *EltTy)
static unsigned mutateLongDoubleBuiltin(unsigned BuiltinID)
Definition: CGBuiltin.cpp:2693
#define INTRINSIC_WITH_CC(NAME)
static llvm::FixedVectorType * GetNeonType(CodeGenFunction *CGF, NeonTypeFlags TypeFlags, bool HasLegalHalfType=true, bool V1Ty=false, bool AllowBFloatArgsAndRet=true)
Definition: CGBuiltin.cpp:6673
static RValue EmitBinaryAtomic(CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E)
Definition: CGBuiltin.cpp:420
static llvm::Value * ARMMVEConstantSplat(CGBuilderTy &Builder, llvm::Type *VT)
Definition: CGBuiltin.cpp:9707
static Value * EmitX86MaskLogic(CodeGenFunction &CGF, Instruction::BinaryOps Opc, ArrayRef< Value * > Ops, bool InvertLHS=false)
static Value * EmitFromInt(CodeGenFunction &CGF, llvm::Value *V, QualType T, llvm::Type *ResultType)
Definition: CGBuiltin.cpp:348
static Value * EmitAMDGCNBallotForExec(CodeGenFunction &CGF, const CallExpr *E, llvm::Type *RegisterType, llvm::Type *ValueType, bool isExecHi)
Definition: CGBuiltin.cpp:8798
static void emitSincosBuiltin(CodeGenFunction &CGF, const CallExpr *E, llvm::Intrinsic::ID IntrinsicID)
Definition: CGBuiltin.cpp:827
static void initializeAlloca(CodeGenFunction &CGF, AllocaInst *AI, Value *Size, Align AlignmentInBytes)
Definition: CGBuiltin.cpp:79
static Value * EmitX86Select(CodeGenFunction &CGF, Value *Mask, Value *Op0, Value *Op1)
SpecialRegisterAccessKind
Definition: CGBuiltin.cpp:8790
@ VolatileRead
Definition: CGBuiltin.cpp:8792
@ NormalRead
Definition: CGBuiltin.cpp:8791
static Value * EmitAtomicCmpXchgForMSIntrin(CodeGenFunction &CGF, const CallExpr *E, AtomicOrdering SuccessOrdering=AtomicOrdering::SequentiallyConsistent)
This function should be invoked to emit atomic cmpxchg for Microsoft's _InterlockedCompareExchange* i...
Definition: CGBuiltin.cpp:514
static Address CheckAtomicAlignment(CodeGenFunction &CGF, const CallExpr *E)
Definition: CGBuiltin.cpp:359
static Value * EmitX86FMAExpr(CodeGenFunction &CGF, const CallExpr *E, ArrayRef< Value * > Ops, unsigned BuiltinID, bool IsAddSub)
static Value * getMaskVecValue(CodeGenFunction &CGF, Value *Mask, unsigned NumElts)
static bool isSpecialMixedSignMultiply(unsigned BuiltinID, WidthAndSignedness Op1Info, WidthAndSignedness Op2Info, WidthAndSignedness ResultInfo)
Determine if a binop is a checked mixed-sign multiply we can specialize.
Definition: CGBuiltin.cpp:2542
static Value * MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E, bool ReturnBool)
Utility to insert an atomic cmpxchg instruction.
Definition: CGBuiltin.cpp:474
static Value * emitBinaryExpMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, const CallExpr *E, llvm::Intrinsic::ID IntrinsicID, llvm::Intrinsic::ID ConstrainedIntrinsicID)
Definition: CGBuiltin.cpp:706
static Value * EmitToInt(CodeGenFunction &CGF, llvm::Value *V, QualType T, llvm::IntegerType *IntType)
Emit the conversions required to turn the given value into an integer of the given size.
Definition: CGBuiltin.cpp:337
static llvm::Value * ARMMVEVectorSplat(CGBuilderTy &Builder, llvm::Value *V)
Definition: CGBuiltin.cpp:9647
static Value * EmitX86MaskedCompareResult(CodeGenFunction &CGF, Value *Cmp, unsigned NumElts, Value *MaskIn)
static Value * EmitX86CompressStore(CodeGenFunction &CGF, ArrayRef< Value * > Ops)
static bool NEONSIMDIntrinsicsProvenSorted
Definition: CGBuiltin.cpp:7869
static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[]
Definition: CGBuiltin.cpp:7169
static Value * EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E)
Definition: CGBuiltin.cpp:401
static Intrinsic::ID getFirstBitHighIntrinsic(CGHLSLRuntime &RT, QualType QT)
static llvm::Value * EmitOverflowIntrinsic(CodeGenFunction &CGF, const llvm::Intrinsic::ID IntrinsicID, llvm::Value *X, llvm::Value *Y, llvm::Value *&Carry)
Emit a call to llvm.
Definition: CGBuiltin.cpp:952
static Value * EmitCommonNeonSISDBuiltinExpr(CodeGenFunction &CGF, const ARMVectorIntrinsicInfo &SISDInfo, SmallVectorImpl< Value * > &Ops, const CallExpr *E)
Definition: CGBuiltin.cpp:7935
static Value * emitFPIntBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID)
Definition: CGBuiltin.cpp:779
#define MMA_LDST(n, geom_op_type)
static Value * EmitX86vpcom(CodeGenFunction &CGF, ArrayRef< Value * > Ops, bool IsSigned)
static Value * emitFrexpBuiltin(CodeGenFunction &CGF, const CallExpr *E, llvm::Intrinsic::ID IntrinsicID)
Definition: CGBuiltin.cpp:809
static Value * EmitX86ConvertToMask(CodeGenFunction &CGF, Value *In)
static Value * EmitISOVolatileStore(CodeGenFunction &CGF, const CallExpr *E)
Definition: CGBuiltin.cpp:658
static Value * EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF, unsigned IntrinsicID, const CallExpr *E)
Handle a SystemZ function in which the final argument is a pointer to an int that receives the post-i...
static RValue EmitHipStdParUnsupportedBuiltin(CodeGenFunction *CGF, const FunctionDecl *FD)
Definition: CGBuiltin.cpp:2775
static llvm::Value * EmitX86BitTestIntrinsic(CodeGenFunction &CGF, BitTest BT, const CallExpr *E, Value *BitBase, Value *BitPos)
Definition: CGBuiltin.cpp:1424
static RValue EmitCheckedUnsignedMultiplySignedResult(CodeGenFunction &CGF, const clang::Expr *Op1, WidthAndSignedness Op1Info, const clang::Expr *Op2, WidthAndSignedness Op2Info, const clang::Expr *ResultArg, QualType ResultQTy, WidthAndSignedness ResultInfo)
Definition: CGBuiltin.cpp:2508
static Value * emitCallMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, unsigned IntrinsicID, unsigned ConstrainedIntrinsicID, llvm::Type *Ty, ArrayRef< Value * > Args)
Definition: CGBuiltin.cpp:745
static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD, const CallExpr *E, llvm::Constant *calleeValue)
Definition: CGBuiltin.cpp:914
static Value * handleAsDoubleBuiltin(CodeGenFunction &CGF, const CallExpr *E)
Definition: CGBuiltin.cpp:213
static Value * EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E)
Definition: CGBuiltin.cpp:412
static Value * EmitX86Ternlog(CodeGenFunction &CGF, bool ZeroMask, ArrayRef< Value * > Ops)
static llvm::AtomicOrdering getBitTestAtomicOrdering(BitTest::InterlockingKind I)
Definition: CGBuiltin.cpp:1460
#define MMA_VARIANTS_B1_XOR(geom, type)
#define MMA_VARIANTS_I4(geom, type)
static Value * EmitX86ConvertIntToFp(CodeGenFunction &CGF, const CallExpr *E, ArrayRef< Value * > Ops, bool IsSigned)
static Value * packTBLDVectorList(CodeGenFunction &CGF, ArrayRef< Value * > Ops, Value *ExtOp, Value *IndexOp, llvm::Type *ResTy, unsigned IntID, const char *Name)
Definition: CGBuiltin.cpp:8717
static Value * EmitAbs(CodeGenFunction &CGF, Value *ArgValue, bool HasNSW)
Definition: CGBuiltin.cpp:2267
static Value * EmitX86ScalarSelect(CodeGenFunction &CGF, Value *Mask, Value *Op0, Value *Op1)
static Value * EmitAtomicDecrementValue(CodeGenFunction &CGF, const CallExpr *E, AtomicOrdering Ordering=AtomicOrdering::SequentiallyConsistent)
Definition: CGBuiltin.cpp:633
static Value * handleHlslClip(const CallExpr *E, CodeGenFunction *CGF)
Definition: CGBuiltin.cpp:102
static bool AArch64SIMDIntrinsicsProvenSorted
Definition: CGBuiltin.cpp:7871
static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[]
Definition: CGBuiltin.cpp:7444
CodeGenFunction::ComplexPairTy ComplexPairTy
const Decl * D
Expr * E
const Environment & Env
Definition: HTMLLogger.cpp:147
unsigned Iter
Definition: HTMLLogger.cpp:153
#define ALIAS(NAME, TOK, FLAGS)
#define X(type, name)
Definition: Value.h:144
llvm::MachO::Record Record
Definition: MachO.h:31
static std::string getName(const CallEvent &Call)
static std::string toString(const clang::SanitizerSet &Sanitizers)
Produce a string containing comma-separated names of sanitizers in Sanitizers set.
HLSLResourceBindingAttr::RegisterType RegisterType
Definition: SemaHLSL.cpp:43
SourceRange Range
Definition: SemaObjC.cpp:758
SourceLocation Loc
Definition: SemaObjC.cpp:759
static QualType getPointeeType(const MemRegion *R)
Enumerates target-specific builtins in their own namespaces within namespace clang.
Defines the clang::TargetOptions class.
C Language Family Type Representation.
SourceLocation Begin
__DEVICE__ float modf(float __x, float *__iptr)
__DEVICE__ double nan(const char *)
APSInt & getInt()
Definition: APValue.h:488
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition: ASTContext.h:188
CharUnits getTypeAlignInChars(QualType T) const
Return the ABI-specified alignment of a (complete) type T, in characters.
unsigned getIntWidth(QualType T) const
const ASTRecordLayout & getASTRecordLayout(const RecordDecl *D) const
Get or compute information about the layout of the specified record (struct/union/class) D,...
CanQualType VoidPtrTy
Definition: ASTContext.h:1187
IdentifierTable & Idents
Definition: ASTContext.h:680
Builtin::Context & BuiltinInfo
Definition: ASTContext.h:682
QualType getConstantArrayType(QualType EltTy, const llvm::APInt &ArySize, const Expr *SizeExpr, ArraySizeModifier ASM, unsigned IndexTypeQuals) const
Return the unique reference to the type for a constant array of the specified element type.
QualType getBaseElementType(const ArrayType *VAT) const
Return the innermost element type of an array type.
CanQualType IntTy
Definition: ASTContext.h:1169
QualType getObjCIdType() const
Represents the Objective-CC id type.
Definition: ASTContext.h:2203
bool hasSameUnqualifiedType(QualType T1, QualType T2) const
Determine whether the given types are equivalent after cvr-qualifiers have been removed.
Definition: ASTContext.h:2770
const ArrayType * getAsArrayType(QualType T) const
Type Query functions.
uint64_t getTypeSize(QualType T) const
Return the size of the specified (complete) type T, in bits.
Definition: ASTContext.h:2489
CharUnits getTypeSizeInChars(QualType T) const
Return the size of the specified (complete) type T, in characters.
CanQualType VoidTy
Definition: ASTContext.h:1160
QualType GetBuiltinType(unsigned ID, GetBuiltinTypeError &Error, unsigned *IntegerConstantArgs=nullptr) const
Return the type for the specified builtin.
const TargetInfo & getTargetInfo() const
Definition: ASTContext.h:799
CharUnits toCharUnitsFromBits(int64_t BitSize) const
Convert a size in bits to a size in characters.
unsigned getTargetAddressSpace(LangAS AS) const
@ GE_None
No error.
Definition: ASTContext.h:2391
@ GE_Missing_type
Missing a type.
Definition: ASTContext.h:2394
ASTRecordLayout - This class contains layout information for one RecordDecl, which is a struct/union/...
Definition: RecordLayout.h:38
uint64_t getFieldOffset(unsigned FieldNo) const
getFieldOffset - Get the offset of the given field index, in bits.
Definition: RecordLayout.h:200
Represents an array type, per C99 6.7.5.2 - Array Declarators.
Definition: Type.h:3577
QualType getElementType() const
Definition: Type.h:3589
static std::unique_ptr< AtomicScopeModel > create(AtomicScopeModelKind K)
Create an atomic scope model by AtomicScopeModelKind.
Definition: SyncScope.h:273
bool isLibFunction(unsigned ID) const
Return true if this is a builtin for a libc/libm function, with a "__builtin_" prefix (e....
Definition: Builtins.h:150
llvm::StringRef getName(unsigned ID) const
Return the identifier name for the specified builtin, e.g.
Definition: Builtins.h:103
bool isConstWithoutErrnoAndExceptions(unsigned ID) const
Return true if this function has no side effects and doesn't read memory, except for possibly errno o...
Definition: Builtins.h:248
bool isConstWithoutExceptions(unsigned ID) const
Definition: Builtins.h:252
bool isConst(unsigned ID) const
Return true if this function has no side effects and doesn't read memory.
Definition: Builtins.h:123
CallExpr - Represents a function call (C99 6.5.2.2, C++ [expr.call]).
Definition: Expr.h:2874
CharUnits - This is an opaque type for sizes expressed in character units.
Definition: CharUnits.h:38
llvm::Align getAsAlign() const
getAsAlign - Returns Quantity as a valid llvm::Align, Beware llvm::Align assumes power of two 8-bit b...
Definition: CharUnits.h:189
QuantityType getQuantity() const
getQuantity - Get the raw integer representation of this quantity.
Definition: CharUnits.h:185
static CharUnits One()
One - Construct a CharUnits quantity of one.
Definition: CharUnits.h:58
static CharUnits fromQuantity(QuantityType Quantity)
fromQuantity - Construct a CharUnits quantity from a raw integer type.
Definition: CharUnits.h:63
XRayInstrSet XRayInstrumentationBundle
Set of XRay instrumentation kinds to emit.
ABIArgInfo - Helper class to encapsulate information about how a specific C type should be passed to ...
Like RawAddress, an abstract representation of an aligned address, but the pointer contained in this ...
Definition: Address.h:128
static Address invalid()
Definition: Address.h:176
llvm::Value * emitRawPointer(CodeGenFunction &CGF) const
Return the pointer contained in this class after authenticating it and adding offset to it if necessa...
Definition: Address.h:251
CharUnits getAlignment() const
Definition: Address.h:189
llvm::Type * getElementType() const
Return the type of the values stored in this address.
Definition: Address.h:207
Address withElementType(llvm::Type *ElemTy) const
Return address with different element type, but same pointer and alignment.
Definition: Address.h:274
Address withAlignment(CharUnits NewAlignment) const
Return address with different alignment, but same pointer and element type.
Definition: Address.h:267
llvm::PointerType * getType() const
Return the type of the pointer value.
Definition: Address.h:199
An aggregate value slot.
Definition: CGValue.h:504
Address getAddress() const
Definition: CGValue.h:644
A scoped helper to set the current debug location to the specified location or preferred location of ...
Definition: CGDebugInfo.h:858
static ApplyDebugLocation CreateArtificial(CodeGenFunction &CGF)
Apply TemporaryLocation if it is valid.
Definition: CGDebugInfo.h:898
static ApplyDebugLocation CreateEmpty(CodeGenFunction &CGF)
Set the IRBuilder to not attach debug locations.
Definition: CGDebugInfo.h:915
llvm::StoreInst * CreateStore(llvm::Value *Val, Address Addr, bool IsVolatile=false)
Definition: CGBuilder.h:136
llvm::StoreInst * CreateAlignedStore(llvm::Value *Val, llvm::Value *Addr, CharUnits Align, bool IsVolatile=false)
Definition: CGBuilder.h:143
Address CreateGEP(CodeGenFunction &CGF, Address Addr, llvm::Value *Index, const llvm::Twine &Name="")
Definition: CGBuilder.h:292
llvm::CallInst * CreateMemMove(Address Dest, Address Src, llvm::Value *Size, bool IsVolatile=false)
Definition: CGBuilder.h:389
llvm::CallInst * CreateMemCpyInline(Address Dest, Address Src, uint64_t Size)
Definition: CGBuilder.h:381
llvm::AtomicRMWInst * CreateAtomicRMW(llvm::AtomicRMWInst::BinOp Op, Address Addr, llvm::Value *Val, llvm::AtomicOrdering Ordering, llvm::SyncScope::ID SSID=llvm::SyncScope::System)
Definition: CGBuilder.h:180
llvm::CallInst * CreateMemSetInline(Address Dest, llvm::Value *Value, uint64_t Size)
Definition: CGBuilder.h:405
llvm::StoreInst * CreateDefaultAlignedStore(llvm::Value *Val, llvm::Value *Addr, bool IsVolatile=false)
Definition: CGBuilder.h:151
llvm::CallInst * CreateMemSet(Address Dest, llvm::Value *Value, llvm::Value *Size, bool IsVolatile=false)
Definition: CGBuilder.h:398
Address CreateStructGEP(Address Addr, unsigned Index, const llvm::Twine &Name="")
Definition: CGBuilder.h:219
llvm::AtomicCmpXchgInst * CreateAtomicCmpXchg(Address Addr, llvm::Value *Cmp, llvm::Value *New, llvm::AtomicOrdering SuccessOrdering, llvm::AtomicOrdering FailureOrdering, llvm::SyncScope::ID SSID=llvm::SyncScope::System)
Definition: CGBuilder.h:169
llvm::LoadInst * CreateLoad(Address Addr, const llvm::Twine &Name="")
Definition: CGBuilder.h:108
Address CreateConstByteGEP(Address Addr, CharUnits Offset, const llvm::Twine &Name="")
Definition: CGBuilder.h:315
Address CreateLaunderInvariantGroup(Address Addr)
Definition: CGBuilder.h:437
llvm::CallInst * CreateMemCpy(Address Dest, Address Src, llvm::Value *Size, bool IsVolatile=false)
Definition: CGBuilder.h:365
llvm::LoadInst * CreateAlignedLoad(llvm::Type *Ty, llvm::Value *Addr, CharUnits Align, const llvm::Twine &Name="")
Definition: CGBuilder.h:128
Address CreateAddrSpaceCast(Address Addr, llvm::Type *Ty, llvm::Type *ElementTy, const llvm::Twine &Name="")
Definition: CGBuilder.h:189
Address CreateConstInBoundsGEP(Address Addr, uint64_t Index, const llvm::Twine &Name="")
Given addr = T* ... produce name = getelementptr inbounds addr, i64 index where i64 is actually the t...
Definition: CGBuilder.h:261
Address CreateInBoundsGEP(Address Addr, ArrayRef< llvm::Value * > IdxList, llvm::Type *ElementType, CharUnits Align, const Twine &Name="")
Definition: CGBuilder.h:346
virtual std::string getDeviceSideName(const NamedDecl *ND)=0
Returns function or variable name on device side even if the current compilation is for host.
virtual llvm::GlobalVariable * getThrowInfo(QualType T)
Definition: CGCXXABI.h:259
All available information about a concrete callee.
Definition: CGCall.h:63
static CGCallee forDirect(llvm::Constant *functionPtr, const CGCalleeInfo &abstractInfo=CGCalleeInfo())
Definition: CGCall.h:137
llvm::DIType * getOrCreateStandaloneType(QualType Ty, SourceLocation Loc)
Emit standalone debug info for a type.
llvm::DILocation * CreateTrapFailureMessageFor(llvm::DebugLoc TrapLocation, StringRef Category, StringRef FailureMsg)
Create a debug location from TrapLocation that adds an artificial inline frame where the frame name i...
CGFunctionInfo - Class to encapsulate the information about a function definition.
MutableArrayRef< ArgInfo > arguments()
virtual void EmitGCMemmoveCollectable(CodeGen::CodeGenFunction &CGF, Address DestPtr, Address SrcPtr, llvm::Value *Size)=0
EnqueuedBlockInfo emitOpenCLEnqueuedBlock(CodeGenFunction &CGF, const Expr *E)
CallArgList - Type for representing both the value and type of arguments in a call.
Definition: CGCall.h:274
void add(RValue rvalue, QualType type)
Definition: CGCall.h:305
CodeGenFunction - This class organizes the per-function state that is used while generating LLVM code...
llvm::Value * EmitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, llvm::Triple::ArchType Arch)
llvm::Value * EmitFromMemory(llvm::Value *Value, QualType Ty)
EmitFromMemory - Change a scalar value from its memory representation to its value representation.
llvm::Value * EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID, const CallExpr *E)
void FinishFunction(SourceLocation EndLoc=SourceLocation())
FinishFunction - Complete IR generation of the current function.
llvm::Value * EmitLifetimeStart(llvm::TypeSize Size, llvm::Value *Addr)
std::pair< RValue, llvm::Value * > EmitAtomicCompareExchange(LValue Obj, RValue Expected, RValue Desired, SourceLocation Loc, llvm::AtomicOrdering Success=llvm::AtomicOrdering::SequentiallyConsistent, llvm::AtomicOrdering Failure=llvm::AtomicOrdering::SequentiallyConsistent, bool IsWeak=false, AggValueSlot Slot=AggValueSlot::ignored())
static TypeEvaluationKind getEvaluationKind(QualType T)
getEvaluationKind - Return the TypeEvaluationKind of QualType T.
llvm::Value * EmitSVEPredicateCast(llvm::Value *Pred, llvm::ScalableVectorType *VTy)
llvm::CallInst * EmitTrapCall(llvm::Intrinsic::ID IntrID)
Emit a call to trap or debugtrap and attach function attribute "trap-func-name" if specified.
SanitizerSet SanOpts
Sanitizers enabled for this function.
RValue EmitBuiltinIsAligned(const CallExpr *E)
Emit IR for __builtin_is_aligned.
LValue EmitAggExprToLValue(const Expr *E)
EmitAggExprToLValue - Emit the computation of the specified expression of aggregate type into a tempo...
void EmitNonNullArgCheck(RValue RV, QualType ArgType, SourceLocation ArgLoc, AbstractCallee AC, unsigned ParmNum)
Create a check for a function parameter that may potentially be declared as non-null.
llvm::Value * EmitHLSLBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue)
llvm::Value * EmitHexagonBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
void GetAArch64SVEProcessedOperands(unsigned BuiltinID, const CallExpr *E, SmallVectorImpl< llvm::Value * > &Ops, SVETypeFlags TypeFlags)
llvm::Value * EmitAMDGPUBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
void EmitLifetimeEnd(llvm::Value *Size, llvm::Value *Addr)
void pushLifetimeExtendedDestroy(CleanupKind kind, Address addr, QualType type, Destroyer *destroyer, bool useEHCleanupForArray)
void EmitTrapCheck(llvm::Value *Checked, SanitizerHandler CheckHandlerID, bool NoMerge=false)
Create a basic block that will call the trap intrinsic, and emit a conditional branch to it,...
llvm::Value * EmitCheckedArgForBuiltin(const Expr *E, BuiltinCheckKind Kind)
Emits an argument for a call to a builtin.
llvm::Value * EmitSVEGatherLoad(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
LValue EmitHLSLOutArgExpr(const HLSLOutArgExpr *E, CallArgList &Args, QualType Ty)
CleanupKind getARCCleanupKind()
Retrieves the default cleanup kind for an ARC cleanup.
llvm::Value * EmitRISCVCpuSupports(const CallExpr *E)
llvm::Value * EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
llvm::Value * EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
RValue emitBuiltinOSLogFormat(const CallExpr &E)
Emit IR for __builtin_os_log_format.
LValue EmitLValue(const Expr *E, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
EmitLValue - Emit code to compute a designator that specifies the location of the expression.
llvm::Value * EmitVAStartEnd(llvm::Value *ArgValue, bool IsStart)
Emits a call to an LLVM variable-argument intrinsic, either llvm.va_start or llvm....
llvm::Value * EmitSVEMaskedStore(const CallExpr *, SmallVectorImpl< llvm::Value * > &Ops, unsigned BuiltinID)
llvm::Value * EmitSVEReinterpret(llvm::Value *Val, llvm::Type *Ty)
llvm::BasicBlock * createBasicBlock(const Twine &name="", llvm::Function *parent=nullptr, llvm::BasicBlock *before=nullptr)
createBasicBlock - Create an LLVM basic block.
llvm::Value * EmitSEHExceptionInfo()
RValue EmitBuiltinAlignTo(const CallExpr *E, bool AlignUp)
Emit IR for __builtin_align_up/__builtin_align_down.
const LangOptions & getLangOpts() const
llvm::Value * EmitLoadOfCountedByField(const Expr *Base, const FieldDecl *FAMDecl, const FieldDecl *CountDecl)
Build an expression accessing the "counted_by" field.
void ProcessOrderScopeAMDGCN(llvm::Value *Order, llvm::Value *Scope, llvm::AtomicOrdering &AO, llvm::SyncScope::ID &SSID)
llvm::Constant * EmitCheckTypeDescriptor(QualType T)
Emit a description of a type in a format suitable for passing to a runtime sanitizer handler.
void EmitBlock(llvm::BasicBlock *BB, bool IsFinished=false)
EmitBlock - Emit the given block.
void EmitUnreachable(SourceLocation Loc)
Emit a reached-unreachable diagnostic if Loc is valid and runtime checking is enabled.
llvm::Value * EmitSVETupleCreate(const SVETypeFlags &TypeFlags, llvm::Type *ReturnType, ArrayRef< llvm::Value * > Ops)
Address makeNaturalAddressForPointer(llvm::Value *Ptr, QualType T, CharUnits Alignment=CharUnits::Zero(), bool ForPointeeType=false, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
Construct an address with the natural alignment of T.
llvm::Value * EmitCheckedArgForAssume(const Expr *E)
Emits an argument for a call to a __builtin_assume.
ComplexPairTy EmitComplexExpr(const Expr *E, bool IgnoreReal=false, bool IgnoreImag=false)
EmitComplexExpr - Emit the computation of the specified expression of complex type,...
void EmitAnyExprToMem(const Expr *E, Address Location, Qualifiers Quals, bool IsInitializer)
EmitAnyExprToMem - Emits the code necessary to evaluate an arbitrary expression into the given memory...
TypeCheckKind
Situations in which we might emit a check for the suitability of a pointer or glvalue.
@ TCK_Store
Checking the destination of a store. Must be suitably sized and aligned.
@ TCK_Load
Checking the operand of a load. Must be suitably sized and aligned.
llvm::Value * EmitSMELdrStr(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
llvm::Value * EmitNeonSplat(llvm::Value *V, llvm::Constant *Idx, const llvm::ElementCount &Count)
llvm::Type * ConvertTypeForMem(QualType T)
llvm::Value * EmitSVEMaskedLoad(const CallExpr *, llvm::Type *ReturnTy, SmallVectorImpl< llvm::Value * > &Ops, unsigned BuiltinID, bool IsZExtReturn)
bool AlwaysEmitXRayCustomEvents() const
AlwaysEmitXRayCustomEvents - Return true if we must unconditionally emit XRay custom event handling c...
llvm::Value * EmitSVEDupX(llvm::Value *Scalar)
RawAddress CreateMemTemp(QualType T, const Twine &Name="tmp", RawAddress *Alloca=nullptr)
CreateMemTemp - Create a temporary memory object of the given type, with appropriate alignment and cas...
@ Default
No language constraints on evaluation order.
const TargetInfo & getTarget() const
llvm::Value * vectorWrapScalar16(llvm::Value *Op)
llvm::Function * LookupNeonLLVMIntrinsic(unsigned IntrinsicID, unsigned Modifier, llvm::Type *ArgTy, const CallExpr *E)
llvm::Value * getTypeSize(QualType Ty)
Returns calculated size of the specified type.
llvm::Value * EmitSEHExceptionCode()
Address EmitPointerWithAlignment(const Expr *Addr, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
EmitPointerWithAlignment - Given an expression with a pointer type, emit the value and compute our be...
llvm::Value * EmitTargetBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue)
EmitTargetBuiltinExpr - Emit the given builtin call.
RValue EmitAnyExprToTemp(const Expr *E)
EmitAnyExprToTemp - Similarly to EmitAnyExpr(), however, the result will always be accessible even if...
RValue EmitCoroutineIntrinsic(const CallExpr *E, unsigned int IID)
llvm::Value * EmitAArch64SMEBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
RValue EmitAMDGPUDevicePrintfCallExpr(const CallExpr *E)
Address EmitArrayToPointerDecay(const Expr *Array, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr)
RValue EmitBuiltinNewDeleteCall(const FunctionProtoType *Type, const CallExpr *TheCallExpr, bool IsDelete)
void EmitCheck(ArrayRef< std::pair< llvm::Value *, SanitizerKind::SanitizerOrdinal > > Checked, SanitizerHandler Check, ArrayRef< llvm::Constant * > StaticArgs, ArrayRef< llvm::Value * > DynamicArgs)
Create a basic block that will either trap or call a handler function in the UBSan runtime with the p...
RValue EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue)
RValue EmitAnyExpr(const Expr *E, AggValueSlot aggSlot=AggValueSlot::ignored(), bool ignoreResult=false)
EmitAnyExpr - Emit code to compute the specified expression which can have any type.
llvm::Value * EmitSVEPrefetchLoad(const SVETypeFlags &TypeFlags, SmallVectorImpl< llvm::Value * > &Ops, unsigned BuiltinID)
llvm::Value * EmitAArch64CompareBuiltinExpr(llvm::Value *Op, llvm::Type *Ty, const llvm::CmpInst::Predicate Fp, const llvm::CmpInst::Predicate Ip, const llvm::Twine &Name="")
void StartFunction(GlobalDecl GD, QualType RetTy, llvm::Function *Fn, const CGFunctionInfo &FnInfo, const FunctionArgList &Args, SourceLocation Loc=SourceLocation(), SourceLocation StartLoc=SourceLocation())
Emit code for the start of a function.
AggValueSlot CreateAggTemp(QualType T, const Twine &Name="tmp", RawAddress *Alloca=nullptr)
CreateAggTemp - Create a temporary memory object for the given aggregate type.
llvm::ScalableVectorType * getSVEType(const SVETypeFlags &TypeFlags)
RValue emitRotate(const CallExpr *E, bool IsRotateRight)
llvm::Constant * EmitCheckSourceLocation(SourceLocation Loc)
Emit a description of a source location in a format suitable for passing to a runtime sanitizer handl...
void ErrorUnsupported(const Stmt *S, const char *Type)
ErrorUnsupported - Print out an error that codegen doesn't support the specified stmt yet.
const FieldDecl * FindFlexibleArrayMemberFieldAndOffset(ASTContext &Ctx, const RecordDecl *RD, const FieldDecl *FAMDecl, uint64_t &Offset)
llvm::Value * EmitRISCVCpuIs(const CallExpr *E)
Address EmitVAListRef(const Expr *E)
llvm::Value * EmitNeonShiftVector(llvm::Value *V, llvm::Type *Ty, bool negateForRightShift)
llvm::Value * EmitSVEMovl(const SVETypeFlags &TypeFlags, llvm::ArrayRef< llvm::Value * > Ops, unsigned BuiltinID)
void emitAlignmentAssumption(llvm::Value *PtrValue, QualType Ty, SourceLocation Loc, SourceLocation AssumptionLoc, llvm::Value *Alignment, llvm::Value *OffsetValue=nullptr)
const TargetCodeGenInfo & getTargetHooks() const
llvm::Value * EmitARMBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue, llvm::Triple::ArchType Arch)
llvm::Type * getEltType(const SVETypeFlags &TypeFlags)
void EmitAggExpr(const Expr *E, AggValueSlot AS)
EmitAggExpr - Emit the computation of the specified expression of aggregate type.
bool ShouldXRayInstrumentFunction() const
ShouldXRayInstrument - Return true if the current function should be instrumented with XRay nop sleds...
llvm::Value * EmitSVEPMull(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned BuiltinID)
llvm::Value * EmitToMemory(llvm::Value *Value, QualType Ty)
EmitToMemory - Change a scalar value from its value representation to its in-memory representation.
bool IsInPreservedAIRegion
True if CodeGen currently emits code inside preserved access index region.
llvm::Value * EmitARCRetain(QualType type, llvm::Value *value)
bool AlwaysEmitXRayTypedEvents() const
AlwaysEmitXRayTypedEvents - Return true if clang must unconditionally emit XRay typed event handling ...
void SetSqrtFPAccuracy(llvm::Value *Val)
Set the minimum required accuracy of the given sqrt operation based on CodeGenOpts.
RValue EmitCall(const CGFunctionInfo &CallInfo, const CGCallee &Callee, ReturnValueSlot ReturnValue, const CallArgList &Args, llvm::CallBase **CallOrInvoke, bool IsMustTail, SourceLocation Loc, bool IsVirtualFunctionPointerThunk=false)
EmitCall - Generate a call of the given function, expecting the given result type,...
llvm::Value * EmitSVEScatterStore(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
llvm::CallInst * EmitNounwindRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
llvm::Function * generateBuiltinOSLogHelperFunction(const analyze_os_log::OSLogBufferLayout &Layout, CharUnits BufferAlignment)
llvm::Value * EmitLoadOfScalar(Address Addr, bool Volatile, QualType Ty, SourceLocation Loc, AlignmentSource Source=AlignmentSource::Type, bool isNontemporal=false)
EmitLoadOfScalar - Load a scalar value from an address, taking care to appropriately convert from the...
CGCallee EmitCallee(const Expr *E)
const Decl * CurFuncDecl
CurFuncDecl - Holds the Decl for the current outermost non-closure context.
llvm::Value * EmitScalarOrConstFoldImmArg(unsigned ICEArguments, unsigned Idx, const CallExpr *E)
void checkTargetFeatures(const CallExpr *E, const FunctionDecl *TargetDecl)
llvm::Value * BuildVector(ArrayRef< llvm::Value * > Ops)
llvm::Value * EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
llvm::Value * EmitARMCDEBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue, llvm::Triple::ArchType Arch)
llvm::Value * GetCountedByFieldExprGEP(const Expr *Base, const FieldDecl *FAMDecl, const FieldDecl *CountDecl)
llvm::CallInst * EmitRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
llvm::Value * EmitSVEPredicateTupleCast(llvm::Value *PredTuple, llvm::StructType *Ty)
llvm::Type * ConvertType(QualType T)
void EmitWritebacks(const CallArgList &Args)
EmitWriteback - Emit callbacks for function.
llvm::CallBase * EmitRuntimeCallOrInvoke(llvm::FunctionCallee callee, ArrayRef< llvm::Value * > args, const Twine &name="")
llvm::Value * EmitSystemZBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
llvm::Value * EmitSMEReadWrite(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
void EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc, LValue LV, QualType Type, SanitizerSet SkippedChecks=SanitizerSet(), llvm::Value *ArraySize=nullptr)
llvm::Value * EmitSMELd1St1(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
llvm::Value * EmitPPCBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
static Destroyer destroyARCStrongPrecise
void EmitARCIntrinsicUse(ArrayRef< llvm::Value * > values)
RValue EmitNVPTXDevicePrintfCallExpr(const CallExpr *E)
llvm::Value * EvaluateExprAsBool(const Expr *E)
EvaluateExprAsBool - Perform the usual unary conversions on the specified expression and compare the ...
llvm::Value * EmitSVEStructLoad(const SVETypeFlags &TypeFlags, SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
Address EmitMSVAListRef(const Expr *E)
Emit a "reference" to a __builtin_ms_va_list; this is always the value of the expression,...
llvm::Value * EmitCheckedInBoundsGEP(llvm::Type *ElemTy, llvm::Value *Ptr, ArrayRef< llvm::Value * > IdxList, bool SignedIndices, bool IsSubtraction, SourceLocation Loc, const Twine &Name="")
Same as IRBuilder::CreateInBoundsGEP, but additionally emits a check to detect undefined behavior whe...
llvm::Value * EmitNeonRShiftImm(llvm::Value *Vec, llvm::Value *Amt, llvm::Type *Ty, bool usgn, const char *name)
SmallVector< llvm::Type *, 2 > getSVEOverloadTypes(const SVETypeFlags &TypeFlags, llvm::Type *ReturnType, ArrayRef< llvm::Value * > Ops)
static bool hasAggregateEvaluationKind(QualType T)
LValue MakeAddrLValue(Address Addr, QualType T, AlignmentSource Source=AlignmentSource::Type)
llvm::Value * EmitARMMVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue, llvm::Triple::ArchType Arch)
LValue MakeNaturalAlignAddrLValue(llvm::Value *V, QualType T, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
llvm::Value * EmitSVEStructStore(const SVETypeFlags &TypeFlags, SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
Address GetAddrOfLocalVar(const VarDecl *VD)
GetAddrOfLocalVar - Return the address of a local variable.
llvm::Value * EmitSEHAbnormalTermination()
llvm::Value * EmitX86BuiltinExpr(unsigned BuiltinID, const CallExpr *E)
Address ReturnValue
ReturnValue - The temporary alloca to hold the return value.
llvm::Value * EmitSVEAllTruePred(const SVETypeFlags &TypeFlags)
RValue GetUndefRValue(QualType Ty)
GetUndefRValue - Get an appropriate 'undef' rvalue for the given type.
llvm::Type * SVEBuiltinMemEltTy(const SVETypeFlags &TypeFlags)
SVEBuiltinMemEltTy - Returns the memory element type for this memory access builtin.
llvm::LLVMContext & getLLVMContext()
llvm::Value * EmitScalarExpr(const Expr *E, bool IgnoreResultAssign=false)
EmitScalarExpr - Emit the computation of the specified expression of LLVM scalar type,...
void AddAMDGPUFenceAddressSpaceMMRA(llvm::Instruction *Inst, const CallExpr *E)
llvm::Value * EmitSMEZero(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
llvm::Value * EmitRISCVBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue)
llvm::Value * EmitCommonNeonBuiltinExpr(unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic, const char *NameHint, unsigned Modifier, const CallExpr *E, SmallVectorImpl< llvm::Value * > &Ops, Address PtrOp0, Address PtrOp1, llvm::Triple::ArchType Arch)
llvm::Value * EmitNeonCall(llvm::Function *F, SmallVectorImpl< llvm::Value * > &O, const char *name, unsigned shift=0, bool rightshift=false)
llvm::Value * EmitAnnotationCall(llvm::Function *AnnotationFn, llvm::Value *AnnotatedVal, StringRef AnnotationStr, SourceLocation Location, const AnnotateAttr *Attr)
Emit an annotation call (intrinsic).
llvm::ScalableVectorType * getSVEPredType(const SVETypeFlags &TypeFlags)
llvm::Value * EmitSVEGatherPrefetch(const SVETypeFlags &TypeFlags, SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
void EmitStoreOfScalar(llvm::Value *Value, Address Addr, bool Volatile, QualType Ty, AlignmentSource Source=AlignmentSource::Type, bool isInit=false, bool isNontemporal=false)
EmitStoreOfScalar - Store a scalar value to an address, taking care to appropriately convert from the...
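As a minimal sketch (not taken from this file), the two helpers above pair up to copy a scalar between addresses; CGF, Src, Dst, Ty, and Loc are assumed to already be in scope and the usual CGBuiltin.cpp include set is assumed.
// Src/Dst are clang::CodeGen::Address values, Ty a QualType, Loc a SourceLocation.
llvm::Value *Scalar =
    CGF.EmitLoadOfScalar(Src, /*Volatile=*/false, Ty, Loc);
CGF.EmitStoreOfScalar(Scalar, Dst, /*Volatile=*/false, Ty);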
llvm::Value * EmitSPIRVBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
llvm::Value * EmitBPFBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
llvm::Value * EmitSVETupleSetOrGet(const SVETypeFlags &TypeFlags, ArrayRef< llvm::Value * > Ops)
This class organizes the cross-function state that is used while generating LLVM code.
CGHLSLRuntime & getHLSLRuntime()
Return a reference to the configured HLSL runtime.
llvm::Module & getModule() const
llvm::FunctionCallee CreateRuntimeFunction(llvm::FunctionType *Ty, StringRef Name, llvm::AttributeList ExtraAttrs=llvm::AttributeList(), bool Local=false, bool AssumeConvergent=false)
Create or return a runtime function declaration with the specified type and name.
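A hedged sketch of the usual declare-then-call pattern, combining CreateRuntimeFunction with the EmitRuntimeCall*/EmitNounwindRuntimeCall helpers listed earlier; the runtime entry point "__example_hook" is hypothetical and CGF/Arg are assumed to be in scope.
// Assumes the CGBuiltin.cpp include set; CGF is a CodeGenFunction, Arg an llvm::Value*.
llvm::LLVMContext &Ctx = CGF.getLLVMContext();
// void __example_hook(ptr) -- a made-up helper used only for illustration.
llvm::FunctionType *FTy = llvm::FunctionType::get(
    llvm::Type::getVoidTy(Ctx), {llvm::PointerType::getUnqual(Ctx)},
    /*isVarArg=*/false);
llvm::FunctionCallee Hook =
    CGF.CGM.CreateRuntimeFunction(FTy, "__example_hook");
// Emits a call, or an invoke if the current insertion point needs EH handling.
CGF.EmitRuntimeCallOrInvoke(Hook, {Arg});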
llvm::Constant * getRawFunctionPointer(GlobalDecl GD, llvm::Type *Ty=nullptr)
Return a function pointer for a reference to the given function.
Definition: CGExpr.cpp:2913
llvm::Constant * getBuiltinLibFunction(const FunctionDecl *FD, unsigned BuiltinID)
Given a builtin id for a function like "__builtin_fabsf", return a Function* for "fabsf".
Definition: CGBuiltin.cpp:263
DiagnosticsEngine & getDiags() const
void ErrorUnsupported(const Stmt *S, const char *Type)
Print out an error that codegen doesn't support the specified stmt yet.
const LangOptions & getLangOpts() const
CGCUDARuntime & getCUDARuntime()
Return a reference to the configured CUDA runtime.
CGOpenCLRuntime & getOpenCLRuntime()
Return a reference to the configured OpenCL runtime.
const TargetInfo & getTarget() const
const llvm::DataLayout & getDataLayout() const
void Error(SourceLocation loc, StringRef error)
Emit a general error that something can't be done.
CGCXXABI & getCXXABI() const
llvm::Constant * GetFunctionStart(const ValueDecl *Decl)
const llvm::Triple & getTriple() const
void DecorateInstructionWithTBAA(llvm::Instruction *Inst, TBAAAccessInfo TBAAInfo)
DecorateInstructionWithTBAA - Decorate the instruction with a TBAA tag.
llvm::Constant * CreateRuntimeVariable(llvm::Type *Ty, StringRef Name)
Create a new runtime global variable with the specified type and name.
TBAAAccessInfo getTBAAAccessInfo(QualType AccessType)
getTBAAAccessInfo - Get TBAA information that describes an access to an object of the given type.
ASTContext & getContext() const
const CodeGenOptions & getCodeGenOpts() const
StringRef getMangledName(GlobalDecl GD)
CharUnits getNaturalPointeeTypeAlignment(QualType T, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr)
llvm::LLVMContext & getLLVMContext()
llvm::Function * getIntrinsic(unsigned IID, ArrayRef< llvm::Type * > Tys={})
CGObjCRuntime & getObjCRuntime()
Return a reference to the configured Objective-C runtime.
void SetLLVMFunctionAttributes(GlobalDecl GD, const CGFunctionInfo &Info, llvm::Function *F, bool IsThunk)
Set the LLVM function attributes (sext, zext, etc).
void SetLLVMFunctionAttributesForDefinition(const Decl *D, llvm::Function *F)
Set the LLVM function attributes which only apply to a function definition.
ConstantAddress GetAddrOfConstantCString(const std::string &Str, const char *GlobalName=nullptr)
Returns a pointer to a character array containing the literal and a terminating '\0' character.
llvm::Type * ConvertType(QualType T)
ConvertType - Convert type T into a llvm::Type.
llvm::FunctionType * GetFunctionType(const CGFunctionInfo &Info)
GetFunctionType - Get the LLVM function type for the given CGFunctionInfo.
Definition: CGCall.cpp:1630
const CGFunctionInfo & arrangeBuiltinFunctionDeclaration(QualType resultType, const FunctionArgList &args)
A builtin function is a freestanding function using the default C conventions.
Definition: CGCall.cpp:679
const CGFunctionInfo & arrangeBuiltinFunctionCall(QualType resultType, const CallArgList &args)
Definition: CGCall.cpp:667
llvm::Constant * emitAbstract(const Expr *E, QualType T)
Emit the result of the given expression as an abstract constant, asserting that it succeeded.
Information for lazily generating a cleanup.
Definition: EHScopeStack.h:141
FunctionArgList - Type for representing both the decl and type of parameters to a function.
Definition: CGCall.h:382
LValue - This represents an lvalue reference.
Definition: CGValue.h:182
llvm::Value * getRawBitFieldPointer(CodeGenFunction &CGF) const
Definition: CGValue.h:419
llvm::Value * getPointer(CodeGenFunction &CGF) const
Address getAddress() const
Definition: CGValue.h:361
void setNontemporal(bool Value)
Definition: CGValue.h:319
llvm::Value * emitRawPointer(CodeGenFunction &CGF) const
RValue - This trivial value class is used to represent the result of an expression that is evaluated.
Definition: CGValue.h:42
llvm::Value * getAggregatePointer(QualType PointeeType, CodeGenFunction &CGF) const
Definition: CGValue.h:88
bool isScalar() const
Definition: CGValue.h:64
static RValue getIgnored()
Definition: CGValue.h:93
static RValue get(llvm::Value *V)
Definition: CGValue.h:98
static RValue getAggregate(Address addr, bool isVolatile=false)
Convert an Address to an RValue.
Definition: CGValue.h:125
static RValue getComplex(llvm::Value *V1, llvm::Value *V2)
Definition: CGValue.h:108
llvm::Value * getScalarVal() const
getScalarVal() - Return the Value* of this scalar value.
Definition: CGValue.h:71
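A brief sketch of the three payload kinds an RValue can carry, using the static constructors above; V, RealV, ImagV, and Addr are assumed placeholders (llvm::Value* and a clang::CodeGen::Address).
RValue Scalar = RValue::get(V);                   // single SSA value
RValue Cplx   = RValue::getComplex(RealV, ImagV); // complex real/imag pair
RValue Agg    = RValue::getAggregate(Addr);       // aggregate living in memory
if (Scalar.isScalar())
  llvm::Value *Raw = Scalar.getScalarVal();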
An abstract representation of an aligned address.
Definition: Address.h:42
llvm::Value * getPointer() const
Definition: Address.h:66
static RawAddress invalid()
Definition: Address.h:61
ReturnValueSlot - Contains the address where the return value of a function can be stored,...
Definition: CGCall.h:386
virtual llvm::Value * encodeReturnAddress(CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const
Performs the code-generation required to convert the address of an instruction into a return address ...
Definition: TargetInfo.h:170
virtual llvm::Value * decodeReturnAddress(CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const
Performs the code-generation required to convert a return address as stored by the system into the ac...
Definition: TargetInfo.h:160
const T & getABIInfo() const
Definition: TargetInfo.h:57
virtual int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const
Determines the DWARF register number for the stack pointer, for exception-handling purposes.
Definition: TargetInfo.h:142
virtual llvm::Value * testFPKind(llvm::Value *V, unsigned BuiltinID, CGBuilderTy &Builder, CodeGenModule &CGM) const
Performs a target specific test of a floating point value for things like IsNaN, Infinity,...
Definition: TargetInfo.h:179
Complex values, per C99 6.2.5p11.
Definition: Type.h:3145
Represents a concrete matrix type with constant number of rows and columns.
Definition: Type.h:4232
Represents a sugar type with __counted_by or __sized_by annotations, including their _or_null variant...
Definition: Type.h:3306
DynamicCountPointerKind getKind() const
Definition: Type.h:3336
RecordDecl * getOuterLexicalRecordContext()
Retrieve the outermost lexically enclosing record context.
Definition: DeclBase.cpp:2036
T * getAttr() const
Definition: DeclBase.h:576
bool isImplicit() const
isImplicit - Indicates whether the declaration was implicitly generated by the implementation.
Definition: DeclBase.h:596
FunctionDecl * getAsFunction() LLVM_READONLY
Returns the function itself, or the templated function if this is a function template.
Definition: DeclBase.cpp:254
DeclContext * getDeclContext()
Definition: DeclBase.h:451
static bool isFlexibleArrayMemberLike(ASTContext &Context, const Decl *D, QualType Ty, LangOptions::StrictFlexArraysLevelKind StrictFlexArraysLevel, bool IgnoreTemplateOrMacroSubstitution)
Whether it resembles a flexible array member.
Definition: DeclBase.cpp:440
bool hasAttr() const
Definition: DeclBase.h:580
Concrete class used by the front-end to report problems and issues.
Definition: Diagnostic.h:231
DiagnosticBuilder Report(SourceLocation Loc, unsigned DiagID)
Issue the message to the client.
Definition: Diagnostic.h:1493
This represents one expression.
Definition: Expr.h:110
bool EvaluateAsInt(EvalResult &Result, const ASTContext &Ctx, SideEffectsKind AllowSideEffects=SE_NoSideEffects, bool InConstantContext=false) const
EvaluateAsInt - Return true if this is a constant which we can fold and convert to an integer,...
Expr * IgnoreParenCasts() LLVM_READONLY
Skip past any parentheses and casts which might surround this expression until reaching a fixed point...
Definition: Expr.cpp:3102
llvm::APSInt EvaluateKnownConstInt(const ASTContext &Ctx, SmallVectorImpl< PartialDiagnosticAt > *Diag=nullptr) const
EvaluateKnownConstInt - Call EvaluateAsRValue and return the folded integer.
Expr * IgnoreParenImpCasts() LLVM_READONLY
Skip past any parentheses and implicit casts which might surround this expression until reaching a fi...
Definition: Expr.cpp:3097
bool EvaluateAsFloat(llvm::APFloat &Result, const ASTContext &Ctx, SideEffectsKind AllowSideEffects=SE_NoSideEffects, bool InConstantContext=false) const
EvaluateAsFloat - Return true if this is a constant which we can fold and convert to a floating point...
Expr * IgnoreParens() LLVM_READONLY
Skip past any parentheses which might surround this expression until reaching a fixed point.
Definition: Expr.cpp:3093
bool isPRValue() const
Definition: Expr.h:278
@ NPC_ValueDependentIsNotNull
Specifies that a value-dependent expression should be considered to never be a null pointer constant.
Definition: Expr.h:830
ExprObjectKind getObjectKind() const
getObjectKind - The object kind that this expression produces.
Definition: Expr.h:444
bool EvaluateAsRValue(EvalResult &Result, const ASTContext &Ctx, bool InConstantContext=false) const
EvaluateAsRValue - Return true if this is a constant which we can fold to an rvalue using any crazy t...
bool HasSideEffects(const ASTContext &Ctx, bool IncludePossibleEffects=true) const
HasSideEffects - This routine returns true for all those expressions which have any effect other than...
Definition: Expr.cpp:3594
std::optional< std::string > tryEvaluateString(ASTContext &Ctx) const
If the current Expr can be evaluated to a pointer to a null-terminated constant string,...
Expr * IgnoreImpCasts() LLVM_READONLY
Skip past any implicit casts which might surround this expression until reaching a fixed point.
Definition: Expr.cpp:3077
NullPointerConstantKind isNullPointerConstant(ASTContext &Ctx, NullPointerConstantValueDependence NPC) const
isNullPointerConstant - C99 6.3.2.3p3 - Test if this reduces down to a Null pointer constant.
Definition: Expr.cpp:3970
SourceLocation getExprLoc() const LLVM_READONLY
getExprLoc - Return the preferred location for the arrow when diagnosing a problem with a generic exp...
Definition: Expr.cpp:276
std::optional< llvm::APSInt > getIntegerConstantExpr(const ASTContext &Ctx, SourceLocation *Loc=nullptr) const
getIntegerConstantExpr - Return the value if this expression is a valid integer constant expression.
QualType getType() const
Definition: Expr.h:142
bool tryEvaluateObjectSize(uint64_t &Result, ASTContext &Ctx, unsigned Type) const
If the current Expr is a pointer, this will try to statically determine the number of bytes available...
const ValueDecl * getAsBuiltinConstantDeclRef(const ASTContext &Context) const
If this expression is an unambiguous reference to a single declaration, in the style of __builtin_fun...
Definition: Expr.cpp:225
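A small sketch of how a call argument is typically folded to a constant with the evaluation helpers above; E and Ctx (an Expr* and the ASTContext) are assumed to be in scope.
if (std::optional<llvm::APSInt> CI = E->getIntegerConstantExpr(Ctx)) {
  // Use *CI (e.g. CI->getZExtValue()) as an immediate operand.
}
llvm::APFloat F(0.0);
if (E->EvaluateAsFloat(F, Ctx)) {
  // E folded to the floating-point constant F.
}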
Represents difference between two FPOptions values.
Definition: LangOptions.h:978
Represents a member of a struct/union/class.
Definition: Decl.h:3033
const FieldDecl * findCountedByField() const
Find the FieldDecl specified in a FAM's "counted_by" attribute.
Definition: Decl.cpp:4718
Represents a function declaration or definition.
Definition: Decl.h:1935
const ParmVarDecl * getParamDecl(unsigned i) const
Definition: Decl.h:2672
unsigned getBuiltinID(bool ConsiderWrapperFunctions=false) const
Returns a value indicating whether this function corresponds to a builtin function.
Definition: Decl.cpp:3649
Represents a prototype with parameter type info, e.g.
Definition: Type.h:5107
@ SME_PStateSMEnabledMask
Definition: Type.h:4587
@ SME_PStateSMCompatibleMask
Definition: Type.h:4588
GlobalDecl - represents a global declaration.
Definition: GlobalDecl.h:56
const Decl * getDecl() const
Definition: GlobalDecl.h:103
IdentifierInfo & get(StringRef Name)
Return the identifier token info for the specified named identifier.
static ImplicitParamDecl * Create(ASTContext &C, DeclContext *DC, SourceLocation IdLoc, IdentifierInfo *Id, QualType T, ImplicitParamKind ParamKind)
Create implicit parameter.
Definition: Decl.cpp:5417
MemberExpr - [C99 6.5.2.3] Structure and Union Members.
Definition: Expr.h:3236
StringRef getName() const
Get the name of identifier for this declaration as a StringRef.
Definition: Decl.h:280
std::string getNameAsString() const
Get a human-readable name for the declaration, even if it is one of the special kinds of names (C++ c...
Definition: Decl.h:296
Flags to identify the types for overloaded Neon builtins.
EltType getEltType() const
PipeType - Represents the OpenCL 2.0 pipe type.
Definition: Type.h:7785
PointerType - C99 6.7.5.1 - Pointer Declarators.
Definition: Type.h:3198
QualType getPointeeType() const
Definition: Type.h:3208
A (possibly-)qualified type.
Definition: Type.h:929
bool isVolatileQualified() const
Determine whether this type is volatile-qualified.
Definition: Type.h:8020
bool isWebAssemblyFuncrefType() const
Returns true if it is a WebAssembly Funcref Type.
Definition: Type.cpp:2893
LangAS getAddressSpace() const
Return the address space of this type.
Definition: Type.h:8062
bool isWebAssemblyExternrefType() const
Returns true if it is a WebAssembly Externref Type.
Definition: Type.cpp:2889
The collection of all-type qualifiers we support.
Definition: Type.h:324
Represents a struct/union/class.
Definition: Decl.h:4162
field_range fields() const
Definition: Decl.h:4376
Flags to identify the types for overloaded SVE builtins.
bool isZExtReturn() const
bool isReverseUSDOT() const
bool isOverloadNone() const
bool isUndef() const
MemEltType getMemEltType() const
bool isWriteZA() const
bool isGatherLoad() const
bool isOverloadCvt() const
EltType getEltType() const
bool isOverloadDefault() const
bool isPrefetch() const
bool isOverloadWhileRW() const
bool isReadZA() const
bool isTupleSet() const
bool isReverseMergeAnyAccOp() const
bool isReductionQV() const
bool isTupleGet() const
bool isInsertOp1SVALL() const
bool isAppendSVALL() const
bool isReverseMergeAnyBinOp() const
bool isStructStore() const
bool isTupleCreate() const
bool isGatherPrefetch() const
bool hasSplatOperand() const
MergeType getMergeType() const
bool isByteIndexed() const
bool isStructLoad() const
bool setsFPMR() const
bool isOverloadWhileOrMultiVecCvt() const
unsigned getSplatOperand() const
bool isStore() const
bool isScatterStore() const
bool isReverseCompare() const
Scope - A scope is a transient data structure that is used while parsing the program.
Definition: Scope.h:41
Encodes a location in the source.
SourceLocation getBeginLoc() const LLVM_READONLY
Definition: Stmt.cpp:346
bool isUnion() const
Definition: Decl.h:3784
bool areArgsDestroyedLeftToRightInCallee() const
Are arguments to a call destroyed left to right in the callee? This is a fundamental language change,...
Definition: TargetCXXABI.h:188
Exposes information about the current target.
Definition: TargetInfo.h:220
TargetOptions & getTargetOpts() const
Retrieve the target options.
Definition: TargetInfo.h:311
virtual bool hasLegalHalfType() const
Determine whether _Float16 is supported on this target.
Definition: TargetInfo.h:697
const llvm::Triple & getTriple() const
Returns the target triple of the primary target.
Definition: TargetInfo.h:1262
bool isLittleEndian() const
Definition: TargetInfo.h:1673
unsigned getMaxOpenCLWorkGroupSize() const
Definition: TargetInfo.h:861
bool isBigEndian() const
Definition: TargetInfo.h:1672
TargetCXXABI getCXXABI() const
Get the C++ ABI currently in use.
Definition: TargetInfo.h:1333
virtual bool checkArithmeticFenceSupported() const
Controls if __arithmetic_fence is supported in the targeted backend.
Definition: TargetInfo.h:1679
unsigned getSuitableAlign() const
Return the alignment that is the largest alignment ever used for any scalar/SIMD data type on the tar...
Definition: TargetInfo.h:733
virtual std::string_view getClobbers() const =0
Returns a string of target-specific clobbers, in LLVM format.
llvm::CodeObjectVersionKind CodeObjectVersion
Code object version for AMDGPU.
Definition: TargetOptions.h:82
The base class of the type hierarchy.
Definition: Type.h:1828
CXXRecordDecl * getAsCXXRecordDecl() const
Retrieves the CXXRecordDecl that this type refers to, either because the type is a RecordType or beca...
Definition: Type.cpp:1916
bool isBlockPointerType() const
Definition: Type.h:8205
bool isVoidType() const
Definition: Type.h:8515
bool isSignedIntegerType() const
Return true if this is an integer type that is signed, according to C99 6.2.5p4 [char,...
Definition: Type.cpp:2180
bool isComplexType() const
isComplexType() does not include complex integers (a GCC extension).
Definition: Type.cpp:710
bool hasIntegerRepresentation() const
Determine whether this type has an integer representation of some sort, e.g., it is an integer type o...
Definition: Type.cpp:2055
bool isArrayType() const
Definition: Type.h:8263
bool isCountAttributedType() const
Definition: Type.cpp:727
bool isPointerType() const
Definition: Type.h:8191
bool isIntegerType() const
isIntegerType() does not include complex integers (a GCC extension).
Definition: Type.h:8555
const T * castAs() const
Member-template castAs<specific type>.
Definition: Type.h:8805
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee.
Definition: Type.cpp:738
bool isIntegralOrEnumerationType() const
Determine whether this type is an integral or enumeration type.
Definition: Type.h:8630
bool hasUnsignedIntegerRepresentation() const
Determine whether this type has an unsigned integer representation of some sort, e....
Definition: Type.cpp:2270
bool hasSignedIntegerRepresentation() const
Determine whether this type has a signed integer representation of some sort, e.g....
Definition: Type.cpp:2220
bool isObjCObjectPointerType() const
Definition: Type.h:8333
bool hasFloatingRepresentation() const
Determine whether this type has a floating-point representation of some sort, e.g....
Definition: Type.cpp:2292
bool isVectorType() const
Definition: Type.h:8303
bool isFloatingType() const
Definition: Type.cpp:2283
bool isUnsignedIntegerType() const
Return true if this is an integer type that is unsigned, according to C99 6.2.5p6 [which returns true...
Definition: Type.cpp:2230
const T * getAs() const
Member-template getAs<specific type>.
Definition: Type.h:8736
bool isRecordType() const
Definition: Type.h:8291
bool isSizelessVectorType() const
Returns true for all scalable vector types.
Definition: Type.cpp:2513
QualType getSizelessVectorEltType(const ASTContext &Ctx) const
Returns the representative type for the element of a sizeless vector builtin type.
Definition: Type.cpp:2581
RecordDecl * getAsRecordDecl() const
Retrieves the RecordDecl this type refers to.
Definition: Type.cpp:1920
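A minimal sketch of how the QualType/Type queries above are commonly combined; QT is an assumed QualType already in hand.
if (QT->isPointerType()) {
  clang::QualType Pointee = QT->castAs<clang::PointerType>()->getPointeeType();
  (void)Pointee; // e.g. pick a lowering based on the pointee type
} else if (QT->isIntegerType()) {
  bool IsSigned = QT->isSignedIntegerType();
  (void)IsSigned;
}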
Represent the declaration of a variable (in which case it is an lvalue), a function (in which case it ...
Definition: Decl.h:671
QualType getType() const
Definition: Decl.h:682
QualType getType() const
Definition: Value.cpp:234
Represents a GCC generic vector type.
Definition: Type.h:4034
unsigned getNumElements() const
Definition: Type.h:4049
QualType getElementType() const
Definition: Type.h:4048
SmallVector< OSLogBufferItem, 4 > Items
Definition: OSLog.h:113
unsigned char getNumArgsByte() const
Definition: OSLog.h:148
unsigned char getSummaryByte() const
Definition: OSLog.h:139
Defines the clang::TargetInfo interface.
@ Type
The l-value was considered opaque, so the alignment was determined from a type.
llvm::Constant * initializationPatternFor(CodeGenModule &, llvm::Type *)
Definition: PatternInit.cpp:15
TypeEvaluationKind
The kind of evaluation to perform on values of a particular type.
@ EHCleanup
Denotes a cleanup that should run when a scope is exited using exceptional control flow (a throw stat...
Definition: EHScopeStack.h:80
constexpr XRayInstrMask Typed
Definition: XRayInstr.h:42
constexpr XRayInstrMask Custom
Definition: XRayInstr.h:41
bool computeOSLogBufferLayout(clang::ASTContext &Ctx, const clang::CallExpr *E, OSLogBufferLayout &layout)
Definition: OSLog.cpp:180
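A hedged sketch of driving the os_log layout helper above for a __builtin_os_log_format call; Ctx and E (the ASTContext and the CallExpr) are assumed to be in scope.
clang::analyze_os_log::OSLogBufferLayout Layout;
if (clang::analyze_os_log::computeOSLogBufferLayout(Ctx, E, Layout)) {
  unsigned char Summary = Layout.getSummaryByte();
  unsigned char NumArgs = Layout.getNumArgsByte();
  for (const auto &Item : Layout.Items) {
    (void)Item; // each entry describes one slot in the os_log buffer
  }
  (void)Summary; (void)NumArgs;
}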
const void * Store
Store - This opaque type encapsulates an immutable mapping from locations to values.
Definition: StoreRef.h:27
llvm::APFloat APFloat
Definition: Floating.h:23
llvm::APInt APInt
Definition: FixedPoint.h:19
bool Dup(InterpState &S, CodePtr OpPC)
Definition: Interp.h:1212
bool Zero(InterpState &S, CodePtr OpPC)
Definition: Interp.h:2350
bool Mul(InterpState &S, CodePtr OpPC)
Definition: Interp.h:447
bool Neg(InterpState &S, CodePtr OpPC)
Definition: Interp.h:726
bool Load(InterpState &S, CodePtr OpPC)
Definition: Interp.h:1693
bool Cast(InterpState &S, CodePtr OpPC)
Definition: Interp.h:2126
bool Ret(InterpState &S, CodePtr &PC)
Definition: Interp.h:318
The JSON file list parser is used to communicate input to InstallAPI.
@ OK_BitField
A bitfield object is a bitfield on a C or C++ record.
Definition: Specifiers.h:154
@ Vector
'vector' clause, allowed on 'loop', Combined, and 'routine' directives.
@ DType
'dtype' clause, an alias for 'device_type', stored separately for diagnostic purposes.
bool operator<(DeclarationName LHS, DeclarationName RHS)
Ordering on two declaration names.
@ Asm
Assembly: we accept this only so that we can preprocess it.
@ Result
The result type of a method or function.
LangAS
Defines the address space values used by the address space qualifier of QualType.
Definition: AddressSpaces.h:25
const FunctionProtoType * T
SyncScope
Defines synch scope values used internally by clang.
Definition: SyncScope.h:42
llvm::StringRef getAsString(SyncScope S)
Definition: SyncScope.h:60
@ Success
Template argument deduction was successful.
@ Other
Other implicit parameter.
int int32_t
unsigned long uint64_t
long int64_t
unsigned int uint32_t
Diagnostic wrappers for TextAPI types for error reporting.
Definition: Dominators.h:30
#define true
Definition: stdbool.h:25
llvm::PointerType * ConstGlobalsPtrTy
void* in the address space for constant globals
llvm::IntegerType * Int8Ty
i8, i16, i32, and i64
llvm::Type * HalfTy
half, bfloat, float, double
llvm::IntegerType * IntTy
int
llvm::PointerType * AllocaInt8PtrTy
EvalResult is a struct with detailed info about an evaluated expression.
Definition: Expr.h:642
APValue Val
Val - This is the value the expression can be folded to.
Definition: Expr.h:644
void clear(SanitizerMask K=SanitizerKind::All)
Disable the sanitizers specified in K.
Definition: Sanitizers.h:190
void set(SanitizerMask K, bool Value)
Enable or disable a certain (single) sanitizer.
Definition: Sanitizers.h:182
bool has(SanitizerMask K) const
Check if a certain (single) sanitizer is enabled.
Definition: Sanitizers.h:169
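A short sketch of building the SkippedChecks set that EmitTypeCheck accepts, using the SanitizerSet operations above; which checks to skip is purely illustrative.
clang::SanitizerSet Skipped;
Skipped.set(clang::SanitizerKind::Null, true);      // don't re-emit the null check
Skipped.set(clang::SanitizerKind::Alignment, true); // nor the alignment check
if (!Skipped.has(clang::SanitizerKind::Null)) {
  // would still emit the null check here
}
Skipped.clear(); // back to "nothing skipped"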
bool has(XRayInstrMask K) const
Definition: XRayInstr.h:48
#define sinh(__x)
Definition: tgmath.h:373
#define asin(__x)
Definition: tgmath.h:112
#define scalbln(__x, __y)
Definition: tgmath.h:1182
#define sqrt(__x)
Definition: tgmath.h:520
#define acos(__x)
Definition: tgmath.h:83
#define fmin(__x, __y)
Definition: tgmath.h:780
#define exp(__x)
Definition: tgmath.h:431
#define ilogb(__x)
Definition: tgmath.h:851
#define copysign(__x, __y)
Definition: tgmath.h:618
#define erf(__x)
Definition: tgmath.h:636
#define atanh(__x)
Definition: tgmath.h:228
#define remquo(__x, __y, __z)
Definition: tgmath.h:1111
#define nextafter(__x, __y)
Definition: tgmath.h:1055
#define frexp(__x, __y)
Definition: tgmath.h:816
#define asinh(__x)
Definition: tgmath.h:199
#define erfc(__x)
Definition: tgmath.h:653
#define atan2(__x, __y)
Definition: tgmath.h:566
#define nexttoward(__x, __y)
Definition: tgmath.h:1073
#define hypot(__x, __y)
Definition: tgmath.h:833
#define exp2(__x)
Definition: tgmath.h:670
#define sin(__x)
Definition: tgmath.h:286
#define cbrt(__x)
Definition: tgmath.h:584
#define log2(__x)
Definition: tgmath.h:970
#define llround(__x)
Definition: tgmath.h:919
#define cosh(__x)
Definition: tgmath.h:344
#define trunc(__x)
Definition: tgmath.h:1216
#define fmax(__x, __y)
Definition: tgmath.h:762
#define ldexp(__x, __y)
Definition: tgmath.h:868
#define acosh(__x)
Definition: tgmath.h:170
#define tgamma(__x)
Definition: tgmath.h:1199
#define scalbn(__x, __y)
Definition: tgmath.h:1165
#define round(__x)
Definition: tgmath.h:1148
#define fmod(__x, __y)
Definition: tgmath.h:798
#define llrint(__x)
Definition: tgmath.h:902
#define tan(__x)
Definition: tgmath.h:315
#define cos(__x)
Definition: tgmath.h:257
#define log10(__x)
Definition: tgmath.h:936
#define fabs(__x)
Definition: tgmath.h:549
#define pow(__x, __y)
Definition: tgmath.h:490
#define log1p(__x)
Definition: tgmath.h:953
#define rint(__x)
Definition: tgmath.h:1131
#define expm1(__x)
Definition: tgmath.h:687
#define remainder(__x, __y)
Definition: tgmath.h:1090
#define fdim(__x, __y)
Definition: tgmath.h:704
#define lgamma(__x)
Definition: tgmath.h:885
#define tanh(__x)
Definition: tgmath.h:402
#define lrint(__x)
Definition: tgmath.h:1004
#define atan(__x)
Definition: tgmath.h:141
#define floor(__x)
Definition: tgmath.h:722
#define ceil(__x)
Definition: tgmath.h:601
#define log(__x)
Definition: tgmath.h:460
#define logb(__x)
Definition: tgmath.h:987
#define nearbyint(__x)
Definition: tgmath.h:1038
#define lround(__x)
Definition: tgmath.h:1021
#define fma(__x, __y, __z)
Definition: tgmath.h:742