LLVM 23.0.0git
X86ISelLoweringCall.cpp
Go to the documentation of this file.
1//===- llvm/lib/Target/X86/X86ISelLoweringCall.cpp - Call lowering --------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file implements the lowering of LLVM calls to DAG nodes.
11//
12//===----------------------------------------------------------------------===//
13
15#include "X86.h"
16#include "X86CallingConv.h"
17#include "X86FrameLowering.h"
18#include "X86ISelLowering.h"
19#include "X86InstrBuilder.h"
21#include "X86TargetMachine.h"
22#include "llvm/ADT/Statistic.h"
28#include "llvm/IR/IRBuilder.h"
29#include "llvm/IR/Module.h"
31
32#define DEBUG_TYPE "x86-isel"
33
34using namespace llvm;
35
36STATISTIC(NumTailCalls, "Number of tail calls");
37
38/// Call this when the user attempts to do something unsupported, like
39/// returning a double without SSE2 enabled on x86_64. This is not fatal, unlike
40/// report_fatal_error, so calling code should attempt to recover without
41/// crashing.
42static void errorUnsupported(SelectionDAG &DAG, const SDLoc &dl,
43 const char *Msg) {
45 DAG.getContext()->diagnose(
47}
48
49/// Returns true if a CC can dynamically exclude a register from the list of
50/// callee-saved-registers (TargetRegisterInfo::getCalleeSavedRegs()) based on
51/// the return registers.
53 switch (CC) {
54 default:
55 return false;
59 return true;
60 }
61}
62
63/// Returns true if a CC can dynamically exclude a register from the list of
64/// callee-saved-registers (TargetRegisterInfo::getCalleeSavedRegs()) based on
65/// the parameters.
69
70static std::pair<MVT, unsigned>
72 const X86Subtarget &Subtarget) {
73 // v2i1/v4i1/v8i1/v16i1 all pass in xmm registers unless the calling
74 // convention is one that uses k registers.
75 if (NumElts == 2)
76 return {MVT::v2i64, 1};
77 if (NumElts == 4)
78 return {MVT::v4i32, 1};
79 if (NumElts == 8 && CC != CallingConv::X86_RegCall &&
81 return {MVT::v8i16, 1};
82 if (NumElts == 16 && CC != CallingConv::X86_RegCall &&
84 return {MVT::v16i8, 1};
85 // v32i1 passes in ymm unless we have BWI and the calling convention is
86 // regcall.
87 if (NumElts == 32 && (!Subtarget.hasBWI() || CC != CallingConv::X86_RegCall))
88 return {MVT::v32i8, 1};
89 // Split v64i1 vectors if we don't have v64i8 available.
90 if (NumElts == 64 && Subtarget.hasBWI() && CC != CallingConv::X86_RegCall) {
91 if (Subtarget.useAVX512Regs())
92 return {MVT::v64i8, 1};
93 return {MVT::v32i8, 2};
94 }
95
96 // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
97 if (!isPowerOf2_32(NumElts) || (NumElts == 64 && !Subtarget.hasBWI()) ||
98 NumElts > 64)
99 return {MVT::i8, NumElts};
100
102}
103
106 EVT VT) const {
107 if (VT.isVector()) {
108 if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512()) {
109 unsigned NumElts = VT.getVectorNumElements();
110
111 MVT RegisterVT;
112 unsigned NumRegisters;
113 std::tie(RegisterVT, NumRegisters) =
114 handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
115 if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
116 return RegisterVT;
117 }
118
119 if (VT.getVectorElementType() == MVT::f16 && VT.getVectorNumElements() < 8)
120 return MVT::v8f16;
121 }
122
123 // We will use more GPRs for f64 and f80 on 32 bits when x87 is disabled.
124 if ((VT == MVT::f64 || VT == MVT::f80) && !Subtarget.is64Bit() &&
125 !Subtarget.hasX87())
126 return MVT::i32;
127
128 if (isTypeLegal(MVT::f16)) {
129 if (VT.isVectorOf(MVT::bf16))
131 Context, CC, VT.changeVectorElementType(Context, MVT::f16));
132
133 if (VT == MVT::bf16)
134 return MVT::f16;
135 }
136
137 return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
138}
139
142 EVT VT) const {
143 if (VT.isVector()) {
144 if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512()) {
145 unsigned NumElts = VT.getVectorNumElements();
146
147 MVT RegisterVT;
148 unsigned NumRegisters;
149 std::tie(RegisterVT, NumRegisters) =
150 handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
151 if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
152 return NumRegisters;
153 }
154
155 if (VT.getVectorElementType() == MVT::f16 && VT.getVectorNumElements() < 8)
156 return 1;
157 }
158
159 // We have to split f64 to 2 registers and f80 to 3 registers on 32 bits if
160 // x87 is disabled.
161 if (!Subtarget.is64Bit() && !Subtarget.hasX87()) {
162 if (VT == MVT::f64)
163 return 2;
164 if (VT == MVT::f80)
165 return 3;
166 }
167
168 if (VT.isVectorOf(MVT::bf16) && isTypeLegal(MVT::f16))
170 Context, CC, VT.changeVectorElementType(Context, MVT::f16));
171
172 return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
173}
174
176 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
177 unsigned &NumIntermediates, MVT &RegisterVT) const {
178 // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
179 if (VT.isVectorOf(MVT::i1) && Subtarget.hasAVX512() &&
181 (VT.getVectorNumElements() == 64 && !Subtarget.hasBWI()) ||
182 VT.getVectorNumElements() > 64)) {
183 RegisterVT = MVT::i8;
184 IntermediateVT = MVT::i1;
185 NumIntermediates = VT.getVectorNumElements();
186 return NumIntermediates;
187 }
188
189 // Split v64i1 vectors if we don't have v64i8 available.
190 if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
192 RegisterVT = MVT::v32i8;
193 IntermediateVT = MVT::v32i1;
194 NumIntermediates = 2;
195 return 2;
196 }
197
198 // Split vNbf16 vectors according to vNf16.
199 if (VT.isVectorOf(MVT::bf16) && isTypeLegal(MVT::f16))
200 VT = VT.changeVectorElementType(Context, MVT::f16);
201
202 return TargetLowering::getVectorTypeBreakdownForCallingConv(Context, CC, VT, IntermediateVT,
203 NumIntermediates, RegisterVT);
204}
205
207 LLVMContext& Context,
208 EVT VT) const {
209 if (!VT.isVector())
210 return MVT::i8;
211
212 if (Subtarget.hasAVX512()) {
213 // Figure out what this type will be legalized to.
214 EVT LegalVT = VT;
215 while (getTypeAction(Context, LegalVT) != TypeLegal)
216 LegalVT = getTypeToTransformTo(Context, LegalVT);
217
218 // If we got a 512-bit vector then we'll definitely have a vXi1 compare.
219 if (LegalVT.getSimpleVT().is512BitVector())
220 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
221
222 if (LegalVT.getSimpleVT().isVector() && Subtarget.hasVLX()) {
223 // If we legalized to less than a 512-bit vector, then we will use a vXi1
224 // compare for vXi32/vXi64 for sure. If we have BWI we will also support
225 // vXi16/vXi8.
226 MVT EltVT = LegalVT.getSimpleVT().getVectorElementType();
227 if (Subtarget.hasBWI() || EltVT.getSizeInBits() >= 32)
228 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
229 }
230 }
231
233}
234
236 Type *Ty, CallingConv::ID CallConv, bool isVarArg,
237 const DataLayout &DL) const {
238 // On x86-64 i128 is split into two i64s and needs to be allocated to two
239 // consecutive registers, or spilled to the stack as a whole. On x86-32 i128
240 // is split to four i32s and never actually passed in registers, but we use
241 // the consecutive register mark to match it in TableGen.
242 if (Ty->isIntegerTy(128))
243 return true;
244
245 // On x86-32, fp128 acts the same as i128.
246 if (Subtarget.is32Bit() && Ty->isFP128Ty())
247 return true;
248
249 return false;
250}
251
252/// Helper for getByValTypeAlignment to determine
253/// the desired ByVal argument alignment.
254static void getMaxByValAlign(Type *Ty, Align &MaxAlign) {
255 if (MaxAlign == 16)
256 return;
257 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
258 if (VTy->getPrimitiveSizeInBits().getFixedValue() == 128)
259 MaxAlign = Align(16);
260 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
261 Align EltAlign;
262 getMaxByValAlign(ATy->getElementType(), EltAlign);
263 if (EltAlign > MaxAlign)
264 MaxAlign = EltAlign;
265 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
266 for (auto *EltTy : STy->elements()) {
267 Align EltAlign;
268 getMaxByValAlign(EltTy, EltAlign);
269 if (EltAlign > MaxAlign)
270 MaxAlign = EltAlign;
271 if (MaxAlign == 16)
272 break;
273 }
274 }
275}
276
277/// Return the desired alignment for ByVal aggregate
278/// function arguments in the caller parameter area. For X86, aggregates
279/// that contain SSE vectors are placed at 16-byte boundaries while the rest
280/// are at 4-byte boundaries.
282 const DataLayout &DL) const {
283 if (Subtarget.is64Bit())
284 return std::max(DL.getABITypeAlign(Ty), Align::Constant<8>());
285
286 Align Alignment(4);
287 if (Subtarget.hasSSE1())
288 getMaxByValAlign(Ty, Alignment);
289 return Alignment;
290}
291
292/// It returns EVT::Other if the type should be determined using generic
293/// target-independent logic.
294/// For vector ops we check that the overall size isn't larger than our
295/// preferred vector width.
297 LLVMContext &Context, const MemOp &Op,
298 const AttributeList &FuncAttributes) const {
299 if (!FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat)) {
300 if (Op.size() >= 16 &&
301 (!Subtarget.isUnalignedMem16Slow() || Op.isAligned(Align(16)))) {
302 // FIXME: Check if unaligned 64-byte accesses are slow.
303 if (Op.size() >= 64 && Subtarget.hasAVX512() &&
304 (Subtarget.getPreferVectorWidth() >= 512)) {
305 return Subtarget.hasBWI() ? MVT::v64i8 : MVT::v16i32;
306 }
307 // FIXME: Check if unaligned 32-byte accesses are slow.
308 if (Op.size() >= 32 && Subtarget.hasAVX() &&
309 Subtarget.useLight256BitInstructions()) {
310 // Although this isn't a well-supported type for AVX1, we'll let
311 // legalization and shuffle lowering produce the optimal codegen. If we
312 // choose an optimal type with a vector element larger than a byte,
313 // getMemsetStores() may create an intermediate splat (using an integer
314 // multiply) before we splat as a vector.
315 return MVT::v32i8;
316 }
317 if (Subtarget.hasSSE2() && (Subtarget.getPreferVectorWidth() >= 128))
318 return MVT::v16i8;
319 // TODO: Can SSE1 handle a byte vector?
320 // If we have SSE1 registers we should be able to use them.
321 if (Subtarget.hasSSE1() && (Subtarget.is64Bit() || Subtarget.hasX87()) &&
322 (Subtarget.getPreferVectorWidth() >= 128))
323 return MVT::v4f32;
324 } else if (((Op.isMemcpy() && !Op.isMemcpyStrSrc()) || Op.isZeroMemset()) &&
325 Op.size() >= 8 && !Subtarget.is64Bit() && Subtarget.hasSSE2()) {
326 // Do not use f64 to lower memcpy if source is string constant. It's
327 // better to use i32 to avoid the loads.
328 // Also, do not use f64 to lower memset unless this is a memset of zeros.
329 // The gymnastics of splatting a byte value into an XMM register and then
330 // only using 8-byte stores (because this is a CPU with slow unaligned
331 // 16-byte accesses) makes that a loser.
332 return MVT::f64;
333 }
334 }
335 // This is a compromise. If we reach here, unaligned accesses may be slow on
336 // this target. However, creating smaller, aligned accesses could be even
337 // slower and would certainly be a lot more code.
338 if (Subtarget.is64Bit() && Op.size() >= 8)
339 return MVT::i64;
340 return MVT::i32;
341}
342
344 if (VT == MVT::f32)
345 return Subtarget.hasSSE1();
346 if (VT == MVT::f64)
347 return Subtarget.hasSSE2();
348 return true;
349}
350
351static bool isBitAligned(Align Alignment, uint64_t SizeInBits) {
352 return (8 * Alignment.value()) % SizeInBits == 0;
353}
354
356 if (isBitAligned(Alignment, VT.getSizeInBits()))
357 return true;
358 switch (VT.getSizeInBits()) {
359 default:
360 // 8-byte and under are always assumed to be fast.
361 return true;
362 case 128:
363 return !Subtarget.isUnalignedMem16Slow();
364 case 256:
365 return !Subtarget.isUnalignedMem32Slow();
366 // TODO: What about AVX-512 (512-bit) accesses?
367 }
368}
369
371 EVT VT, unsigned, Align Alignment, MachineMemOperand::Flags Flags,
372 unsigned *Fast) const {
373 if (Fast)
374 *Fast = isMemoryAccessFast(VT, Alignment);
375 // NonTemporal vector memory ops must be aligned.
376 if (!!(Flags & MachineMemOperand::MONonTemporal) && VT.isVector()) {
377 // NT loads can only be vector aligned, so if its less aligned than the
378 // minimum vector size (which we can split the vector down to), we might as
379 // well use a regular unaligned vector load.
380 // We don't have any NT loads pre-SSE41.
381 if (!!(Flags & MachineMemOperand::MOLoad))
382 return (Alignment < 16 || !Subtarget.hasSSE41());
383 return false;
384 }
385 // Misaligned accesses of any size are always allowed.
386 return true;
387}
388
390 const DataLayout &DL, EVT VT,
391 unsigned AddrSpace, Align Alignment,
393 unsigned *Fast) const {
394 if (Fast)
395 *Fast = isMemoryAccessFast(VT, Alignment);
396 if (!!(Flags & MachineMemOperand::MONonTemporal) && VT.isVector()) {
397 if (allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment, Flags,
398 /*Fast=*/nullptr))
399 return true;
400 // NonTemporal vector memory ops are special, and must be aligned.
401 if (!isBitAligned(Alignment, VT.getSizeInBits()))
402 return false;
403 switch (VT.getSizeInBits()) {
404 case 128:
405 if (!!(Flags & MachineMemOperand::MOLoad) && Subtarget.hasSSE41())
406 return true;
407 if (!!(Flags & MachineMemOperand::MOStore) && Subtarget.hasSSE2())
408 return true;
409 return false;
410 case 256:
411 if (!!(Flags & MachineMemOperand::MOLoad) && Subtarget.hasAVX2())
412 return true;
413 if (!!(Flags & MachineMemOperand::MOStore) && Subtarget.hasAVX())
414 return true;
415 return false;
416 case 512:
417 if (Subtarget.hasAVX512())
418 return true;
419 return false;
420 default:
421 return false; // Don't have NonTemporal vector memory ops of this size.
422 }
423 }
424 return true;
425}
426
427/// Return the entry encoding for a jump table in the
428/// current function. The returned value is a member of the
429/// MachineJumpTableInfo::JTEntryKind enum.
431 // In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF
432 // symbol.
433 if (isPositionIndependent() && Subtarget.isPICStyleGOT())
435 if (isPositionIndependent() &&
437 !Subtarget.isTargetCOFF())
439
440 // Otherwise, use the normal jump table encoding heuristics.
442}
443
445 return Subtarget.useSoftFloat();
446}
447
449 ArgListTy &Args) const {
450
451 // Only relabel X86-32 for C / Stdcall CCs.
452 if (Subtarget.is64Bit())
453 return;
454 if (CC != CallingConv::C && CC != CallingConv::X86_StdCall)
455 return;
456 unsigned ParamRegs = 0;
457 if (auto *M = MF->getFunction().getParent())
458 ParamRegs = M->getNumberRegisterParameters();
459
460 // Mark the first N int arguments as having reg
461 for (auto &Arg : Args) {
462 Type *T = Arg.Ty;
463 if (T->isIntOrPtrTy())
464 if (MF->getDataLayout().getTypeAllocSize(T) <= 8) {
465 unsigned numRegs = 1;
466 if (MF->getDataLayout().getTypeAllocSize(T) > 4)
467 numRegs = 2;
468 if (ParamRegs < numRegs)
469 return;
470 ParamRegs -= numRegs;
471 Arg.IsInReg = true;
472 }
473 }
474}
475
476const MCExpr *
478 const MachineBasicBlock *MBB,
479 unsigned uid,MCContext &Ctx) const{
480 assert(isPositionIndependent() && Subtarget.isPICStyleGOT());
481 // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF
482 // entries.
483 return MCSymbolRefExpr::create(MBB->getSymbol(), X86::S_GOTOFF, Ctx);
484}
485
486/// Returns relocation base for the given PIC jumptable.
488 SelectionDAG &DAG) const {
489 if (!Subtarget.is64Bit())
490 // This doesn't have SDLoc associated with it, but is not really the
491 // same as a Register.
492 return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
494 return Table;
495}
496
497/// This returns the relocation base for the given PIC jumptable,
498/// the same as getPICJumpTableRelocBase, but as an MCExpr.
500getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
501 MCContext &Ctx) const {
502 // X86-64 uses RIP relative addressing based on the jump table label.
503 if (Subtarget.isPICStyleRIPRel() ||
504 (Subtarget.is64Bit() &&
507
508 // Otherwise, the reference is relative to the PIC base.
509 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
510}
511
512std::pair<const TargetRegisterClass *, uint8_t>
514 MVT VT) const {
515 const TargetRegisterClass *RRC = nullptr;
516 uint8_t Cost = 1;
517 switch (VT.SimpleTy) {
518 default:
520 case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
521 RRC = Subtarget.is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass;
522 break;
523 case MVT::x86mmx:
524 RRC = &X86::VR64RegClass;
525 break;
526 case MVT::f32: case MVT::f64:
527 case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
528 case MVT::v4f32: case MVT::v2f64:
529 case MVT::v32i8: case MVT::v16i16: case MVT::v8i32: case MVT::v4i64:
530 case MVT::v8f32: case MVT::v4f64:
531 case MVT::v64i8: case MVT::v32i16: case MVT::v16i32: case MVT::v8i64:
532 case MVT::v16f32: case MVT::v8f64:
533 RRC = &X86::VR128XRegClass;
534 break;
535 }
536 return std::make_pair(RRC, Cost);
537}
538
539unsigned X86TargetLowering::getAddressSpace() const {
540 if (Subtarget.is64Bit())
542 : X86AS::FS;
543 return X86AS::GS;
544}
545
546static bool hasStackGuardSlotTLS(const Triple &TargetTriple) {
547 return TargetTriple.isOSGlibc() || TargetTriple.isMusl() ||
548 TargetTriple.isOSFuchsia() || TargetTriple.isAndroid();
549}
550
557
558Value *
560 const LibcallLoweringInfo &Libcalls) const {
561 // glibc, bionic, and Fuchsia have a special slot for the stack guard in
562 // tcbhead_t; use it instead of the usual global variable (see
563 // sysdeps/{i386,x86_64}/nptl/tls.h)
564 if (hasStackGuardSlotTLS(Subtarget.getTargetTriple())) {
565 unsigned AddressSpace = getAddressSpace();
566
567 // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
568 if (Subtarget.isTargetFuchsia())
569 return SegmentOffset(IRB, 0x10, AddressSpace);
570
571 Module *M = IRB.GetInsertBlock()->getParent()->getParent();
572 // Specially, some users may customize the base reg and offset.
573 int Offset = M->getStackProtectorGuardOffset();
574 // If we don't set -stack-protector-guard-offset value:
575 // %fs:0x28, unless we're using a Kernel code model, in which case
576 // it's %gs:0x28. gs:0x14 on i386.
577 if (Offset == INT_MAX)
578 Offset = (Subtarget.is64Bit()) ? 0x28 : 0x14;
579
580 StringRef GuardReg = M->getStackProtectorGuardReg();
581 if (GuardReg == "fs")
583 else if (GuardReg == "gs")
585
586 // Use symbol guard if user specify.
587 StringRef GuardSymb = M->getStackProtectorGuardSymbol();
588 if (!GuardSymb.empty()) {
589 GlobalVariable *GV = M->getGlobalVariable(GuardSymb);
590 if (!GV) {
591 Type *Ty = Subtarget.is64Bit() ? Type::getInt64Ty(M->getContext())
592 : Type::getInt32Ty(M->getContext());
593 GV = new GlobalVariable(*M, Ty, false, GlobalValue::ExternalLinkage,
594 nullptr, GuardSymb, nullptr,
596 if (!Subtarget.isTargetDarwin())
597 GV->setDSOLocal(M->getDirectAccessExternalData());
598 }
599 return GV;
600 }
601
602 return SegmentOffset(IRB, Offset, AddressSpace);
603 }
604 return TargetLowering::getIRStackGuard(IRB, Libcalls);
605}
606
608 Module &M, const LibcallLoweringInfo &Libcalls) const {
609 // MSVC CRT provides functionalities for stack protection.
610 RTLIB::LibcallImpl SecurityCheckCookieLibcall =
611 Libcalls.getLibcallImpl(RTLIB::SECURITY_CHECK_COOKIE);
612
613 RTLIB::LibcallImpl SecurityCookieVar =
614 Libcalls.getLibcallImpl(RTLIB::STACK_CHECK_GUARD);
615 if (SecurityCheckCookieLibcall != RTLIB::Unsupported &&
616 SecurityCookieVar != RTLIB::Unsupported) {
617 // MSVC CRT provides functionalities for stack protection.
618 // MSVC CRT has a global variable holding security cookie.
619 M.getOrInsertGlobal(getLibcallImplName(SecurityCookieVar),
620 PointerType::getUnqual(M.getContext()));
621
622 // MSVC CRT has a function to validate security cookie.
623 FunctionCallee SecurityCheckCookie =
624 M.getOrInsertFunction(getLibcallImplName(SecurityCheckCookieLibcall),
625 Type::getVoidTy(M.getContext()),
626 PointerType::getUnqual(M.getContext()));
627
628 if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee())) {
629 F->setCallingConv(CallingConv::X86_FastCall);
630 F->addParamAttr(0, Attribute::AttrKind::InReg);
631 }
632 return;
633 }
634
635 StringRef GuardMode = M.getStackProtectorGuard();
636
637 // glibc, bionic, and Fuchsia have a special slot for the stack guard.
638 if ((GuardMode == "tls" || GuardMode.empty()) &&
639 hasStackGuardSlotTLS(Subtarget.getTargetTriple()))
640 return;
642}
643
645 IRBuilderBase &IRB, const LibcallLoweringInfo &Libcalls) const {
646 // Android provides a fixed TLS slot for the SafeStack pointer. See the
647 // definition of TLS_SLOT_SAFESTACK in
648 // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
649 if (Subtarget.isTargetAndroid()) {
650 // %fs:0x48, unless we're using a Kernel code model, in which case it's %gs:
651 // %gs:0x24 on i386
652 int Offset = (Subtarget.is64Bit()) ? 0x48 : 0x24;
653 return SegmentOffset(IRB, Offset, getAddressSpace());
654 }
655
656 // Fuchsia is similar.
657 if (Subtarget.isTargetFuchsia()) {
658 // <zircon/tls.h> defines ZX_TLS_UNSAFE_SP_OFFSET with this value.
659 return SegmentOffset(IRB, 0x18, getAddressSpace());
660 }
661
663}
664
665//===----------------------------------------------------------------------===//
666// Return Value Calling Convention Implementation
667//===----------------------------------------------------------------------===//
668
669bool X86TargetLowering::CanLowerReturn(
670 CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
671 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
672 const Type *RetTy) const {
674 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
675 return CCInfo.CheckReturn(Outs, RetCC_X86);
676}
677
678const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {
679 static const MCPhysReg ScratchRegs[] = { X86::R11, 0 };
680 return ScratchRegs;
681}
682
684 static const MCPhysReg RCRegs[] = {X86::FPCW, X86::MXCSR};
685 return RCRegs;
686}
687
688/// Lowers masks values (v*i1) to the local register values
689/// \returns DAG node after lowering to register type
690static SDValue lowerMasksToReg(const SDValue &ValArg, const EVT &ValLoc,
691 const SDLoc &DL, SelectionDAG &DAG) {
692 EVT ValVT = ValArg.getValueType();
693
694 if (ValVT == MVT::v1i1)
695 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ValLoc, ValArg,
696 DAG.getIntPtrConstant(0, DL));
697
698 if ((ValVT == MVT::v8i1 && (ValLoc == MVT::i8 || ValLoc == MVT::i32)) ||
699 (ValVT == MVT::v16i1 && (ValLoc == MVT::i16 || ValLoc == MVT::i32))) {
700 // Two stage lowering might be required
701 // bitcast: v8i1 -> i8 / v16i1 -> i16
702 // anyextend: i8 -> i32 / i16 -> i32
703 EVT TempValLoc = ValVT == MVT::v8i1 ? MVT::i8 : MVT::i16;
704 SDValue ValToCopy = DAG.getBitcast(TempValLoc, ValArg);
705 if (ValLoc == MVT::i32)
706 ValToCopy = DAG.getNode(ISD::ANY_EXTEND, DL, ValLoc, ValToCopy);
707 return ValToCopy;
708 }
709
710 if ((ValVT == MVT::v32i1 && ValLoc == MVT::i32) ||
711 (ValVT == MVT::v64i1 && ValLoc == MVT::i64)) {
712 // One stage lowering is required
713 // bitcast: v32i1 -> i32 / v64i1 -> i64
714 return DAG.getBitcast(ValLoc, ValArg);
715 }
716
717 return DAG.getNode(ISD::ANY_EXTEND, DL, ValLoc, ValArg);
718}
719
720/// Breaks v64i1 value into two registers and adds the new node to the DAG
722 const SDLoc &DL, SelectionDAG &DAG, SDValue &Arg,
723 SmallVectorImpl<std::pair<Register, SDValue>> &RegsToPass, CCValAssign &VA,
724 CCValAssign &NextVA, const X86Subtarget &Subtarget) {
725 assert(Subtarget.hasBWI() && "Expected AVX512BW target!");
726 assert(Subtarget.is32Bit() && "Expecting 32 bit target");
727 assert(Arg.getValueType() == MVT::i64 && "Expecting 64 bit value");
728 assert(VA.isRegLoc() && NextVA.isRegLoc() &&
729 "The value should reside in two registers");
730
731 // Before splitting the value we cast it to i64
732 Arg = DAG.getBitcast(MVT::i64, Arg);
733
734 // Splitting the value into two i32 types
735 SDValue Lo, Hi;
736 std::tie(Lo, Hi) = DAG.SplitScalar(Arg, DL, MVT::i32, MVT::i32);
737
738 // Attach the two i32 types into corresponding registers
739 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Lo));
740 RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Hi));
741}
742
744X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
745 bool isVarArg,
747 const SmallVectorImpl<SDValue> &OutVals,
748 const SDLoc &dl, SelectionDAG &DAG) const {
749 MachineFunction &MF = DAG.getMachineFunction();
750 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
751
752 // In some cases we need to disable registers from the default CSR list.
753 // For example, when they are used as return registers (preserve_* and X86's
754 // regcall) or for argument passing (X86's regcall).
755 bool ShouldDisableCalleeSavedRegister =
756 shouldDisableRetRegFromCSR(CallConv) ||
757 MF.getFunction().hasFnAttribute("no_caller_saved_registers");
758
759 if (CallConv == CallingConv::X86_INTR && !Outs.empty())
760 report_fatal_error("X86 interrupts may not return any value");
761
763 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext());
764 CCInfo.AnalyzeReturn(Outs, RetCC_X86);
765
767 for (unsigned I = 0, OutsIndex = 0, E = RVLocs.size(); I != E;
768 ++I, ++OutsIndex) {
769 CCValAssign &VA = RVLocs[I];
770 assert(VA.isRegLoc() && "Can only return in registers!");
771
772 // Add the register to the CalleeSaveDisableRegs list.
773 if (ShouldDisableCalleeSavedRegister)
775
776 SDValue ValToCopy = OutVals[OutsIndex];
777 EVT ValVT = ValToCopy.getValueType();
778
779 // Promote values to the appropriate types.
780 if (VA.getLocInfo() == CCValAssign::SExt)
781 ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
782 else if (VA.getLocInfo() == CCValAssign::ZExt)
783 ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy);
784 else if (VA.getLocInfo() == CCValAssign::AExt) {
785 if (ValVT.isVectorOf(MVT::i1))
786 ValToCopy = lowerMasksToReg(ValToCopy, VA.getLocVT(), dl, DAG);
787 else
788 ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy);
789 }
790 else if (VA.getLocInfo() == CCValAssign::BCvt)
791 ValToCopy = DAG.getBitcast(VA.getLocVT(), ValToCopy);
792
794 "Unexpected FP-extend for return value.");
795
796 // Report an error if we have attempted to return a value via an XMM
797 // register and SSE was disabled.
798 if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {
799 errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
800 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
801 } else if (!Subtarget.hasSSE2() &&
802 X86::FR64XRegClass.contains(VA.getLocReg()) &&
803 ValVT == MVT::f64) {
804 // When returning a double via an XMM register, report an error if SSE2 is
805 // not enabled.
806 errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
807 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
808 }
809
810 // Returns in ST0/ST1 are handled specially: these are pushed as operands to
811 // the RET instruction and handled by the FP Stackifier.
812 if (VA.getLocReg() == X86::FP0 ||
813 VA.getLocReg() == X86::FP1) {
814 // If this is a copy from an xmm register to ST(0), use an FPExtend to
815 // change the value to the FP stack register class.
817 ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy);
818 RetVals.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
819 // Don't emit a copytoreg.
820 continue;
821 }
822
823 // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
824 // which is returned in RAX / RDX.
825 if (Subtarget.is64Bit()) {
826 if (ValVT == MVT::x86mmx) {
827 if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
828 ValToCopy = DAG.getBitcast(MVT::i64, ValToCopy);
829 ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
830 ValToCopy);
831 // If we don't have SSE2 available, convert to v4f32 so the generated
832 // register is legal.
833 if (!Subtarget.hasSSE2())
834 ValToCopy = DAG.getBitcast(MVT::v4f32, ValToCopy);
835 }
836 }
837 }
838
839 if (VA.needsCustom()) {
840 assert(VA.getValVT() == MVT::v64i1 &&
841 "Currently the only custom case is when we split v64i1 to 2 regs");
842
843 Passv64i1ArgInRegs(dl, DAG, ValToCopy, RetVals, VA, RVLocs[++I],
844 Subtarget);
845
846 // Add the second register to the CalleeSaveDisableRegs list.
847 if (ShouldDisableCalleeSavedRegister)
848 MF.getRegInfo().disableCalleeSavedRegister(RVLocs[I].getLocReg());
849 } else {
850 RetVals.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
851 }
852 }
853
854 SDValue Glue;
856 RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
857 // Operand #1 = Bytes To Pop
858 RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(), dl,
859 MVT::i32));
860
861 // Copy the result values into the output registers.
862 for (auto &RetVal : RetVals) {
863 if (RetVal.first == X86::FP0 || RetVal.first == X86::FP1) {
864 RetOps.push_back(RetVal.second);
865 continue; // Don't emit a copytoreg.
866 }
867
868 Chain = DAG.getCopyToReg(Chain, dl, RetVal.first, RetVal.second, Glue);
869 Glue = Chain.getValue(1);
870 RetOps.push_back(
871 DAG.getRegister(RetVal.first, RetVal.second.getValueType()));
872 }
873
874 // Swift calling convention does not require we copy the sret argument
875 // into %rax/%eax for the return, and SRetReturnReg is not set for Swift.
876
877 // All x86 ABIs require that for returning structs by value we copy
878 // the sret argument into %rax/%eax (depending on ABI) for the return.
879 // We saved the argument into a virtual register in the entry block,
880 // so now we copy the value out and into %rax/%eax.
881 //
882 // Checking Function.hasStructRetAttr() here is insufficient because the IR
883 // may not have an explicit sret argument. If FuncInfo.CanLowerReturn is
884 // false, then an sret argument may be implicitly inserted in the SelDAG. In
885 // either case FuncInfo->setSRetReturnReg() will have been called.
886 if (Register SRetReg = FuncInfo->getSRetReturnReg()) {
887 // When we have both sret and another return value, we should use the
888 // original Chain stored in RetOps[0], instead of the current Chain updated
889 // in the above loop. If we only have sret, RetOps[0] equals to Chain.
890
891 // For the case of sret and another return value, we have
892 // Chain_0 at the function entry
893 // Chain_1 = getCopyToReg(Chain_0) in the above loop
894 // If we use Chain_1 in getCopyFromReg, we will have
895 // Val = getCopyFromReg(Chain_1)
896 // Chain_2 = getCopyToReg(Chain_1, Val) from below
897
898 // getCopyToReg(Chain_0) will be glued together with
899 // getCopyToReg(Chain_1, Val) into Unit A, getCopyFromReg(Chain_1) will be
900 // in Unit B, and we will have cyclic dependency between Unit A and Unit B:
901 // Data dependency from Unit B to Unit A due to usage of Val in
902 // getCopyToReg(Chain_1, Val)
903 // Chain dependency from Unit A to Unit B
904
905 // So here, we use RetOps[0] (i.e Chain_0) for getCopyFromReg.
906 SDValue Val = DAG.getCopyFromReg(RetOps[0], dl, SRetReg,
908
909 Register RetValReg
910 = (Subtarget.is64Bit() && !Subtarget.isTarget64BitILP32()) ?
911 X86::RAX : X86::EAX;
912 Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Glue);
913 Glue = Chain.getValue(1);
914
915 // RAX/EAX now acts like a return value.
916 RetOps.push_back(
917 DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
918
919 // Add the returned register to the CalleeSaveDisableRegs list. Don't do
920 // this however for preserve_most/preserve_all to minimize the number of
921 // callee-saved registers for these CCs.
922 if (ShouldDisableCalleeSavedRegister &&
923 CallConv != CallingConv::PreserveAll &&
924 CallConv != CallingConv::PreserveMost)
926 }
927
928 const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
929 const MCPhysReg *I =
930 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
931 if (I) {
932 for (; *I; ++I) {
933 if (X86::GR64RegClass.contains(*I))
934 RetOps.push_back(DAG.getRegister(*I, MVT::i64));
935 else
936 llvm_unreachable("Unexpected register class in CSRsViaCopy!");
937 }
938 }
939
940 RetOps[0] = Chain; // Update chain.
941
942 // Add the glue if we have it.
943 if (Glue.getNode())
944 RetOps.push_back(Glue);
945
946 unsigned RetOpcode = X86ISD::RET_GLUE;
947 if (CallConv == CallingConv::X86_INTR)
948 RetOpcode = X86ISD::IRET;
949 return DAG.getNode(RetOpcode, dl, MVT::Other, RetOps);
950}
951
952bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
953 if (N->getNumValues() != 1 || !N->hasNUsesOfValue(1, 0))
954 return false;
955
956 SDValue TCChain = Chain;
957 SDNode *Copy = *N->user_begin();
958 if (Copy->getOpcode() == ISD::CopyToReg) {
959 // If the copy has a glue operand, we conservatively assume it isn't safe to
960 // perform a tail call.
961 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
962 return false;
963 TCChain = Copy->getOperand(0);
964 } else if (Copy->getOpcode() != ISD::FP_EXTEND)
965 return false;
966
967 bool HasRet = false;
968 for (const SDNode *U : Copy->users()) {
969 if (U->getOpcode() != X86ISD::RET_GLUE)
970 return false;
971 // If we are returning more than one value, we can definitely
972 // not make a tail call see PR19530
973 if (U->getNumOperands() > 4)
974 return false;
975 if (U->getNumOperands() == 4 &&
976 U->getOperand(U->getNumOperands() - 1).getValueType() != MVT::Glue)
977 return false;
978 HasRet = true;
979 }
980
981 if (!HasRet)
982 return false;
983
984 Chain = TCChain;
985 return true;
986}
987
988EVT X86TargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT,
989 ISD::NodeType ExtendKind) const {
990 MVT ReturnMVT = MVT::i32;
991
992 bool Darwin = Subtarget.getTargetTriple().isOSDarwin();
993 if (VT == MVT::i1 || (!Darwin && (VT == MVT::i8 || VT == MVT::i16))) {
994 // The ABI does not require i1, i8 or i16 to be extended.
995 //
996 // On Darwin, there is code in the wild relying on Clang's old behaviour of
997 // always extending i8/i16 return values, so keep doing that for now.
998 // (PR26665).
999 ReturnMVT = MVT::i8;
1000 }
1001
1002 EVT MinVT = getRegisterType(Context, ReturnMVT);
1003 return VT.bitsLT(MinVT) ? MinVT : VT;
1004}
1005
1006/// Reads two 32 bit registers and creates a 64 bit mask value.
1007/// \param VA The current 32 bit value that needs to be assigned.
1008/// \param NextVA The next 32 bit value that needs to be assigned.
1009/// \param Root The parent DAG node.
1010/// \param [in,out] InGlue Represents SDValue in the parent DAG node for
1011/// glue purposes. In the case the DAG is already using
1012/// physical register instead of virtual, we should glue
1013/// our new SDValue to InGlue SDValue.
1014/// \return a new SDValue of size 64bit.
/// NOTE(review): this listing jumps from 1014 to 1016 and from 1030 to 1032.
/// The first signature line (function name plus the VA/NextVA parameters) and
/// the declaration of `MF` used below are not visible here; callers in this
/// file spell the name `getv64i1Argument`. Restore both lines from upstream
/// before compiling.
1016 SDValue &Root, SelectionDAG &DAG,
1017 const SDLoc &DL, const X86Subtarget &Subtarget,
1018 SDValue *InGlue = nullptr) {
1019 assert((Subtarget.hasBWI()) && "Expected AVX512BW target!");
1020 assert(Subtarget.is32Bit() && "Expecting 32 bit target");
1021 assert(VA.getValVT() == MVT::v64i1 &&
1022 "Expecting first location of 64 bit width type");
1023 assert(NextVA.getValVT() == VA.getValVT() &&
1024 "The locations should have the same type");
1025 assert(VA.isRegLoc() && NextVA.isRegLoc() &&
1026 "The values should reside in two registers");
1027
1028 SDValue Lo, Hi;
1029 SDValue ArgValueLo, ArgValueHi;
1030
1032 const TargetRegisterClass *RC = &X86::GR32RegClass;
1033
1034 // Read a 32 bit value from the registers.
1035 if (nullptr == InGlue) {
1036 // When no physical register is present,
1037 // create an intermediate virtual register.
1038 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
1039 ArgValueLo = DAG.getCopyFromReg(Root, DL, Reg, MVT::i32);
1040 Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
1041 ArgValueHi = DAG.getCopyFromReg(Root, DL, Reg, MVT::i32);
1042 } else {
1043 // When a physical register is available read the value from it and glue
1044 // the reads together.
 // Result #2 of each copy is written back through InGlue, so the caller
 // sees the glue of the last read.
1045 ArgValueLo =
1046 DAG.getCopyFromReg(Root, DL, VA.getLocReg(), MVT::i32, *InGlue);
1047 *InGlue = ArgValueLo.getValue(2);
1048 ArgValueHi =
1049 DAG.getCopyFromReg(Root, DL, NextVA.getLocReg(), MVT::i32, *InGlue);
1050 *InGlue = ArgValueHi.getValue(2);
1051 }
1052
1053 // Convert the i32 type into v32i1 type.
1054 Lo = DAG.getBitcast(MVT::v32i1, ArgValueLo);
1055
1056 // Convert the i32 type into v32i1 type.
1057 Hi = DAG.getBitcast(MVT::v32i1, ArgValueHi);
1058
1059 // Concatenate the two values together.
1060 return DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v64i1, Lo, Hi);
1061}
1062
1063/// The function will lower a register of various sizes (8/16/32/64)
1064/// to a mask value of the expected size (v8i1/v16i1/v32i1/v64i1)
1065/// \returns a DAG node contains the operand after lowering to mask type.
1066static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT,
1067 const EVT &ValLoc, const SDLoc &DL,
1068 SelectionDAG &DAG) {
1069 SDValue ValReturned = ValArg;
1070
1071 if (ValVT == MVT::v1i1)
1072 return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v1i1, ValReturned);
1073
1074 if (ValVT == MVT::v64i1) {
1075 // In 32 bit machine, this case is handled by getv64i1Argument
1076 assert(ValLoc == MVT::i64 && "Expecting only i64 locations");
1077 // In 64 bit machine, There is no need to truncate the value only bitcast
1078 } else {
1079 MVT MaskLenVT;
1080 switch (ValVT.getSimpleVT().SimpleTy) {
1081 case MVT::v8i1:
1082 MaskLenVT = MVT::i8;
1083 break;
1084 case MVT::v16i1:
1085 MaskLenVT = MVT::i16;
1086 break;
1087 case MVT::v32i1:
1088 MaskLenVT = MVT::i32;
1089 break;
1090 default:
1091 llvm_unreachable("Expecting a vector of i1 types");
1092 }
1093
1094 ValReturned = DAG.getNode(ISD::TRUNCATE, DL, MaskLenVT, ValReturned);
1095 }
1096 return DAG.getBitcast(ValVT, ValReturned);
1097}
1098
// Builds an X86ISD::POP_FROM_X87_REG node reading Reg as type VT. The node
// produces three results (VT, MVT::Other, MVT::Glue). The Glue operand is
// passed along only when it is non-null.
// NOTE(review): this listing jumps from 1098 to 1100, so the first signature
// line is not visible. The call site in LowerCallResult passes
// (DAG, Chain, dl, Reg, VT, Glue) and names it getPopFromX87Reg. Restore the
// line from upstream before compiling.
1100 const SDLoc &dl, Register Reg, EVT VT,
1101 SDValue Glue) {
1102 SDVTList VTs = DAG.getVTList(VT, MVT::Other, MVT::Glue);
1103 SDValue Ops[] = {Chain, DAG.getRegister(Reg, VT), Glue};
 // Drop the trailing Glue slot of Ops when no glue was supplied.
1104 return DAG.getNode(X86ISD::POP_FROM_X87_REG, dl, VTs,
1105 ArrayRef(Ops, Glue.getNode() ? 3 : 2));
1106}
1107
1108/// Lower the result values of a call into the
1109/// appropriate copies out of appropriate physical registers.
1110///
/// Each lowered value is appended to InVals and the updated chain is
/// returned. When RegMask is non-null, the bits of every return register
/// (including its sub-registers) are cleared in it.
/// NOTE(review): this listing jumps from 1113 to 1115 and from 1118 to 1120.
/// The parameter line that declares `DAG` and `InVals`, and the declaration
/// of `RVLocs`, are not visible here. Restore them from upstream before
/// compiling.
1111SDValue X86TargetLowering::LowerCallResult(
1112 SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
1113 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
1115 uint32_t *RegMask) const {
1116
1117 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
1118 // Assign locations to each value returned by this call.
1120 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
1121 *DAG.getContext());
1122 CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
1123
1124 // Copy all of the result registers out of their specified physreg.
1125 for (unsigned I = 0, InsIndex = 0, E = RVLocs.size(); I != E;
1126 ++I, ++InsIndex) {
1127 CCValAssign &VA = RVLocs[I];
1128 EVT CopyVT = VA.getLocVT();
1129
1130 // In some calling conventions we need to remove the used registers
1131 // from the register mask.
1132 if (RegMask) {
 // RegMask is indexed as an array of 32-bit words, one bit per register.
1133 for (MCPhysReg SubReg : TRI->subregs_inclusive(VA.getLocReg()))
1134 RegMask[SubReg / 32] &= ~(1u << (SubReg % 32));
1135 }
1136
1137 // Report an error if there was an attempt to return FP values via XMM
1138 // registers.
1139 if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {
1140 errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
1141 if (VA.getLocReg() == X86::XMM1)
1142 VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts.
1143 else
1144 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
1145 } else if (!Subtarget.hasSSE2() &&
1146 X86::FR64XRegClass.contains(VA.getLocReg()) &&
1147 CopyVT == MVT::f64) {
1148 errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
1149 if (VA.getLocReg() == X86::XMM1)
1150 VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts.
1151 else
1152 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
1153 }
1154
1155 // If we prefer to use the value in xmm registers, copy it out as f80 and
1156 // use a truncate to move it from fp stack reg to xmm reg.
1157 bool RoundAfterCopy = false;
1158 bool X87Result = VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1;
1159 if (X87Result && isScalarFPTypeInSSEReg(VA.getValVT())) {
1160 if (!Subtarget.hasX87())
1161 report_fatal_error("X87 register return with X87 disabled");
1162 CopyVT = MVT::f80;
1163 RoundAfterCopy = (CopyVT != VA.getLocVT());
1164 }
1165
1166 SDValue Val;
1167 if (VA.needsCustom()) {
1168 assert(VA.getValVT() == MVT::v64i1 &&
1169 "Currently the only custom case is when we split v64i1 to 2 regs");
 // The custom case consumes two locations, hence the ++I on RVLocs.
1170 Val =
1171 getv64i1Argument(VA, RVLocs[++I], Chain, DAG, dl, Subtarget, &InGlue);
1172 } else {
 // Chain is set to result #1 of the copy node; the value (#0) and the glue
 // (#2) are then taken from that same node.
1173 Chain =
1174 X87Result
1175 ? getPopFromX87Reg(DAG, Chain, dl, VA.getLocReg(), CopyVT, InGlue)
1176 .getValue(1)
1177 : DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), CopyVT, InGlue)
1178 .getValue(1);
1179 Val = Chain.getValue(0);
1180 InGlue = Chain.getValue(2);
1181 }
1182
1183 if (RoundAfterCopy)
1184 Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
1185 // This truncation won't change the value.
1186 DAG.getIntPtrConstant(1, dl, /*isTarget=*/true));
1187
1188 if (VA.isExtInLoc()) {
1189 if (VA.getValVT().isVector() &&
1190 VA.getValVT().getScalarType() == MVT::i1 &&
1191 ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
1192 (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
1193 // promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
1194 Val = lowerRegToMasks(Val, VA.getValVT(), VA.getLocVT(), dl, DAG);
1195 } else
1196 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
1197 }
1198
1199 if (VA.getLocInfo() == CCValAssign::BCvt)
1200 Val = DAG.getBitcast(VA.getValVT(), Val);
1201
1202 InVals.push_back(Val);
1203 }
1204
1205 return Chain;
1206}
1207
1208/// Determines whether Args, either a set of outgoing arguments to a call, or a
1209/// set of incoming args of a call, contains an sret pointer that the callee
1210/// pops. This happens on most x86-32, System V platforms, unless register
1211/// parameters are in use (-mregparm=1+, regcallcc, etc).
1212template <typename T>
1213static bool hasCalleePopSRet(const SmallVectorImpl<T> &Args,
1214 const SmallVectorImpl<CCValAssign> &ArgLocs,
1215 const X86Subtarget &Subtarget) {
1216 // Not C++20 (yet), so no concepts available.
1217 static_assert(std::is_same_v<T, ISD::OutputArg> ||
1218 std::is_same_v<T, ISD::InputArg>,
1219 "requires ISD::OutputArg or ISD::InputArg");
1220
1221 // Popping the sret pointer only happens on x86-32 System V ABI platforms
1222 // (Linux, Cygwin, BSDs, Mac, etc). That excludes Windows-minus-Cygwin and
1223 // MCU.
1224 const Triple &TT = Subtarget.getTargetTriple();
1225 if (!TT.isX86_32() || TT.isOSMSVCRT() || TT.isOSIAMCU())
1226 return false;
1227
1228 // Check if the first argument is marked sret and if it is passed in memory.
1229 bool IsSRetInMem = false;
1230 if (!Args.empty())
1231 IsSRetInMem = Args.front().Flags.isSRet() && ArgLocs.front().isMemLoc();
1232 return IsSRetInMem;
1233}
1234
1235/// Make a copy of an aggregate at address specified by "Src" to address
1236/// "Dst" with size and alignment information specified by the specific
1237/// parameter attribute. The copy will be passed as a byval function parameter.
/// NOTE(review): this listing jumps from 1237 to 1239, so the first signature
/// line (function name and the leading parameters, presumably Src and Dst
/// since both are used below) is not visible. Restore it from upstream before
/// compiling.
1239 SDValue Chain, ISD::ArgFlagsTy Flags,
1240 SelectionDAG &DAG, const SDLoc &dl) {
 // The byte count comes from the byval attribute carried in Flags.
1241 SDValue SizeNode = DAG.getIntPtrConstant(Flags.getByValSize(), dl);
1242
 // Emitted as a non-volatile, always-inline memcpy with default
 // (empty) pointer info on both sides.
1243 return DAG.getMemcpy(
1244 Chain, dl, Dst, Src, SizeNode, Flags.getNonZeroByValAlign(),
1245 /*isVolatile*/ false, /*AlwaysInline=*/true,
1246 /*CI=*/nullptr, std::nullopt, MachinePointerInfo(), MachinePointerInfo());
1247}
1248
1249/// Return true if the calling convention is one that we can guarantee TCO for.
/// NOTE(review): this listing jumps from 1249 to 1251 and from 1251 to 1254.
/// The signature line and the remaining terms of the returned disjunction are
/// not visible; only the Fast and GHC conventions can be confirmed from here.
/// Restore the missing lines from upstream before compiling.
1251 return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
1254}
1255
1256/// Return true if we might ever do TCO for calls with this calling convention.
/// Conventions not listed in the switch fall back to canGuaranteeTCO(CC).
/// NOTE(review): this listing skips 1257, 1262-1263 and 1265-1268. The
/// signature line and several `case` labels (the rest of the C group and the
/// whole callee-pop group) are not visible. Restore them from upstream before
/// compiling.
1258 switch (CC) {
1259 // C calling conventions:
1260 case CallingConv::C:
1261 case CallingConv::Win64:
1264 // Callee pop conventions:
1269 // Swift:
1270 case CallingConv::Swift:
1271 return true;
1272 default:
1273 return canGuaranteeTCO(CC);
1274 }
1275}
1276
1277/// Return true if the function is being made into a tailcall target by
1278/// changing its ABI.
/// The visible term requires both GuaranteedTailCallOpt and
/// canGuaranteeTCO(CC).
/// NOTE(review): this listing jumps from 1280 to 1282, so the right-hand side
/// of the trailing `||` is not visible. Restore it from upstream before
/// compiling.
1279static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) {
1280 return (GuaranteedTailCallOpt && canGuaranteeTCO(CC)) ||
1282}
1283
1284bool X86TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
1285 if (!CI->isTailCall())
1286 return false;
1287
1288 CallingConv::ID CalleeCC = CI->getCallingConv();
1289 if (!mayTailCallThisCC(CalleeCC))
1290 return false;
1291
1292 return true;
1293}
1294
// Lower incoming argument number `i`, whose location VA is in memory, to
// either a frame index (byval arguments) or a load from a fixed stack object.
// NOTE(review): this listing skips 1297, 1314, 1361, 1382 and 1404. The
// parameter line declaring `Ins`, the last term of the ExtendedInMem
// expression, and one argument line of each DAG.getLoad call are not visible.
// Restore them from upstream before compiling.
1295SDValue
1296X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
1298 const SDLoc &dl, SelectionDAG &DAG,
1299 const CCValAssign &VA,
1300 MachineFrameInfo &MFI, unsigned i) const {
1301 // Create the nodes corresponding to a load from this parameter slot.
1302 ISD::ArgFlagsTy Flags = Ins[i].Flags;
1303 bool AlwaysUseMutable = shouldGuaranteeTCO(
1304 CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt);
1305 bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
1306 EVT ValVT;
1307 MVT PtrVT = getPointerTy(DAG.getDataLayout());
1308
1309 // If value is passed by pointer we have address passed instead of the value
1310 // itself. No need to extend if the mask value and location share the same
1311 // absolute size.
1312 bool ExtendedInMem =
1313 VA.isExtInLoc() && VA.getValVT().getScalarType() == MVT::i1 &&
1315
1316 if (VA.getLocInfo() == CCValAssign::Indirect || ExtendedInMem)
1317 ValVT = VA.getLocVT();
1318 else
1319 ValVT = VA.getValVT();
1320
1321 // FIXME: For now, all byval parameter objects are marked mutable. This can be
1322 // changed with more analysis.
1323 // In case of tail call optimization mark all arguments mutable. Since they
1324 // could be overwritten by lowering of arguments in case of a tail call.
1325 if (Flags.isByVal()) {
1326 unsigned Bytes = Flags.getByValSize();
1327 if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
1328
1329 // FIXME: For now, all byval parameter objects are marked as aliasing. This
1330 // can be improved with deeper analysis.
 // isImmutable is always false on this path because Flags.isByVal() holds.
1331 int FI = MFI.CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable,
1332 /*isAliased=*/true);
1333 return DAG.getFrameIndex(FI, PtrVT);
1334 }
1335
1336 EVT ArgVT = Ins[i].ArgVT;
1337
1338 // If this is a vector that has been split into multiple parts, don't elide
1339 // the copy. The layout on the stack may not match the packed in-memory
1340 // layout.
1341 bool ScalarizedVector = ArgVT.isVector() && !VA.getLocVT().isVector();
1342
1343 // This is an argument in memory. We might be able to perform copy elision.
1344 // If the argument is passed directly in memory without any extension, then we
1345 // can perform copy elision. Large vector types, for example, may be passed
1346 // indirectly by pointer.
1347 if (Flags.isCopyElisionCandidate() &&
1348 VA.getLocInfo() != CCValAssign::Indirect && !ExtendedInMem &&
1349 !ScalarizedVector) {
1350 SDValue PartAddr;
1351 if (Ins[i].PartOffset == 0) {
1352 // If this is a one-part value or the first part of a multi-part value,
1353 // create a stack object for the entire argument value type and return a
1354 // load from our portion of it. This assumes that if the first part of an
1355 // argument is in memory, the rest will also be in memory.
1356 int FI = MFI.CreateFixedObject(ArgVT.getStoreSize(), VA.getLocMemOffset(),
1357 /*IsImmutable=*/false);
1358 PartAddr = DAG.getFrameIndex(FI, PtrVT);
1359 return DAG.getLoad(
1360 ValVT, dl, Chain, PartAddr,
1362 }
1363
1364 // This is not the first piece of an argument in memory. See if there is
1365 // already a fixed stack object including this offset. If so, assume it
1366 // was created by the PartOffset == 0 branch above and create a load from
1367 // the appropriate offset into it.
1368 int64_t PartBegin = VA.getLocMemOffset();
1369 int64_t PartEnd = PartBegin + ValVT.getSizeInBits() / 8;
1370 int FI = MFI.getObjectIndexBegin();
 // Linear scan over the fixed objects for one that fully contains
 // [PartBegin, PartEnd).
1371 for (; MFI.isFixedObjectIndex(FI); ++FI) {
1372 int64_t ObjBegin = MFI.getObjectOffset(FI);
1373 int64_t ObjEnd = ObjBegin + MFI.getObjectSize(FI);
1374 if (ObjBegin <= PartBegin && PartEnd <= ObjEnd)
1375 break;
1376 }
 // FI is still a fixed object index only if the scan stopped on a match.
1377 if (MFI.isFixedObjectIndex(FI)) {
1378 SDValue Addr =
1379 DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getFrameIndex(FI, PtrVT),
1380 DAG.getIntPtrConstant(Ins[i].PartOffset, dl));
1381 return DAG.getLoad(ValVT, dl, Chain, Addr,
1383 DAG.getMachineFunction(), FI, Ins[i].PartOffset));
1384 }
1385 }
1386
 // General case: a fixed object sized for ValVT at the argument's offset.
1387 int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
1388 VA.getLocMemOffset(), isImmutable);
1389
1390 // Set SExt or ZExt flag.
1391 if (VA.getLocInfo() == CCValAssign::ZExt) {
1392 MFI.setObjectZExt(FI, true);
1393 } else if (VA.getLocInfo() == CCValAssign::SExt) {
1394 MFI.setObjectSExt(FI, true);
1395 }
1396
 // On 32-bit Windows MSVC targets the load alignment is set to 4 for every
 // type except f80; otherwise it is left unset.
1397 MaybeAlign Alignment;
1398 if (Subtarget.isTargetWindowsMSVC() && !Subtarget.is64Bit() &&
1399 ValVT != MVT::f80)
1400 Alignment = MaybeAlign(4);
1401 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1402 SDValue Val = DAG.getLoad(
1403 ValVT, dl, Chain, FIN,
1405 Alignment);
 // A value that was extended in memory is converted back to its value type.
1406 return ExtendedInMem
1407 ? (VA.getValVT().isVector()
1408 ? DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VA.getValVT(), Val)
1409 : DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val))
1410 : Val;
1411}
1412
1413// FIXME: Get this from tablegen.
// Returns the list of 64-bit GPRs used for argument passing: the four Win64
// registers when CallConv is a Win64 convention, otherwise the six registers
// RDI, RSI, RDX, RCX, R8, R9. Asserts that the subtarget is 64-bit.
// NOTE(review): this listing jumps from 1413 to 1415, so the first signature
// line (return type, name, CallConv parameter) is not visible. The caller in
// this file uses the name get64BitArgumentGPRs and binds the result to
// ArrayRef<MCPhysReg>. Restore the line from upstream before compiling.
1415 const X86Subtarget &Subtarget) {
1416 assert(Subtarget.is64Bit());
1417
1418 if (Subtarget.isCallingConvWin64(CallConv)) {
1419 static const MCPhysReg GPR64ArgRegsWin64[] = {
1420 X86::RCX, X86::RDX, X86::R8, X86::R9
1421 };
1422 return GPR64ArgRegsWin64;
1423 }
1424
1425 static const MCPhysReg GPR64ArgRegs64Bit[] = {
1426 X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
1427 };
1428 return GPR64ArgRegs64Bit;
1429}
1430
1431// FIXME: Get this from tablegen.
// Returns the XMM registers that may carry arguments: XMM0-XMM7, or an empty
// list for Win64 conventions, soft-float, or when SSE1 is unavailable.
// Asserts that the subtarget is 64-bit.
// NOTE(review): this listing jumps from 1431 to 1433, so the first signature
// line (return type, name, first parameter) is not visible. The caller in
// this file uses the name get64BitArgumentXMMs and passes a MachineFunction
// first. Restore the line from upstream before compiling.
1433 CallingConv::ID CallConv,
1434 const X86Subtarget &Subtarget) {
1435 assert(Subtarget.is64Bit());
1436 if (Subtarget.isCallingConvWin64(CallConv)) {
1437 // The XMM registers which might contain var arg parameters are shadowed
1438 // in their paired GPR. So we only need to save the GPR to their home
1439 // slots.
1440 // TODO: __vectorcall will change this.
1441 return {};
1442 }
1443
1444 bool isSoftFloat = Subtarget.useSoftFloat();
1445 if (isSoftFloat || !Subtarget.hasSSE1())
1446 // Kernel mode asks for SSE to be disabled, so there are no XMM argument
1447 // registers.
1448 return {};
1449
1450 static const MCPhysReg XMMArgRegs64Bit[] = {
1451 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
1452 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
1453 };
1454 return XMMArgRegs64Bit;
1455}
1456
1457#ifndef NDEBUG
// Debug-only helper: true when ArgLocs is ordered by ascending getValNo().
// NOTE(review): this listing jumps from 1457 to 1459, so the signature line is
// not visible. The assert in LowerFormalArguments calls it as
// isSortedByValueNo(ArgLocs). Restore the line from upstream before compiling.
1459 return llvm::is_sorted(
1460 ArgLocs, [](const CCValAssign &A, const CCValAssign &B) -> bool {
1461 return A.getValNo() < B.getValNo();
1462 });
1463}
1464#endif
1465
1466namespace {
1467/// This is a helper class for lowering variable arguments parameters.
/// The constructor only caches references and handles derived from its
/// arguments. The SDLoc is stored by reference, so the object passed as
/// \p Loc has to outlive the helper.
1468class VarArgsLoweringHelper {
1469public:
1470 VarArgsLoweringHelper(X86MachineFunctionInfo *FuncInfo, const SDLoc &Loc,
1471 SelectionDAG &DAG, const X86Subtarget &Subtarget,
1472 CallingConv::ID CallConv, CCState &CCInfo)
1473 : FuncInfo(FuncInfo), DL(Loc), DAG(DAG), Subtarget(Subtarget),
1474 TheMachineFunction(DAG.getMachineFunction()),
1475 TheFunction(TheMachineFunction.getFunction()),
1476 FrameInfo(TheMachineFunction.getFrameInfo()),
1477 FrameLowering(*Subtarget.getFrameLowering()),
1478 TargLowering(DAG.getTargetLoweringInfo()), CallConv(CallConv),
1479 CCInfo(CCInfo) {}
1480
1481 // Lower variable arguments parameters.
 // Chain is taken by reference and may be replaced with an updated chain.
1482 void lowerVarArgsParameters(SDValue &Chain, unsigned StackSize);
1483
1484private:
 // Sets up the vararg frame indices and spills unallocated argument
 // registers. Invoked from lowerVarArgsParameters when the frame has a
 // va_start.
1485 void createVarArgAreaAndStoreRegisters(SDValue &Chain, unsigned StackSize);
1486
 // Copies the registers to be forwarded to musttail calls into virtual
 // registers.
1487 void forwardMustTailParameters(SDValue &Chain);
1488
1489 bool is64Bit() const { return Subtarget.is64Bit(); }
1490 bool isWin64() const { return Subtarget.isCallingConvWin64(CallConv); }
1491
 // The members below are initialized in declaration order. TheFunction and
 // FrameInfo are derived from TheMachineFunction, so it has to stay ahead of
 // them.
1492 X86MachineFunctionInfo *FuncInfo;
1493 const SDLoc &DL;
1494 SelectionDAG &DAG;
1495 const X86Subtarget &Subtarget;
1496 MachineFunction &TheMachineFunction;
1497 const Function &TheFunction;
1498 MachineFrameInfo &FrameInfo;
1499 const TargetFrameLowering &FrameLowering;
1500 const TargetLowering &TargLowering;
1501 CallingConv::ID CallConv;
1502 CCState &CCInfo;
1503};
1504} // namespace
1505
// Creates the frame objects needed by va_start and, on 64-bit targets, stores
// the still-unallocated argument registers into the register save area. The
// resulting stores are merged into Chain through a TokenFactor.
// NOTE(review): this listing skips 1549, 1575, 1586, 1603, 1604 and 1607. The
// first line of the LiveGPRs declaration, the declaration of `MemOps`, one
// argument line of DAG.getStore and parts of the StoreMMO initializer are not
// visible. Restore them from upstream before compiling.
1506void VarArgsLoweringHelper::createVarArgAreaAndStoreRegisters(
1507 SDValue &Chain, unsigned StackSize) {
1508 // If the function takes variable number of arguments, make a frame index for
1509 // the start of the first vararg value... for expansion of llvm.va_start. We
1510 // can skip this if there are no va_start calls.
1511 if (is64Bit() || (CallConv != CallingConv::X86_FastCall &&
1512 CallConv != CallingConv::X86_ThisCall)) {
1513 FuncInfo->setVarArgsFrameIndex(
1514 FrameInfo.CreateFixedObject(1, StackSize, true));
1515 }
1516
1517 // 64-bit calling conventions support varargs and register parameters, so we
1518 // have to do extra work to spill them in the prologue.
1519 if (is64Bit()) {
1520 // Find the first unallocated argument registers.
1521 ArrayRef<MCPhysReg> ArgGPRs = get64BitArgumentGPRs(CallConv, Subtarget);
1522 ArrayRef<MCPhysReg> ArgXMMs =
1523 get64BitArgumentXMMs(TheMachineFunction, CallConv, Subtarget);
1524 unsigned NumIntRegs = CCInfo.getFirstUnallocated(ArgGPRs);
1525 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(ArgXMMs);
1526
1527 assert(!(NumXMMRegs && !Subtarget.hasSSE1()) &&
1528 "SSE register cannot be used when SSE is disabled!");
1529
1530 if (isWin64()) {
1531 // Get to the caller-allocated home save location. Add 8 to account
1532 // for the return address.
1533 int HomeOffset = FrameLowering.getOffsetOfLocalArea() + 8;
1534 FuncInfo->setRegSaveFrameIndex(
1535 FrameInfo.CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
1536 // Fixup to set vararg frame on shadow area (4 x i64).
1537 if (NumIntRegs < 4)
1538 FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
1539 } else {
1540 // For X86-64, if there are vararg parameters that are passed via
1541 // registers, then we must store them to their spots on the stack so
1542 // they may be loaded by dereferencing the result of va_next.
 // The save area holds all GPR slots (8 bytes each) followed by all XMM
 // slots (16 bytes each), so the FP offset starts after the GPR slots.
1543 FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
1544 FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16);
1545 FuncInfo->setRegSaveFrameIndex(FrameInfo.CreateStackObject(
1546 ArgGPRs.size() * 8 + ArgXMMs.size() * 16, Align(16), false));
1547 }
1548
1550 LiveGPRs; // list of SDValue for GPR registers keeping live input value
1551 SmallVector<SDValue, 8> LiveXMMRegs; // list of SDValue for XMM registers
1552 // keeping live input value
1553 SDValue ALVal; // if applicable keeps SDValue for %al register
1554
1555 // Gather all the live in physical registers.
1556 for (MCPhysReg Reg : ArgGPRs.slice(NumIntRegs)) {
1557 Register GPR = TheMachineFunction.addLiveIn(Reg, &X86::GR64RegClass);
1558 LiveGPRs.push_back(DAG.getCopyFromReg(Chain, DL, GPR, MVT::i64));
1559 }
1560 const auto &AvailableXmms = ArgXMMs.slice(NumXMMRegs);
1561 if (!AvailableXmms.empty()) {
1562 Register AL = TheMachineFunction.addLiveIn(X86::AL, &X86::GR8RegClass);
1563 ALVal = DAG.getCopyFromReg(Chain, DL, AL, MVT::i8);
1564 for (MCPhysReg Reg : AvailableXmms) {
1565 // FastRegisterAllocator spills virtual registers at basic
1566 // block boundary. That leads to usages of xmm registers
1567 // outside of check for %al. Pass physical registers to
1568 // VASTART_SAVE_XMM_REGS to avoid unnecessary spilling.
1569 TheMachineFunction.getRegInfo().addLiveIn(Reg);
1570 LiveXMMRegs.push_back(DAG.getRegister(Reg, MVT::v4f32));
1571 }
1572 }
1573
1574 // Store the integer parameter registers.
1576 SDValue RSFIN =
1577 DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
1578 TargLowering.getPointerTy(DAG.getDataLayout()));
1579 unsigned Offset = FuncInfo->getVarArgsGPOffset();
1580 for (SDValue Val : LiveGPRs) {
1581 SDValue FIN = DAG.getNode(ISD::ADD, DL,
1582 TargLowering.getPointerTy(DAG.getDataLayout()),
1583 RSFIN, DAG.getIntPtrConstant(Offset, DL));
 // Each store is chained on result #1 of its own register copy.
1584 SDValue Store =
1585 DAG.getStore(Val.getValue(1), DL, Val, FIN,
1587 DAG.getMachineFunction(),
1588 FuncInfo->getRegSaveFrameIndex(), Offset));
1589 MemOps.push_back(Store);
1590 Offset += 8;
1591 }
1592
1593 // Now store the XMM (fp + vector) parameter registers.
1594 if (!LiveXMMRegs.empty()) {
 // Operand order: chain, %al value, save-area address, FP offset, then
 // the live XMM registers.
1595 SmallVector<SDValue, 12> SaveXMMOps;
1596 SaveXMMOps.push_back(Chain);
1597 SaveXMMOps.push_back(ALVal);
1598 SaveXMMOps.push_back(RSFIN);
1599 SaveXMMOps.push_back(
1600 DAG.getTargetConstant(FuncInfo->getVarArgsFPOffset(), DL, MVT::i32));
1601 llvm::append_range(SaveXMMOps, LiveXMMRegs);
1602 MachineMemOperand *StoreMMO =
1605 DAG.getMachineFunction(), FuncInfo->getRegSaveFrameIndex(),
1606 Offset),
1608 MemOps.push_back(DAG.getMemIntrinsicNode(X86ISD::VASTART_SAVE_XMM_REGS,
1609 DL, DAG.getVTList(MVT::Other),
1610 SaveXMMOps, MVT::i8, StoreMMO));
1611 }
1612
1613 if (!MemOps.empty())
1614 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
1615 }
1616}
1617
// Records the registers to be forwarded to musttail calls and copies each of
// them from its incoming register into a fresh virtual register, threading
// the copies through Chain.
// NOTE(review): this listing jumps from 1638 to 1640, so the first line of the
// `Forwards` declaration is not visible. It is bound to
// FuncInfo->getForwardedMustTailRegParms() and modified below, so it is
// presumably a non-const reference. Restore the line from upstream before
// compiling.
1618void VarArgsLoweringHelper::forwardMustTailParameters(SDValue &Chain) {
1619 // Find the largest legal vector type.
1620 MVT VecVT = MVT::Other;
1621 // FIXME: Only some x86_32 calling conventions support AVX512.
1622 if (Subtarget.useAVX512Regs() &&
1623 (is64Bit() || (CallConv == CallingConv::X86_VectorCall ||
1624 CallConv == CallingConv::Intel_OCL_BI)))
1625 VecVT = MVT::v16f32;
1626 else if (Subtarget.hasAVX())
1627 VecVT = MVT::v8f32;
1628 else if (Subtarget.hasSSE2())
1629 VecVT = MVT::v4f32;
1630
1631 // We forward some GPRs and some vector types.
1632 SmallVector<MVT, 2> RegParmTypes;
1633 MVT IntVT = is64Bit() ? MVT::i64 : MVT::i32;
1634 RegParmTypes.push_back(IntVT);
 // MVT::Other here means no vector type was selected above.
1635 if (VecVT != MVT::Other)
1636 RegParmTypes.push_back(VecVT);
1637
1638 // Compute the set of forwarded registers. The rest are scratch.
1640 FuncInfo->getForwardedMustTailRegParms();
1641 CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, CC_X86);
1642
1643 // Forward AL for SysV x86_64 targets, since it is used for varargs.
1644 if (is64Bit() && !isWin64() && !CCInfo.isAllocated(X86::AL)) {
1645 Register ALVReg = TheMachineFunction.addLiveIn(X86::AL, &X86::GR8RegClass);
1646 Forwards.push_back(ForwardedRegister(ALVReg, X86::AL, MVT::i8));
1647 }
1648
1649 // Copy all forwards from physical to virtual registers.
1650 for (ForwardedRegister &FR : Forwards) {
1651 // FIXME: Can we use a less constrained schedule?
 // FR.VReg is read first, then overwritten with a new virtual register
 // that receives the copied value.
1652 SDValue RegVal = DAG.getCopyFromReg(Chain, DL, FR.VReg, FR.VT);
1653 FR.VReg = TheMachineFunction.getRegInfo().createVirtualRegister(
1654 TargLowering.getRegClassFor(FR.VT));
1655 Chain = DAG.getCopyToReg(Chain, DL, FR.VReg, RegVal);
1656 }
1657}
1658
1659void VarArgsLoweringHelper::lowerVarArgsParameters(SDValue &Chain,
1660 unsigned StackSize) {
1661 // Set FrameIndex to the 0xAAAAAAA value to mark unset state.
1662 // If necessary, it would be set into the correct value later.
1663 FuncInfo->setVarArgsFrameIndex(0xAAAAAAA);
1664 FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
1665
1666 if (FrameInfo.hasVAStart())
1667 createVarArgAreaAndStoreRegisters(Chain, StackSize);
1668
1669 if (FrameInfo.hasMustTailInVarArgFunc())
1670 forwardMustTailParameters(Chain);
1671}
1672
1673SDValue X86TargetLowering::LowerFormalArguments(
1674 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
1675 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
1676 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1677 MachineFunction &MF = DAG.getMachineFunction();
1678 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
1679
1680 const Function &F = MF.getFunction();
1681 if (F.hasExternalLinkage() && Subtarget.isTargetCygMing() &&
1682 F.getName() == "main")
1683 FuncInfo->setForceFramePointer(true);
1684
1685 MachineFrameInfo &MFI = MF.getFrameInfo();
1686 bool Is64Bit = Subtarget.is64Bit();
1687 bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
1688
1689 // On x86_64 with x87 disabled, x86_fp80 cannot be handled: the type would
1690 // need to be returned/passed in x87 registers (FP0/FP1) which are
1691 // unavailable. Emit a clear diagnostic instead of crashing later with
1692 // "Cannot select: build_pair".
1693 if (Is64Bit && !Subtarget.hasX87()) {
1694 if (F.getReturnType()->isX86_FP80Ty() ||
1695 any_of(F.args(), [](const Argument &Arg) {
1696 return Arg.getType()->isX86_FP80Ty();
1697 }))
1699 "cannot use x86_fp80 type with x87 disabled on x86_64 target");
1700 }
1701
1702 assert(
1703 !(IsVarArg && canGuaranteeTCO(CallConv)) &&
1704 "Var args not supported with calling conv' regcall, fastcc, ghc or hipe");
1705
1706 // Assign locations to all of the incoming arguments.
1708 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
1709
1710 // Allocate shadow area for Win64.
1711 if (IsWin64)
1712 CCInfo.AllocateStack(32, Align(8));
1713
1714 CCInfo.AnalyzeArguments(Ins, CC_X86);
1715
1716 // In vectorcall calling convention a second pass is required for the HVA
1717 // types.
1718 if (CallingConv::X86_VectorCall == CallConv) {
1719 CCInfo.AnalyzeArgumentsSecondPass(Ins, CC_X86);
1720 }
1721
1722 // The next loop assumes that the locations are in the same order of the
1723 // input arguments.
1724 assert(isSortedByValueNo(ArgLocs) &&
1725 "Argument Location list must be sorted before lowering");
1726
1727 SDValue ArgValue;
1728 for (unsigned I = 0, InsIndex = 0, E = ArgLocs.size(); I != E;
1729 ++I, ++InsIndex) {
1730 assert(InsIndex < Ins.size() && "Invalid Ins index");
1731 CCValAssign &VA = ArgLocs[I];
1732
1733 if (VA.isRegLoc()) {
1734 EVT RegVT = VA.getLocVT();
1735 if (VA.needsCustom()) {
1736 assert(
1737 VA.getValVT() == MVT::v64i1 &&
1738 "Currently the only custom case is when we split v64i1 to 2 regs");
1739
1740 // v64i1 values, in regcall calling convention, that are
1741 // compiled to 32 bit arch, are split up into two registers.
1742 ArgValue =
1743 getv64i1Argument(VA, ArgLocs[++I], Chain, DAG, dl, Subtarget);
1744 } else {
1745 const TargetRegisterClass *RC;
1746 if (RegVT == MVT::i8)
1747 RC = &X86::GR8RegClass;
1748 else if (RegVT == MVT::i16)
1749 RC = &X86::GR16RegClass;
1750 else if (RegVT == MVT::i32)
1751 RC = &X86::GR32RegClass;
1752 else if (Is64Bit && RegVT == MVT::i64)
1753 RC = &X86::GR64RegClass;
1754 else if (RegVT == MVT::f16)
1755 RC = Subtarget.hasAVX512() ? &X86::FR16XRegClass : &X86::FR16RegClass;
1756 else if (RegVT == MVT::f32)
1757 RC = Subtarget.hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass;
1758 else if (RegVT == MVT::f64)
1759 RC = Subtarget.hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass;
1760 else if (RegVT == MVT::f80)
1761 RC = &X86::RFP80RegClass;
1762 else if (RegVT == MVT::f128)
1763 RC = &X86::VR128RegClass;
1764 else if (RegVT.is512BitVector())
1765 RC = &X86::VR512RegClass;
1766 else if (RegVT.is256BitVector())
1767 RC = Subtarget.hasVLX() ? &X86::VR256XRegClass : &X86::VR256RegClass;
1768 else if (RegVT.is128BitVector())
1769 RC = Subtarget.hasVLX() ? &X86::VR128XRegClass : &X86::VR128RegClass;
1770 else if (RegVT == MVT::x86mmx)
1771 RC = &X86::VR64RegClass;
1772 else if (RegVT == MVT::v1i1)
1773 RC = &X86::VK1RegClass;
1774 else if (RegVT == MVT::v8i1)
1775 RC = &X86::VK8RegClass;
1776 else if (RegVT == MVT::v16i1)
1777 RC = &X86::VK16RegClass;
1778 else if (RegVT == MVT::v32i1)
1779 RC = &X86::VK32RegClass;
1780 else if (RegVT == MVT::v64i1)
1781 RC = &X86::VK64RegClass;
1782 else
1783 llvm_unreachable("Unknown argument type!");
1784
1785 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
1786 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
1787 }
1788
1789 // If this is an 8 or 16-bit value, it is really passed promoted to 32
1790 // bits. Insert an assert[sz]ext to capture this, then truncate to the
1791 // right size.
1792 if (VA.getLocInfo() == CCValAssign::SExt)
1793 ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
1794 DAG.getValueType(VA.getValVT()));
1795 else if (VA.getLocInfo() == CCValAssign::ZExt)
1796 ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
1797 DAG.getValueType(VA.getValVT()));
1798 else if (VA.getLocInfo() == CCValAssign::BCvt)
1799 ArgValue = DAG.getBitcast(VA.getValVT(), ArgValue);
1800
1801 if (VA.isExtInLoc()) {
1802 // Handle MMX values passed in XMM regs.
1803 if (RegVT.isVector() && VA.getValVT().getScalarType() != MVT::i1)
1804 ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue);
1805 else if (VA.getValVT().isVector() &&
1806 VA.getValVT().getScalarType() == MVT::i1 &&
1807 ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
1808 (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
1809 // Promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
1810 ArgValue = lowerRegToMasks(ArgValue, VA.getValVT(), RegVT, dl, DAG);
1811 } else
1812 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
1813 }
1814 } else {
1815 assert(VA.isMemLoc());
1816 ArgValue =
1817 LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, InsIndex);
1818 }
1819
1820 // If value is passed via pointer - do a load.
1821 if (VA.getLocInfo() == CCValAssign::Indirect &&
1822 !(Ins[I].Flags.isByVal() && VA.isRegLoc())) {
1823 ArgValue =
1824 DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, MachinePointerInfo());
1825 }
1826
1827 InVals.push_back(ArgValue);
1828 }
1829
1830 for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
1831 if (Ins[I].Flags.isSwiftAsync()) {
1832 auto X86FI = MF.getInfo<X86MachineFunctionInfo>();
1833 if (X86::isExtendedSwiftAsyncFrameSupported(Subtarget, MF))
1834 X86FI->setHasSwiftAsyncContext(true);
1835 else {
1836 int PtrSize = Subtarget.is64Bit() ? 8 : 4;
1837 int FI =
1838 MF.getFrameInfo().CreateStackObject(PtrSize, Align(PtrSize), false);
1839 X86FI->setSwiftAsyncContextFrameIdx(FI);
1840 SDValue St = DAG.getStore(
1841 DAG.getEntryNode(), dl, InVals[I],
1842 DAG.getFrameIndex(FI, PtrSize == 8 ? MVT::i64 : MVT::i32),
1844 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, St, Chain);
1845 }
1846 }
1847
1848 // Swift calling convention does not require we copy the sret argument
1849 // into %rax/%eax for the return. We don't set SRetReturnReg for Swift.
1850 if (CallConv == CallingConv::Swift || CallConv == CallingConv::SwiftTail)
1851 continue;
1852
1853 // All x86 ABIs require that for returning structs by value we copy the
1854 // sret argument into %rax/%eax (depending on ABI) for the return. Save
1855 // the argument into a virtual register so that we can access it from the
1856 // return points.
1857 if (Ins[I].Flags.isSRet()) {
1858 assert(!FuncInfo->getSRetReturnReg() &&
1859 "SRet return has already been set");
1860 MVT PtrTy = getPointerTy(DAG.getDataLayout());
1861 Register Reg =
1863 FuncInfo->setSRetReturnReg(Reg);
1864 SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[I]);
1865 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
1866 break;
1867 }
1868 }
1869
1870 unsigned StackSize = CCInfo.getStackSize();
1871 // Align stack specially for tail calls.
1872 if (shouldGuaranteeTCO(CallConv,
1874 StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
1875
1876 if (IsVarArg)
1877 VarArgsLoweringHelper(FuncInfo, dl, DAG, Subtarget, CallConv, CCInfo)
1878 .lowerVarArgsParameters(Chain, StackSize);
1879
1880 // Some CCs need callee pop.
1881 if (X86::isCalleePop(CallConv, Is64Bit, IsVarArg,
1883 FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
1884 } else if (CallConv == CallingConv::X86_INTR && Ins.size() == 2) {
1885 // X86 interrupts must pop the error code (and the alignment padding) if
1886 // present.
1887 FuncInfo->setBytesToPopOnReturn(Is64Bit ? 16 : 4);
1888 } else {
1889 FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
1890 // If this is an sret function, the return should pop the hidden pointer.
1891 if (hasCalleePopSRet(Ins, ArgLocs, Subtarget))
1892 FuncInfo->setBytesToPopOnReturn(4);
1893 }
1894
1895 if (!Is64Bit) {
1896 // RegSaveFrameIndex is X86-64 only.
1897 FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
1898 }
1899
1900 FuncInfo->setArgumentStackSize(StackSize);
1901
1902 if (WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo()) {
1903 EHPersonality Personality = classifyEHPersonality(F.getPersonalityFn());
1904 if (Personality == EHPersonality::CoreCLR) {
1905 assert(Is64Bit);
1906 // TODO: Add a mechanism to frame lowering that will allow us to indicate
1907 // that we'd prefer this slot be allocated towards the bottom of the frame
1908 // (i.e. near the stack pointer after allocating the frame). Every
1909 // funclet needs a copy of this slot in its (mostly empty) frame, and the
1910 // offset from the bottom of this and each funclet's frame must be the
1911 // same, so the size of funclets' (mostly empty) frames is dictated by
1912 // how far this slot is from the bottom (since they allocate just enough
1913 // space to accommodate holding this slot at the correct offset).
1914 int PSPSymFI = MFI.CreateStackObject(8, Align(8), /*isSpillSlot=*/false);
1915 EHInfo->PSPSymFrameIdx = PSPSymFI;
1916 }
1917 }
1918
1919 if (shouldDisableArgRegFromCSR(CallConv) ||
1920 F.hasFnAttribute("no_caller_saved_registers")) {
1921 MachineRegisterInfo &MRI = MF.getRegInfo();
1922 for (std::pair<MCRegister, Register> Pair : MRI.liveins())
1923 MRI.disableCalleeSavedRegister(Pair.first);
1924 }
1925
1926 if (CallingConv::PreserveNone == CallConv)
1927 for (const ISD::InputArg &In : Ins) {
1928 if (In.Flags.isSwiftSelf() || In.Flags.isSwiftAsync() ||
1929 In.Flags.isSwiftError()) {
1930 errorUnsupported(DAG, dl,
1931 "Swift attributes can't be used with preserve_none");
1932 break;
1933 }
1934 }
1935
1936 return Chain;
1937}
1938
1939SDValue X86TargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
1940 SDValue Arg, const SDLoc &dl,
1941 SelectionDAG &DAG,
1942 const CCValAssign &VA,
1943 ISD::ArgFlagsTy Flags,
1944 bool isByVal) const {
1945 unsigned LocMemOffset = VA.getLocMemOffset();
1946 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
1947 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
1948 StackPtr, PtrOff);
1949 if (isByVal)
1950 return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
1951
1952 MaybeAlign Alignment;
1953 if (Subtarget.isTargetWindowsMSVC() && !Subtarget.is64Bit() &&
1954 Arg.getSimpleValueType() != MVT::f80)
1955 Alignment = MaybeAlign(4);
1956 return DAG.getStore(
1957 Chain, dl, Arg, PtrOff,
1959 Alignment);
1960}
1961
1962/// Emit a load of return address if tail call
1963/// optimization is performed and it is required.
1964SDValue X86TargetLowering::EmitTailCallLoadRetAddr(
1965 SelectionDAG &DAG, SDValue &OutRetAddr, SDValue Chain, bool IsTailCall,
1966 bool Is64Bit, int FPDiff, const SDLoc &dl) const {
1967 // Adjust the Return address stack slot.
1968 EVT VT = getPointerTy(DAG.getDataLayout());
1969 OutRetAddr = getReturnAddressFrameIndex(DAG);
1970
1971 // Load the "old" Return address.
1972 OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo());
1973 return SDValue(OutRetAddr.getNode(), 1);
1974}
1975
1976/// Emit a store of the return address if tail call
1977/// optimization is performed and it is required (FPDiff!=0).
1979 SDValue Chain, SDValue RetAddrFrIdx,
1980 EVT PtrVT, unsigned SlotSize,
1981 int FPDiff, const SDLoc &dl) {
1982 // Store the return address to the appropriate stack slot.
1983 if (!FPDiff) return Chain;
1984 // Calculate the new stack slot for the return address.
1985 int NewReturnAddrFI =
1986 MF.getFrameInfo().CreateFixedObject(SlotSize, (int64_t)FPDiff - SlotSize,
1987 false);
1988 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT);
1989 Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
1991 DAG.getMachineFunction(), NewReturnAddrFI));
1992 return Chain;
1993}
1994
1995/// Returns a vector_shuffle mask for an movs{s|d}, movd
1996/// operation of specified width.
1997SDValue X86TargetLowering::getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT,
1998 SDValue V1, SDValue V2) const {
1999 unsigned NumElems = VT.getVectorNumElements();
2000 SmallVector<int, 8> Mask;
2001 Mask.push_back(NumElems);
2002 for (unsigned i = 1; i != NumElems; ++i)
2003 Mask.push_back(i);
2004 return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
2005}
2006
2007// Returns the type of copying which is required to set up a byval argument to
2008// a tail-called function. This isn't needed for non-tail calls, because they
2009// always need the equivalent of CopyOnce, but tail-calls sometimes need two to
2010// avoid clobbering another argument (CopyViaTemp), and sometimes can be
2011// optimised to zero copies when forwarding an argument from the caller's
2012// caller (NoCopy).
2013X86TargetLowering::ByValCopyKind X86TargetLowering::ByValNeedsCopyForTailCall(
2014 SelectionDAG &DAG, SDValue Src, SDValue Dst, ISD::ArgFlagsTy Flags) const {
2015 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
2016
2017 // Globals are always safe to copy from.
2019 return CopyOnce;
2020
2021 // Can only analyse frame index nodes, conservatively assume we need a
2022 // temporary.
2023 auto *SrcFrameIdxNode = dyn_cast<FrameIndexSDNode>(Src);
2024 auto *DstFrameIdxNode = dyn_cast<FrameIndexSDNode>(Dst);
2025 if (!SrcFrameIdxNode || !DstFrameIdxNode)
2026 return CopyViaTemp;
2027
2028 int SrcFI = SrcFrameIdxNode->getIndex();
2029 int DstFI = DstFrameIdxNode->getIndex();
2030 assert(MFI.isFixedObjectIndex(DstFI) &&
2031 "byval passed in non-fixed stack slot");
2032
2033 int64_t SrcOffset = MFI.getObjectOffset(SrcFI);
2034 int64_t DstOffset = MFI.getObjectOffset(DstFI);
2035
2036 // If the source is in the local frame, then the copy to the argument
2037 // memory is always valid.
2038 bool FixedSrc = MFI.isFixedObjectIndex(SrcFI);
2039 if (!FixedSrc || (FixedSrc && SrcOffset < 0))
2040 return CopyOnce;
2041
2042 // If the value is already in the correct location, then no copying is
2043 // needed. If not, then we need to copy via a temporary.
2044 if (SrcOffset == DstOffset)
2045 return NoCopy;
2046 else
2047 return CopyViaTemp;
2048}
2049
2050SDValue
2051X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
2052 SmallVectorImpl<SDValue> &InVals) const {
2053 SelectionDAG &DAG = CLI.DAG;
2054 SDLoc &dl = CLI.DL;
2055 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
2056 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
2057 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
2058 SDValue Chain = CLI.Chain;
2059 SDValue Callee = CLI.Callee;
2060 CallingConv::ID CallConv = CLI.CallConv;
2061 bool &isTailCall = CLI.IsTailCall;
2062 bool isVarArg = CLI.IsVarArg;
2063 const auto *CB = CLI.CB;
2064
2065 MachineFunction &MF = DAG.getMachineFunction();
2066 bool Is64Bit = Subtarget.is64Bit();
2067 bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
2068 bool ShouldGuaranteeTCO = shouldGuaranteeTCO(
2069 CallConv, MF.getTarget().Options.GuaranteedTailCallOpt);
2070 X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
2071 bool HasNCSR = (CB && isa<CallInst>(CB) &&
2072 CB->hasFnAttr("no_caller_saved_registers"));
2073 bool IsIndirectCall = (CB && isa<CallInst>(CB) && CB->isIndirectCall());
2074 bool IsCFICall = IsIndirectCall && CLI.CFIType;
2075 const Module *M = MF.getFunction().getParent();
2076
2077 // If the indirect call target has the nocf_check attribute, the call needs
2078 // the NOTRACK prefix. For simplicity just disable tail calls as there are
2079 // so many variants.
2080 // FIXME: This will cause backend errors if the user forces the issue.
2081 bool IsNoTrackIndirectCall = IsIndirectCall && CB->doesNoCfCheck() &&
2082 M->getModuleFlag("cf-protection-branch");
2083 if (IsNoTrackIndirectCall)
2084 isTailCall = false;
2085
2086 MachineFunction::CallSiteInfo CSInfo;
2087 if (CallConv == CallingConv::X86_INTR)
2088 report_fatal_error("X86 interrupts may not be called directly");
2089
2090 // Set type id for call site info.
2091 setTypeIdForCallsiteInfo(CB, MF, CSInfo);
2092
2093 if (IsIndirectCall && !IsWin64 &&
2094 M->getModuleFlag("import-call-optimization"))
2095 errorUnsupported(DAG, dl,
2096 "Indirect calls must have a normal calling convention if "
2097 "Import Call Optimization is enabled");
2098
2099 // Analyze operands of the call, assigning locations to each operand.
2101 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
2102
2103 // Allocate shadow area for Win64.
2104 if (IsWin64)
2105 CCInfo.AllocateStack(32, Align(8));
2106
2107 CCInfo.AnalyzeArguments(Outs, CC_X86);
2108
2109 // In vectorcall calling convention a second pass is required for the HVA
2110 // types.
2111 if (CallingConv::X86_VectorCall == CallConv) {
2112 CCInfo.AnalyzeArgumentsSecondPass(Outs, CC_X86);
2113 }
2114
2115 bool IsMustTail = CLI.CB && CLI.CB->isMustTailCall();
2116 bool IsSibcall = false;
2117 if (isTailCall && ShouldGuaranteeTCO) {
2118 // If we need to guarantee TCO for a non-musttail call, we just need to make
2119 // sure the conventions match. If a tail call uses one of the supported TCO
2120 // conventions and the caller and callee match, we can tail call any
2121 // function prototype.
2122 CallingConv::ID CallerCC = MF.getFunction().getCallingConv();
2123 isTailCall = (CallConv == CallerCC);
2124 IsSibcall = IsMustTail;
2125 } else if (isTailCall) {
2126 // Check if this tail call is a "sibling" call, which is loosely defined to
2127 // be a tail call that doesn't require heroics like moving the return
2128 // address or swapping byval arguments. We treat some musttail calls as
2129 // sibling calls to avoid unnecessary argument copies.
2130 IsSibcall = isEligibleForSiblingCallOpt(CLI, CCInfo, ArgLocs);
2131 isTailCall = IsSibcall || IsMustTail;
2132 }
2133
2134 if (isTailCall)
2135 ++NumTailCalls;
2136
2137 if (IsMustTail && !isTailCall)
2138 report_fatal_error("failed to perform tail call elimination on a call "
2139 "site marked musttail");
2140
2141 assert(!(isVarArg && canGuaranteeTCO(CallConv)) &&
2142 "Var args not supported with calling convention fastcc, ghc or hipe");
2143
2144 // Get a count of how many bytes are to be pushed on the stack.
2145 unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
2146 if (IsSibcall)
2147 // This is a sibcall. The memory operands are available in caller's
2148 // own caller's stack.
2149 NumBytes = 0;
2150 else if (ShouldGuaranteeTCO && canGuaranteeTCO(CallConv))
2151 NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
2152
2153 // A sibcall is ABI-compatible and does not need to adjust the stack pointer.
2154 int FPDiff = 0;
2155 if (isTailCall && ShouldGuaranteeTCO && !IsSibcall) {
2156 // Lower arguments at fp - stackoffset + fpdiff.
2157 unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn();
2158
2159 FPDiff = NumBytesCallerPushed - NumBytes;
2160
2161 // Set the delta of movement of the returnaddr stackslot.
2162 // But only set if delta is greater than previous delta.
2163 if (FPDiff < X86Info->getTCReturnAddrDelta())
2164 X86Info->setTCReturnAddrDelta(FPDiff);
2165 }
2166
2167 unsigned NumBytesToPush = NumBytes;
2168 unsigned NumBytesToPop = NumBytes;
2169
2171 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
2172
2173 // If we are doing a tail-call, any byval arguments will be written to stack
2174 // space which was used for incoming arguments. If any of the values being used
2175 // are incoming byval arguments to this function, then they might be
2176 // overwritten by the stores of the outgoing arguments. To avoid this, we
2177 // need to make a temporary copy of them in local stack space, then copy back
2178 // to the argument area.
2179 // FIXME: There's potential to improve the code by using virtual registers for
2180 // temporary storage, and letting the register allocator spill if needed.
2181 SmallVector<SDValue, 8> ByValTemporaries;
2182 SDValue ByValTempChain;
2183 if (isTailCall) {
2184 // Use null SDValue to mean "no temporary recorded for this arg index".
2185 ByValTemporaries.assign(OutVals.size(), SDValue());
2186
2187 SmallVector<SDValue, 8> ByValCopyChains;
2188 for (const CCValAssign &VA : ArgLocs) {
2189 unsigned ArgIdx = VA.getValNo();
2190 SDValue Src = OutVals[ArgIdx];
2191 ISD::ArgFlagsTy Flags = Outs[ArgIdx].Flags;
2192
2193 if (!Flags.isByVal())
2194 continue;
2195
2196 auto PtrVT = getPointerTy(DAG.getDataLayout());
2197
2198 if (!StackPtr.getNode())
2199 StackPtr =
2200 DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(), PtrVT);
2201
2202 // Destination: where this byval should live in the callee's frame
2203 // after the tail call.
2204 int64_t Offset = VA.getLocMemOffset() + FPDiff;
2205 uint64_t Size = VA.getLocVT().getFixedSizeInBits() / 8;
2207 /*IsImmutable=*/true);
2208 SDValue Dst = DAG.getFrameIndex(FI, PtrVT);
2209
2210 ByValCopyKind Copy = ByValNeedsCopyForTailCall(DAG, Src, Dst, Flags);
2211
2212 if (Copy == NoCopy) {
2213 // If the argument is already at the correct offset on the stack
2214 // (because we are forwarding a byval argument from our caller), we
2215 // don't need any copying.
2216 continue;
2217 } else if (Copy == CopyOnce) {
2218 // If the argument is in our local stack frame, no other argument
2219 // preparation can clobber it, so we can copy it to the final location
2220 // later.
2221 ByValTemporaries[ArgIdx] = Src;
2222 } else {
2223 assert(Copy == CopyViaTemp && "unexpected enum value");
2224 // If we might be copying this argument from the outgoing argument
2225 // stack area, we need to copy via a temporary in the local stack
2226 // frame.
2227 MachineFrameInfo &MFI = MF.getFrameInfo();
2228 int TempFrameIdx = MFI.CreateStackObject(Flags.getByValSize(),
2229 Flags.getNonZeroByValAlign(),
2230 /*isSS=*/false);
2231 SDValue Temp =
2232 DAG.getFrameIndex(TempFrameIdx, getPointerTy(DAG.getDataLayout()));
2233
2234 SDValue CopyChain =
2235 CreateCopyOfByValArgument(Src, Temp, Chain, Flags, DAG, dl);
2236 ByValCopyChains.push_back(CopyChain);
2237 ByValTemporaries[ArgIdx] = Temp;
2238 }
2239 }
2240 if (!ByValCopyChains.empty())
2241 ByValTempChain =
2242 DAG.getNode(ISD::TokenFactor, dl, MVT::Other, ByValCopyChains);
2243 }
2244
2245 // If we have an inalloca argument, all stack space has already been allocated
2246 // for us and be right at the top of the stack. We don't support multiple
2247 // arguments passed in memory when using inalloca.
2248 if (!Outs.empty() && Outs.back().Flags.isInAlloca()) {
2249 NumBytesToPush = 0;
2250 if (!ArgLocs.back().isMemLoc())
2251 report_fatal_error("cannot use inalloca attribute on a register "
2252 "parameter");
2253 if (ArgLocs.back().getLocMemOffset() != 0)
2254 report_fatal_error("any parameter with the inalloca attribute must be "
2255 "the only memory argument");
2256 } else if (CLI.IsPreallocated) {
2257 assert(ArgLocs.back().isMemLoc() &&
2258 "cannot use preallocated attribute on a register "
2259 "parameter");
2260 SmallVector<size_t, 4> PreallocatedOffsets;
2261 for (size_t i = 0; i < CLI.OutVals.size(); ++i) {
2262 if (CLI.CB->paramHasAttr(i, Attribute::Preallocated)) {
2263 PreallocatedOffsets.push_back(ArgLocs[i].getLocMemOffset());
2264 }
2265 }
2266 auto *MFI = DAG.getMachineFunction().getInfo<X86MachineFunctionInfo>();
2267 size_t PreallocatedId = MFI->getPreallocatedIdForCallSite(CLI.CB);
2268 MFI->setPreallocatedStackSize(PreallocatedId, NumBytes);
2269 MFI->setPreallocatedArgOffsets(PreallocatedId, PreallocatedOffsets);
2270 NumBytesToPush = 0;
2271 }
2272
2273 if (!IsSibcall && !IsMustTail)
2274 Chain = DAG.getCALLSEQ_START(Chain, NumBytesToPush,
2275 NumBytes - NumBytesToPush, dl);
2276
2277 SDValue RetAddrFrIdx;
2278 // Load return address for tail calls.
2279 if (isTailCall && FPDiff)
2280 Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall,
2281 Is64Bit, FPDiff, dl);
2282
2284 SmallVector<SDValue, 8> MemOpChains;
2285
2286 // The next loop assumes that the locations are in the same order of the
2287 // input arguments.
2288 assert(isSortedByValueNo(ArgLocs) &&
2289 "Argument Location list must be sorted before lowering");
2290
2291 // Walk the register/memloc assignments, inserting copies/loads. In the case
2292 // of tail call optimization arguments are handled later.
2293 for (unsigned I = 0, OutIndex = 0, E = ArgLocs.size(); I != E;
2294 ++I, ++OutIndex) {
2295 assert(OutIndex < Outs.size() && "Invalid Out index");
2296 // Skip inalloca/preallocated arguments, they have already been written.
2297 ISD::ArgFlagsTy Flags = Outs[OutIndex].Flags;
2298 if (Flags.isInAlloca() || Flags.isPreallocated())
2299 continue;
2300
2301 CCValAssign &VA = ArgLocs[I];
2302 EVT RegVT = VA.getLocVT();
2303 SDValue Arg = OutVals[OutIndex];
2304 bool isByVal = Flags.isByVal();
2305
2306 // Promote the value if needed.
2307 switch (VA.getLocInfo()) {
2308 default: llvm_unreachable("Unknown loc info!");
2309 case CCValAssign::Full: break;
2310 case CCValAssign::SExt:
2311 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
2312 break;
2313 case CCValAssign::ZExt:
2314 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
2315 break;
2316 case CCValAssign::AExt:
2317 if (Arg.getValueType().isVector() &&
2318 Arg.getValueType().getVectorElementType() == MVT::i1)
2319 Arg = lowerMasksToReg(Arg, RegVT, dl, DAG);
2320 else if (RegVT.is128BitVector()) {
2321 // Special case: passing MMX values in XMM registers.
2322 Arg = DAG.getBitcast(MVT::i64, Arg);
2323 Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
2324 Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
2325 } else
2326 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
2327 break;
2328 case CCValAssign::BCvt:
2329 Arg = DAG.getBitcast(RegVT, Arg);
2330 break;
2331 case CCValAssign::Indirect: {
2332 if (isByVal) {
2333 // Memcpy the argument to a temporary stack slot to prevent
2334 // the caller from seeing any modifications the callee may make
2335 // as guaranteed by the `byval` attribute.
2336 int FrameIdx = MF.getFrameInfo().CreateStackObject(
2337 Flags.getByValSize(),
2338 std::max(Align(16), Flags.getNonZeroByValAlign()), false);
2339 SDValue StackSlot =
2340 DAG.getFrameIndex(FrameIdx, getPointerTy(DAG.getDataLayout()));
2341 Chain =
2342 CreateCopyOfByValArgument(Arg, StackSlot, Chain, Flags, DAG, dl);
2343 // From now on treat this as a regular pointer
2344 Arg = StackSlot;
2345 isByVal = false;
2346 } else {
2347 // Store the argument.
2348 SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
2349 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
2350 Chain = DAG.getStore(
2351 Chain, dl, Arg, SpillSlot,
2353 Arg = SpillSlot;
2354 }
2355 break;
2356 }
2357 }
2358
2359 if (VA.needsCustom()) {
2360 assert(VA.getValVT() == MVT::v64i1 &&
2361 "Currently the only custom case is when we split v64i1 to 2 regs");
2362 // Split v64i1 value into two registers
2363 Passv64i1ArgInRegs(dl, DAG, Arg, RegsToPass, VA, ArgLocs[++I], Subtarget);
2364 } else if (VA.isRegLoc()) {
2365 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
2366 const TargetOptions &Options = DAG.getTarget().Options;
2367 if (Options.EmitCallSiteInfo)
2368 CSInfo.ArgRegPairs.emplace_back(VA.getLocReg(), I);
2369 if (isVarArg && IsWin64) {
2370 // Win64 ABI requires argument XMM reg to be copied to the corresponding
2371 // shadow reg if callee is a varargs function.
2372 Register ShadowReg;
2373 switch (VA.getLocReg()) {
2374 case X86::XMM0: ShadowReg = X86::RCX; break;
2375 case X86::XMM1: ShadowReg = X86::RDX; break;
2376 case X86::XMM2: ShadowReg = X86::R8; break;
2377 case X86::XMM3: ShadowReg = X86::R9; break;
2378 }
2379 if (ShadowReg)
2380 RegsToPass.push_back(std::make_pair(ShadowReg, Arg));
2381 }
2382 } else if (!IsSibcall && (!isTailCall || (isByVal && !IsMustTail))) {
2383 assert(VA.isMemLoc());
2384 if (!StackPtr.getNode())
2385 StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
2387 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
2388 dl, DAG, VA, Flags, isByVal));
2389 }
2390 }
2391
2392 if (!MemOpChains.empty())
2393 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
2394
2395 if (Subtarget.isPICStyleGOT()) {
2396 // ELF / PIC requires GOT in the EBX register before function calls via PLT
2397 // GOT pointer (except regcall).
2398 if (!isTailCall) {
2399 // Indirect call with RegCall calling convention may use up all the
2400 // general registers, so it is not suitable to bind EBX register for
2401 // GOT address, just let register allocator handle it.
2402 if (CallConv != CallingConv::X86_RegCall)
2403 RegsToPass.push_back(std::make_pair(
2404 Register(X86::EBX), DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
2405 getPointerTy(DAG.getDataLayout()))));
2406 } else {
2407 // If we are tail calling and generating PIC/GOT style code load the
2408 // address of the callee into ECX. The value in ecx is used as target of
2409 // the tail jump. This is done to circumvent the ebx/callee-saved problem
2410 // for tail calls on PIC/GOT architectures. Normally we would just put the
2411 // address of GOT into ebx and then call target@PLT. But for tail calls
2412 // ebx would be restored (since ebx is callee saved) before jumping to the
2413 // target@PLT.
2414
2415 // Note: The actual moving to ECX is done further down.
2416 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
2417 if (G && !G->getGlobal()->hasLocalLinkage() &&
2418 G->getGlobal()->hasDefaultVisibility())
2419 Callee = LowerGlobalAddress(Callee, DAG);
2420 else if (isa<ExternalSymbolSDNode>(Callee))
2421 Callee = LowerExternalSymbol(Callee, DAG);
2422 }
2423 }
2424
2425 if (Is64Bit && isVarArg && !IsWin64 && !IsMustTail &&
2426 (Subtarget.hasSSE1() || !M->getModuleFlag("SkipRaxSetup"))) {
2427 // From AMD64 ABI document:
2428 // For calls that may call functions that use varargs or stdargs
2429 // (prototype-less calls or calls to functions containing ellipsis (...) in
2430 // the declaration) %al is used as hidden argument to specify the number
2431 // of SSE registers used. The contents of %al do not need to match exactly
2432 // the number of registers, but must be an upper bound on the number of SSE
2433 // registers used and is in the range 0 - 8 inclusive.
2434
2435 // Count the number of XMM registers allocated.
2436 static const MCPhysReg XMMArgRegs[] = {
2437 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
2438 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
2439 };
2440 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
2441 assert((Subtarget.hasSSE1() || !NumXMMRegs)
2442 && "SSE registers cannot be used when SSE is disabled");
2443 RegsToPass.push_back(std::make_pair(Register(X86::AL),
2444 DAG.getConstant(NumXMMRegs, dl,
2445 MVT::i8)));
2446 }
2447
2448 if (isVarArg && IsMustTail) {
2449 const auto &Forwards = X86Info->getForwardedMustTailRegParms();
2450 for (const auto &F : Forwards) {
2451 SDValue Val = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
2452 RegsToPass.push_back(std::make_pair(F.PReg, Val));
2453 }
2454 }
2455
2456 // For tail calls lower the arguments to the 'real' stack slots. Sibcalls
2457 // don't need this because the eligibility check rejects calls that require
2458 // shuffling arguments passed in memory.
2459 if (isTailCall && !IsSibcall) {
2460 // Force all the incoming stack arguments to be loaded from the stack
2461 // before any new outgoing arguments or the return address are stored to the
2462 // stack, because the outgoing stack slots may alias the incoming argument
2463 // stack slots, and the alias isn't otherwise explicit. This is slightly
2464 // more conservative than necessary, because it means that each store
2465 // effectively depends on every argument instead of just those arguments it
2466 // would clobber.
2467 Chain = DAG.getStackArgumentTokenFactor(Chain);
2468
2469 if (ByValTempChain)
2470 Chain =
2471 DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chain, ByValTempChain);
2472
2473 SmallVector<SDValue, 8> MemOpChains2;
2474 SDValue FIN;
2475 int FI = 0;
2476 for (unsigned I = 0, OutsIndex = 0, E = ArgLocs.size(); I != E;
2477 ++I, ++OutsIndex) {
2478 CCValAssign &VA = ArgLocs[I];
2479
2480 if (VA.isRegLoc()) {
2481 if (VA.needsCustom()) {
2482 assert((CallConv == CallingConv::X86_RegCall) &&
2483 "Expecting custom case only in regcall calling convention");
2484 // This means that we are in special case where one argument was
2485 // passed through two register locations - Skip the next location
2486 ++I;
2487 }
2488
2489 continue;
2490 }
2491
2492 assert(VA.isMemLoc());
2493 SDValue Arg = OutVals[OutsIndex];
2494 ISD::ArgFlagsTy Flags = Outs[OutsIndex].Flags;
2495 // Skip inalloca/preallocated arguments. They don't require any work.
2496 if (Flags.isInAlloca() || Flags.isPreallocated())
2497 continue;
2498 // Create frame index.
2499 int32_t Offset = VA.getLocMemOffset()+FPDiff;
2500 uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
2501 FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
2502 FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
2503
2504 if (Flags.isByVal()) {
2505 if (SDValue ByValSrc = ByValTemporaries[OutsIndex]) {
2506 auto PtrVT = getPointerTy(DAG.getDataLayout());
2507 SDValue DstAddr = DAG.getFrameIndex(FI, PtrVT);
2508
2510 ByValSrc, DstAddr, Chain, Flags, DAG, dl));
2511 }
2512 } else {
2513 // Store relative to framepointer.
2514 MemOpChains2.push_back(DAG.getStore(
2515 Chain, dl, Arg, FIN,
2517 }
2518 }
2519
2520 if (!MemOpChains2.empty())
2521 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
2522
2523 // Store the return address to the appropriate stack slot.
2524 Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx,
2526 RegInfo->getSlotSize(), FPDiff, dl);
2527 }
2528
2529 // Build a sequence of copy-to-reg nodes chained together with token chain
2530 // and glue operands which copy the outgoing args into registers.
2531 SDValue InGlue;
2532 for (const auto &[Reg, N] : RegsToPass) {
2533 Chain = DAG.getCopyToReg(Chain, dl, Reg, N, InGlue);
2534 InGlue = Chain.getValue(1);
2535 }
2536
2537 bool IsImpCall = false;
2538 bool IsCFGuardCall = false;
2539 if (DAG.getTarget().getCodeModel() == CodeModel::Large) {
2540 assert(Is64Bit && "Large code model is only legal in 64-bit mode.");
2541 // In the 64-bit large code model, we have to make all calls
2542 // through a register, since the call instruction's 32-bit
2543 // pc-relative offset may not be large enough to hold the whole
2544 // address.
2545 } else if (Callee->getOpcode() == ISD::GlobalAddress ||
2546 Callee->getOpcode() == ISD::ExternalSymbol) {
2547 // Lower direct calls to global addresses and external symbols. Setting
2548 // ForCall to true here has the effect of removing WrapperRIP when possible
2549 // to allow direct calls to be selected without first materializing the
2550 // address into a register.
2551 Callee = LowerGlobalOrExternal(Callee, DAG, /*ForCall=*/true, &IsImpCall);
2552 } else if (Subtarget.isTarget64BitILP32() &&
2553 Callee.getValueType() == MVT::i32) {
2554 // Zero-extend the 32-bit Callee address into a 64-bit according to x32 ABI
2555 Callee = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Callee);
2556 } else if (Is64Bit && CB && isCFGuardCall(CB)) {
2557 // We'll use a specific pseudo instruction for tail calls to control flow
2558 // guard functions to guarantee the instruction used for the call. To do
2559 // this we need to unwrap the load now and use the CFG Func GV as the
2560 // callee.
2561 IsCFGuardCall = true;
2562 auto *LoadNode = cast<LoadSDNode>(Callee);
2563 GlobalAddressSDNode *GA =
2564 cast<GlobalAddressSDNode>(unwrapAddress(LoadNode->getBasePtr()));
2566 "CFG Call should be to a guard function");
2567 assert(LoadNode->getOffset()->isUndef() &&
2568 "CFG Function load should not have an offset");
2570 GA->getGlobal(), dl, GA->getValueType(0), 0, X86II::MO_NO_FLAG);
2571 }
2572
2574
2575 if (!IsSibcall && isTailCall && !IsMustTail) {
2576 Chain = DAG.getCALLSEQ_END(Chain, NumBytesToPop, 0, InGlue, dl);
2577 InGlue = Chain.getValue(1);
2578 }
2579
2580 Ops.push_back(Chain);
2581 Ops.push_back(Callee);
2582
2583 if (isTailCall)
2584 Ops.push_back(DAG.getSignedTargetConstant(FPDiff, dl, MVT::i32));
2585
2586 // Add argument registers to the end of the list so that they are known live
2587 // into the call.
2588 for (const auto &[Reg, N] : RegsToPass)
2589 Ops.push_back(DAG.getRegister(Reg, N.getValueType()));
2590
2591 // Add a register mask operand representing the call-preserved registers.
2592 const uint32_t *Mask = [&]() {
2593 auto AdaptedCC = CallConv;
2594 // If HasNCSR is asserted (attribute NoCallerSavedRegisters exists),
2595 // use X86_INTR calling convention because it has the same CSR mask
2596 // (same preserved registers).
2597 if (HasNCSR)
2599 // If NoCalleeSavedRegisters is requested, then use GHC since it happens
2600 // to use the CSR_NoRegs_RegMask.
2601 if (CB && CB->hasFnAttr("no_callee_saved_registers"))
2602 AdaptedCC = (CallingConv::ID)CallingConv::GHC;
2603 return RegInfo->getCallPreservedMask(MF, AdaptedCC);
2604 }();
2605 assert(Mask && "Missing call preserved mask for calling convention");
2606
2607 if (MachineOperand::clobbersPhysReg(Mask, RegInfo->getFramePtr())) {
2608 X86Info->setFPClobberedByCall(true);
2609 if (CLI.CB && isa<InvokeInst>(CLI.CB))
2610 X86Info->setFPClobberedByInvoke(true);
2611 }
2612 if (MachineOperand::clobbersPhysReg(Mask, RegInfo->getBaseRegister())) {
2613 X86Info->setBPClobberedByCall(true);
2614 if (CLI.CB && isa<InvokeInst>(CLI.CB))
2615 X86Info->setBPClobberedByInvoke(true);
2616 }
2617
2618 // If this is an invoke in a 32-bit function using a funclet-based
2619 // personality, assume the function clobbers all registers. If an exception
2620 // is thrown, the runtime will not restore CSRs.
2621 // FIXME: Model this more precisely so that we can register allocate across
2622 // the normal edge and spill and fill across the exceptional edge.
2623 if (!Is64Bit && CLI.CB && isa<InvokeInst>(CLI.CB)) {
2624 const Function &CallerFn = MF.getFunction();
2625 EHPersonality Pers =
2626 CallerFn.hasPersonalityFn()
2629 if (isFuncletEHPersonality(Pers))
2630 Mask = RegInfo->getNoPreservedMask();
2631 }
2632
2633 // Define a new register mask from the existing mask.
2634 uint32_t *RegMask = nullptr;
2635
2636 // In some calling conventions we need to remove the used physical registers
2637 // from the reg mask. Create a new RegMask for such calling conventions.
2638 // RegMask for calling conventions that disable only return registers (e.g.
2639 // preserve_most) will be modified later in LowerCallResult.
2640 bool ShouldDisableArgRegs = shouldDisableArgRegFromCSR(CallConv) || HasNCSR;
2641 if (ShouldDisableArgRegs || shouldDisableRetRegFromCSR(CallConv)) {
2642 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
2643
2644 // Allocate a new Reg Mask and copy Mask.
2645 RegMask = MF.allocateRegMask();
2646 unsigned RegMaskSize = MachineOperand::getRegMaskSize(TRI->getNumRegs());
2647 memcpy(RegMask, Mask, sizeof(RegMask[0]) * RegMaskSize);
2648
2649 // Make sure all sub registers of the argument registers are reset
2650 // in the RegMask.
2651 if (ShouldDisableArgRegs) {
2652 for (auto const &RegPair : RegsToPass)
2653 for (MCPhysReg SubReg : TRI->subregs_inclusive(RegPair.first))
2654 RegMask[SubReg / 32] &= ~(1u << (SubReg % 32));
2655 }
2656
2657 // Create the RegMask Operand according to our updated mask.
2658 Ops.push_back(DAG.getRegisterMask(RegMask));
2659 } else {
2660 // Create the RegMask Operand according to the static mask.
2661 Ops.push_back(DAG.getRegisterMask(Mask));
2662 }
2663
2664 if (InGlue.getNode())
2665 Ops.push_back(InGlue);
2666
2667 if (isTailCall) {
2668 // We used to do:
2669 //// If this is the first return lowered for this function, add the regs
2670 //// to the liveout set for the function.
2671 // This isn't right, although it's probably harmless on x86; liveouts
2672 // should be computed from returns not tail calls. Consider a void
2673 // function making a tail call to a function returning int.
2675 auto Opcode =
2676 IsCFGuardCall ? X86ISD::TC_RETURN_GLOBALADDR : X86ISD::TC_RETURN;
2677 SDValue Ret = DAG.getNode(Opcode, dl, MVT::Other, Ops);
2678
2679 if (IsCFICall)
2680 Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
2681
2682 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
2683 DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
2684 return Ret;
2685 }
2686
2687 // Returns a chain & a glue for retval copy to use.
2688 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2689 if (IsImpCall) {
2690 Chain = DAG.getNode(X86ISD::IMP_CALL, dl, NodeTys, Ops);
2691 } else if (IsNoTrackIndirectCall) {
2692 Chain = DAG.getNode(X86ISD::NT_CALL, dl, NodeTys, Ops);
2693 } else if (IsCFGuardCall) {
2694 Chain = DAG.getNode(X86ISD::CALL_GLOBALADDR, dl, NodeTys, Ops);
2695 } else if (CLI.CB && objcarc::hasAttachedCallOpBundle(CLI.CB)) {
2696 // Calls with a "clang.arc.attachedcall" bundle are special. They should be
2697 // expanded to the call, directly followed by a special marker sequence and
2698 // a call to a ObjC library function. Use the CALL_RVMARKER to do that.
2699 assert(!isTailCall &&
2700 "tail calls cannot be marked with clang.arc.attachedcall");
2701 assert(Is64Bit && "clang.arc.attachedcall is only supported in 64bit mode");
2702
2703 // Add a target global address for the retainRV/claimRV runtime function
2704 // just before the call target.
2706 auto PtrVT = getPointerTy(DAG.getDataLayout());
2707 auto GA = DAG.getTargetGlobalAddress(ARCFn, dl, PtrVT);
2708 Ops.insert(Ops.begin() + 1, GA);
2709 Chain = DAG.getNode(X86ISD::CALL_RVMARKER, dl, NodeTys, Ops);
2710 } else {
2711 Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops);
2712 }
2713
2714 if (IsCFICall)
2715 Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());
2716
2717 InGlue = Chain.getValue(1);
2718 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
2719 DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
2720
2721 // Save heapallocsite metadata.
2722 if (CLI.CB)
2723 if (MDNode *HeapAlloc = CLI.CB->getMetadata("heapallocsite"))
2724 DAG.addHeapAllocSite(Chain.getNode(), HeapAlloc);
2725
2726 // Create the CALLSEQ_END node.
2727 unsigned NumBytesForCalleeToPop = 0; // Callee pops nothing.
2728 if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
2730 NumBytesForCalleeToPop = NumBytes; // Callee pops everything
2731 } else if (hasCalleePopSRet(Outs, ArgLocs, Subtarget)) {
2732 // If this call passes a struct-return pointer, the callee
2733 // pops that struct pointer.
2734 NumBytesForCalleeToPop = 4;
2735 }
2736
2737 // Returns a glue for retval copy to use.
2738 if (!IsSibcall) {
2739 Chain = DAG.getCALLSEQ_END(Chain, NumBytesToPop, NumBytesForCalleeToPop,
2740 InGlue, dl);
2741 InGlue = Chain.getValue(1);
2742 }
2743
2744 if (CallingConv::PreserveNone == CallConv)
2745 for (const ISD::OutputArg &Out : Outs) {
2746 if (Out.Flags.isSwiftSelf() || Out.Flags.isSwiftAsync() ||
2747 Out.Flags.isSwiftError()) {
2748 errorUnsupported(DAG, dl,
2749 "Swift attributes can't be used with preserve_none");
2750 break;
2751 }
2752 }
2753
2754 // Handle result values, copying them out of physregs into vregs that we
2755 // return.
2756 return LowerCallResult(Chain, InGlue, CallConv, isVarArg, Ins, dl, DAG,
2757 InVals, RegMask);
2758}
2759
2760//===----------------------------------------------------------------------===//
2761// Fast Calling Convention (tail call) implementation
2762//===----------------------------------------------------------------------===//
2763
2764// Like stdcall, the callee cleans up the arguments, except that ECX is
2765// reserved for storing the tail called function address. Only 2 registers are
2766// free for argument passing (inreg). Tail call optimization is performed
2767// provided:
2768// * tailcallopt is enabled
2769// * caller/callee are fastcc
2770// On X86_64 architecture with GOT-style position independent code only local
2771// (within module) calls are supported at the moment.
2772// To keep the stack aligned according to platform abi the function
2773// GetAlignedArgumentStackSize ensures that argument delta is always multiples
2774// of stack alignment. (Dynamic linkers need this - Darwin's dyld for example)
2775// If a tail called function callee has more arguments than the caller the
2776// caller needs to make sure that there is room to move the RETADDR to. This is
2777// achieved by reserving an area the size of the argument delta right after the
2778// original RETADDR, but before the saved framepointer or the spilled registers
2779// e.g. caller(arg1, arg2) calls callee(arg1, arg2,arg3,arg4)
2780// stack layout:
2781// arg1
2782// arg2
2783// RETADDR
2784// [ new RETADDR
2785// move area ]
2786// (possible EBP)
2787// ESI
2788// EDI
2789// local1 ..
2790
2791/// Make the stack size align e.g 16n + 12 aligned for a 16-byte align
2792/// requirement.
2793unsigned
2794X86TargetLowering::GetAlignedArgumentStackSize(const unsigned StackSize,
2795 SelectionDAG &DAG) const {
2796 const Align StackAlignment = Subtarget.getFrameLowering()->getStackAlign();
2797 const uint64_t SlotSize = Subtarget.getRegisterInfo()->getSlotSize();
2798 assert(StackSize % SlotSize == 0 &&
2799 "StackSize must be a multiple of SlotSize");
2800 return alignTo(StackSize + SlotSize, StackAlignment) - SlotSize;
2801}
2802
2803/// Return true if the given stack call argument is already available in the
2804/// same position (relatively) of the caller's incoming argument stack.
2805static
2807 MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
2808 const X86InstrInfo *TII, const CCValAssign &VA) {
2809 unsigned Bytes = Arg.getValueSizeInBits() / 8;
2810
2811 for (;;) {
2812 // Look through nodes that don't alter the bits of the incoming value.
2813 unsigned Op = Arg.getOpcode();
2814 if (Op == ISD::ZERO_EXTEND || Op == ISD::ANY_EXTEND || Op == ISD::BITCAST ||
2815 Op == ISD::AssertZext) {
2816 Arg = Arg.getOperand(0);
2817 continue;
2818 }
2819 if (Op == ISD::TRUNCATE) {
2820 const SDValue &TruncInput = Arg.getOperand(0);
2821 if (TruncInput.getOpcode() == ISD::AssertZext &&
2822 cast<VTSDNode>(TruncInput.getOperand(1))->getVT() ==
2823 Arg.getValueType()) {
2824 Arg = TruncInput.getOperand(0);
2825 continue;
2826 }
2827 }
2828 break;
2829 }
2830
2831 int FI = INT_MAX;
2832 if (Arg.getOpcode() == ISD::CopyFromReg) {
2833 Register VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
2834 if (!VR.isVirtual())
2835 return false;
2836 MachineInstr *Def = MRI->getVRegDef(VR);
2837 if (!Def)
2838 return false;
2839 if (!Flags.isByVal()) {
2840 if (!TII->isLoadFromStackSlot(*Def, FI))
2841 return false;
2842 } else {
2843 unsigned Opcode = Def->getOpcode();
2844 if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r ||
2845 Opcode == X86::LEA64_32r) &&
2846 Def->getOperand(1).isFI()) {
2847 FI = Def->getOperand(1).getIndex();
2848 Bytes = Flags.getByValSize();
2849 } else
2850 return false;
2851 }
2852 } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
2853 if (Flags.isByVal())
2854 // ByVal argument is passed in as a pointer but it's now being
2855 // dereferenced. e.g.
2856 // define @foo(%struct.X* %A) {
2857 // tail call @bar(%struct.X* byval %A)
2858 // }
2859 return false;
2860 SDValue Ptr = Ld->getBasePtr();
2862 if (!FINode)
2863 return false;
2864 FI = FINode->getIndex();
2865 } else if (Arg.getOpcode() == ISD::FrameIndex && Flags.isByVal()) {
2867 FI = FINode->getIndex();
2868 Bytes = Flags.getByValSize();
2869 } else
2870 return false;
2871
2872 assert(FI != INT_MAX);
2873 if (!MFI.isFixedObjectIndex(FI))
2874 return false;
2875
2876 if (Offset != MFI.getObjectOffset(FI))
2877 return false;
2878
2879 // If this is not byval, check that the argument stack object is immutable.
2880 // inalloca and argument copy elision can create mutable argument stack
2881 // objects. Byval objects can be mutated, but a byval call intends to pass the
2882 // mutated memory.
2883 if (!Flags.isByVal() && !MFI.isImmutableObjectIndex(FI))
2884 return false;
2885
2886 if (VA.getLocVT().getFixedSizeInBits() >
2888 // If the argument location is wider than the argument type, check that any
2889 // extension flags match.
2890 if (Flags.isZExt() != MFI.isObjectZExt(FI) ||
2891 Flags.isSExt() != MFI.isObjectSExt(FI)) {
2892 return false;
2893 }
2894 }
2895
2896 return Bytes == MFI.getObjectSize(FI);
2897}
2898
2899static bool
2901 Register CallerSRetReg) {
2902 const auto &Outs = CLI.Outs;
2903 const auto &OutVals = CLI.OutVals;
2904
2905 // We know the caller has a sret pointer argument (CallerSRetReg). Locate the
2906 // operand index within the callee that may have a sret pointer too.
2907 unsigned Pos = 0;
2908 for (unsigned E = Outs.size(); Pos != E; ++Pos)
2909 if (Outs[Pos].Flags.isSRet())
2910 break;
2911 // Bail out if the callee has not any sret argument.
2912 if (Pos == Outs.size())
2913 return false;
2914
2915 // At this point, either the caller is forwarding its sret argument to the
2916 // callee, or the callee is being passed a different sret pointer. We now look
2917 // for a CopyToReg, where the callee sret argument is written into a new vreg
2918 // (which should later be %rax/%eax, if this is returned).
2919 SDValue SRetArgVal = OutVals[Pos];
2920 for (SDNode *User : SRetArgVal->users()) {
2921 if (User->getOpcode() != ISD::CopyToReg)
2922 continue;
2924 if (Reg == CallerSRetReg && User->getOperand(2) == SRetArgVal)
2925 return true;
2926 }
2927
2928 return false;
2929}
2930
2931/// Check whether the call is eligible for sibling call optimization. Sibling
2932/// calls are loosely defined to be simple, profitable tail calls that only
2933/// require adjusting register parameters. We do not speculatively to optimize
2934/// complex calls that require lots of argument memory operations that may
2935/// alias.
2936///
2937/// Note that LLVM supports multiple ways, such as musttail, to force tail call
2938/// emission. Returning false from this function will not prevent tail call
2939/// emission in all cases.
2940bool X86TargetLowering::isEligibleForSiblingCallOpt(
2942 SmallVectorImpl<CCValAssign> &ArgLocs) const {
2943 SelectionDAG &DAG = CLI.DAG;
2944 const SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
2945 const SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
2946 const SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
2947 SDValue Callee = CLI.Callee;
2948 CallingConv::ID CalleeCC = CLI.CallConv;
2949 bool isVarArg = CLI.IsVarArg;
2950
2951 if (!mayTailCallThisCC(CalleeCC))
2952 return false;
2953
2954 // If -tailcallopt is specified, make fastcc functions tail-callable.
2955 MachineFunction &MF = DAG.getMachineFunction();
2956 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
2957 const Function &CallerF = MF.getFunction();
2958
2959 // If the function return type is x86_fp80 and the callee return type is not,
2960 // then the FP_EXTEND of the call result is not a nop. It's not safe to
2961 // perform a tailcall optimization here.
2962 if (CallerF.getReturnType()->isX86_FP80Ty() && !CLI.RetTy->isX86_FP80Ty())
2963 return false;
2964
2965 // Win64 functions have extra shadow space for argument homing. Don't do the
2966 // sibcall if the caller and callee have mismatched expectations for this
2967 // space.
2968 CallingConv::ID CallerCC = CallerF.getCallingConv();
2969 bool IsCalleeWin64 = Subtarget.isCallingConvWin64(CalleeCC);
2970 bool IsCallerWin64 = Subtarget.isCallingConvWin64(CallerCC);
2971 if (IsCalleeWin64 != IsCallerWin64)
2972 return false;
2973
2974 // If we are using a GOT, don't generate sibling calls to non-local,
2975 // default-visibility symbols. Tail calling such a symbol requires using a GOT
2976 // relocation, which forces early binding of the symbol. This breaks code that
2977 // require lazy function symbol resolution. Using musttail or
2978 // GuaranteedTailCallOpt will override this.
2979 if (Subtarget.isPICStyleGOT()) {
2980 if (isa<ExternalSymbolSDNode>(Callee))
2981 return false;
2982 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
2983 if (!G->getGlobal()->hasLocalLinkage() &&
2984 G->getGlobal()->hasDefaultVisibility())
2985 return false;
2986 }
2987 }
2988
2989 // Look for obvious safe cases to perform tail call optimization that do not
2990 // require ABI changes. This is what gcc calls sibcall.
2991
2992 // Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to
2993 // emit a special epilogue.
2994 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
2995 if (RegInfo->hasStackRealignment(MF))
2996 return false;
2997
2998 // Avoid sibcall optimization if we are an sret return function and the callee
2999 // is incompatible, unless such premises are proven wrong. See comment in
3000 // LowerReturn about why hasStructRetAttr is insufficient.
3001 if (Register SRetReg = FuncInfo->getSRetReturnReg()) {
3002 // For a compatible tail call the callee must return our sret pointer. So it
3003 // needs to be (a) an sret function itself and (b) we pass our sret as its
3004 // sret. Condition #b is harder to determine.
3005 if (!mayBeSRetTailCallCompatible(CLI, SRetReg))
3006 return false;
3007 } else if (hasCalleePopSRet(Outs, ArgLocs, Subtarget))
3008 // The callee pops an sret, so we cannot tail-call, as our caller doesn't
3009 // expect that.
3010 return false;
3011
3012 // Do not sibcall optimize vararg calls unless all arguments are passed via
3013 // registers.
3014 LLVMContext &C = *DAG.getContext();
3015 if (isVarArg && !Outs.empty()) {
3016 // Optimizing for varargs on Win64 is unlikely to be safe without
3017 // additional testing.
3018 if (IsCalleeWin64 || IsCallerWin64)
3019 return false;
3020
3021 for (const auto &VA : ArgLocs)
3022 if (!VA.isRegLoc())
3023 return false;
3024 }
3025
3026 // If the call result is in ST0 / ST1, it needs to be popped off the x87
3027 // stack. Therefore, if it's not used by the call it is not safe to optimize
3028 // this into a sibcall.
3029 bool Unused = false;
3030 for (const auto &In : Ins) {
3031 if (!In.Used) {
3032 Unused = true;
3033 break;
3034 }
3035 }
3036 if (Unused) {
3038 CCState RVCCInfo(CalleeCC, false, MF, RVLocs, C);
3039 RVCCInfo.AnalyzeCallResult(Ins, RetCC_X86);
3040 for (const auto &VA : RVLocs) {
3041 if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
3042 return false;
3043 }
3044 }
3045
3046 // Check that the call results are passed in the same way.
3047 if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
3049 return false;
3050 // The callee has to preserve all registers the caller needs to preserve.
3051 const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
3052 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
3053 if (CallerCC != CalleeCC) {
3054 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
3055 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
3056 return false;
3057 }
3058
3059 // The stack frame of the caller cannot be replaced by the tail-callee one's
3060 // if the function is required to preserve all the registers. Conservatively
3061 // prevent tail optimization even if hypothetically all the registers are used
3062 // for passing formal parameters or returning values.
3063 if (CallerF.hasFnAttribute("no_caller_saved_registers"))
3064 return false;
3065
3066 unsigned StackArgsSize = CCInfo.getStackSize();
3067
3068 // If the callee takes no arguments then go on to check the results of the
3069 // call.
3070 if (!Outs.empty()) {
3071 if (StackArgsSize > 0) {
3072 // Check if the arguments are already laid out in the right way as
3073 // the caller's fixed stack objects.
3074 MachineFrameInfo &MFI = MF.getFrameInfo();
3075 const MachineRegisterInfo *MRI = &MF.getRegInfo();
3076 const X86InstrInfo *TII = Subtarget.getInstrInfo();
3077 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
3078 const CCValAssign &VA = ArgLocs[I];
3079 SDValue Arg = OutVals[I];
3080 ISD::ArgFlagsTy Flags = Outs[I].Flags;
3082 return false;
3083 if (!VA.isRegLoc()) {
3084 if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags, MFI, MRI,
3085 TII, VA))
3086 return false;
3087 }
3088 }
3089 }
3090
3091 bool PositionIndependent = isPositionIndependent();
3092 // If the tailcall address may be in a register, then make sure it's
3093 // possible to register allocate for it. In 32-bit, the call address can
3094 // only target EAX, EDX, or ECX since the tail call must be scheduled after
3095 // callee-saved registers are restored. These happen to be the same
3096 // registers used to pass 'inreg' arguments so watch out for those.
3097 if (!Subtarget.is64Bit() && ((!isa<GlobalAddressSDNode>(Callee) &&
3098 !isa<ExternalSymbolSDNode>(Callee)) ||
3099 PositionIndependent)) {
3100 unsigned NumInRegs = 0;
3101 // In PIC we need an extra register to formulate the address computation
3102 // for the callee.
3103 unsigned MaxInRegs = PositionIndependent ? 2 : 3;
3104
3105 for (const auto &VA : ArgLocs) {
3106 if (!VA.isRegLoc())
3107 continue;
3108 Register Reg = VA.getLocReg();
3109 switch (Reg) {
3110 default: break;
3111 case X86::EAX: case X86::EDX: case X86::ECX:
3112 if (++NumInRegs == MaxInRegs)
3113 return false;
3114 break;
3115 }
3116 }
3117 }
3118
3119 const MachineRegisterInfo &MRI = MF.getRegInfo();
3120 if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
3121 return false;
3122 }
3123
3124 bool CalleeWillPop =
3125 X86::isCalleePop(CalleeCC, Subtarget.is64Bit(), isVarArg,
3127
3128 if (unsigned BytesToPop = FuncInfo->getBytesToPopOnReturn()) {
3129 // If we have bytes to pop, the callee must pop them.
3130 bool CalleePopMatches = CalleeWillPop && BytesToPop == StackArgsSize;
3131 if (!CalleePopMatches)
3132 return false;
3133 } else if (CalleeWillPop && StackArgsSize > 0) {
3134 // If we don't have bytes to pop, make sure the callee doesn't pop any.
3135 return false;
3136 }
3137
3138 return true;
3139}
3140
3141/// Determines whether the callee is required to pop its own arguments.
3142/// Callee pop is necessary to support tail calls.
3144 bool is64Bit, bool IsVarArg, bool GuaranteeTCO) {
3145 // If GuaranteeTCO is true, we force some calls to be callee pop so that we
3146 // can guarantee TCO.
3147 if (!IsVarArg && shouldGuaranteeTCO(CallingConv, GuaranteeTCO))
3148 return true;
3149
3150 switch (CallingConv) {
3151 default:
3152 return false;
3157 return !is64Bit;
3158 }
3159}
return SDValue()
static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls)
Return true if the calling convention is one that we can guarantee TCO for.
static bool mayTailCallThisCC(CallingConv::ID CC)
Return true if we might ever do TCO for calls with this calling convention.
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
const HexagonInstrInfo * TII
static bool IsIndirectCall(const MachineInstr *MI)
static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, ISD::ArgFlagsTy Flags, SelectionDAG &DAG, const SDLoc &dl)
CreateCopyOfByValArgument - Make a copy of an aggregate at address specified by "Src" to address "Dst...
Module.h This file contains the declarations for the Module class.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static LVOptions Options
Definition LVOptions.cpp:25
const MCPhysReg ArgGPRs[]
static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt)
Return true if the function is being made into a tailcall target by changing its ABI.
static bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, MachineFrameInfo &MFI, const MachineRegisterInfo *MRI, const M68kInstrInfo *TII, const CCValAssign &VA)
Return true if the given stack call argument is already available in the same position (relatively) o...
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define G(x, y, z)
Definition MD5.cpp:55
Machine Check Debug Module
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T
This file defines ARC utility functions which are used by various parts of the compiler.
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign)
getMaxByValAlign - Helper for getByValTypeAlignment to determine the desired ByVal argument alignment...
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:483
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
static Function * getFunction(FunctionType *Ty, const Twine &Name, Module *M)
static bool is64Bit(const char *name)
static SDValue lowerMasksToReg(const SDValue &ValArg, const EVT &ValLoc, const SDLoc &DL, SelectionDAG &DAG)
Lowers masks values (v*i1) to the local register values.
static void Passv64i1ArgInRegs(const SDLoc &DL, SelectionDAG &DAG, SDValue &Arg, SmallVectorImpl< std::pair< Register, SDValue > > &RegsToPass, CCValAssign &VA, CCValAssign &NextVA, const X86Subtarget &Subtarget)
Breaks v64i1 value into two registers and adds the new node to the DAG.
static SDValue getv64i1Argument(CCValAssign &VA, CCValAssign &NextVA, SDValue &Root, SelectionDAG &DAG, const SDLoc &DL, const X86Subtarget &Subtarget, SDValue *InGlue=nullptr)
Reads two 32 bit registers and creates a 64 bit mask value.
static ArrayRef< MCPhysReg > get64BitArgumentXMMs(MachineFunction &MF, CallingConv::ID CallConv, const X86Subtarget &Subtarget)
static bool isSortedByValueNo(ArrayRef< CCValAssign > ArgLocs)
static ArrayRef< MCPhysReg > get64BitArgumentGPRs(CallingConv::ID CallConv, const X86Subtarget &Subtarget)
static SDValue getPopFromX87Reg(SelectionDAG &DAG, SDValue Chain, const SDLoc &dl, Register Reg, EVT VT, SDValue Glue)
static bool mayBeSRetTailCallCompatible(const TargetLowering::CallLoweringInfo &CLI, Register CallerSRetReg)
static std::pair< MVT, unsigned > handleMaskRegisterForCallingConv(unsigned NumElts, CallingConv::ID CC, const X86Subtarget &Subtarget)
static bool shouldDisableRetRegFromCSR(CallingConv::ID CC)
Returns true if a CC can dynamically exclude a register from the list of callee-saved-registers (Targ...
static void errorUnsupported(SelectionDAG &DAG, const SDLoc &dl, const char *Msg)
Call this when the user attempts to do something unsupported, like returning a double without SSE2 en...
static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue RetAddrFrIdx, EVT PtrVT, unsigned SlotSize, int FPDiff, const SDLoc &dl)
Emit a store of the return address if tail call optimization is performed and it is required (FPDiff!...
static bool shouldDisableArgRegFromCSR(CallingConv::ID CC)
Returns true if a CC can dynamically exclude a register from the list of callee-saved-registers (Targ...
static bool hasStackGuardSlotTLS(const Triple &TargetTriple)
static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT, const EVT &ValLoc, const SDLoc &DL, SelectionDAG &DAG)
The function will lower a register of various sizes (8/16/32/64) to a mask value of the expected size...
static Constant * SegmentOffset(IRBuilderBase &IRB, int Offset, unsigned AddressSpace)
static bool hasCalleePopSRet(const SmallVectorImpl< T > &Args, const SmallVectorImpl< CCValAssign > &ArgLocs, const X86Subtarget &Subtarget)
Determines whether Args, either a set of outgoing arguments to a call, or a set of incoming args of a...
static bool isBitAligned(Align Alignment, uint64_t SizeInBits)
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
Get the array size.
Definition ArrayRef.h:141
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition ArrayRef.h:185
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
CCState - This class holds information needed while lowering arguments and return values.
static LLVM_ABI bool resultsCompatible(CallingConv::ID CalleeCC, CallingConv::ID CallerCC, MachineFunction &MF, LLVMContext &C, const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn CalleeFn, CCAssignFn CallerFn)
Returns true if the results of the two calling conventions are compatible.
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
CCValAssign - Represent assignment of one arg/retval to a location.
void convertToReg(MCRegister Reg)
Register getLocReg() const
LocInfo getLocInfo() const
bool needsCustom() const
bool isExtInLoc() const
int64_t getLocMemOffset() const
unsigned getValNo() const
CallingConv::ID getCallingConv() const
LLVM_ABI bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const
Determine whether the argument or parameter has the given attribute.
LLVM_ABI bool isMustTailCall() const
Tests if this call site must be tail call optimized.
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
static LLVM_ABI Constant * getIntToPtr(Constant *C, Type *Ty, bool OnlyIfReduced=false)
static ConstantInt * getSigned(IntegerType *Ty, int64_t V, bool ImplicitTrunc=false)
Return a ConstantInt with the specified value for the specified type.
Definition Constants.h:135
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:168
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
LLVM_ABI TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Diagnostic information for unsupported feature in backend.
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition Function.h:272
bool hasPersonalityFn() const
Check whether this function has a personality function.
Definition Function.h:905
Constant * getPersonalityFn() const
Get the personality function associated with this function.
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:728
const GlobalValue * getGlobal() const
Module * getParent()
Get the module that this global value is contained inside of...
void setDSOLocal(bool Local)
@ ExternalLinkage
Externally visible function.
Definition GlobalValue.h:53
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
BasicBlock * GetInsertBlock() const
Definition IRBuilder.h:201
LLVMContext & getContext() const
Definition IRBuilder.h:203
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
Definition IRBuilder.h:629
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
Tracks which library functions to use for a particular subtarget.
This class is used to represent ISD::LOAD nodes.
Context object for machine code objects.
Definition MCContext.h:83
Base class for the full range of assembler expressions which are needed for parsing.
Definition MCExpr.h:34
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:214
Machine Value Type.
@ INVALID_SIMPLE_VALUE_TYPE
SimpleValueType SimpleTy
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool is512BitVector() const
Return true if this is a 512-bit vector type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
MVT getVectorElementType() const
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
void setObjectZExt(int ObjectIdx, bool IsZExt)
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setObjectSExt(int ObjectIdx, bool IsSExt)
bool isImmutableObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to an immutable object.
void setHasTailCall(bool V=true)
bool isObjectZExt(int ObjectIdx) const
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool isObjectSExt(int ObjectIdx) const
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
int getObjectIndexBegin() const
Return the minimum frame object index.
const WinEHFuncInfo * getWinEHFuncInfo() const
getWinEHFuncInfo - Return information about how the current function uses Windows exception handling.
MCSymbol * getPICBaseSymbol() const
getPICBaseSymbol - Return a function-local symbol to represent the PIC base.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
uint32_t * allocateRegMask()
Allocate and initialize a register mask with NumRegister bits.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Representation of each machine instruction.
@ EK_Custom32
EK_Custom32 - Each entry is a 32-bit value that is custom lowered by the TargetLowering::LowerCustomJ...
@ EK_LabelDifference64
EK_LabelDifference64 - Each entry is the address of the block minus the address of the jump table.
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MOLoad
The memory access reads data.
@ MONonTemporal
The memory access is non-temporal.
@ MOStore
The memory access writes data.
static unsigned getRegMaskSize(unsigned NumRegs)
Returns number of elements needed for a regmask array.
static bool clobbersPhysReg(const uint32_t *RegMask, MCRegister PhysReg)
clobbersPhysReg - Returns true if this RegMask clobbers PhysReg.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
ArrayRef< std::pair< MCRegister, Register > > liveins() const
LLVM_ABI void disableCalleeSavedRegister(MCRegister Reg)
Disables the register from the list of CSRs.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:68
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
Wrapper class representing virtual and physical registers.
Definition Register.h:20
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
const DebugLoc & getDebugLoc() const
Represents one node in the SelectionDAG.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
void setCFIType(uint32_t Type)
iterator_range< user_iterator > users()
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
LLVM_ABI SDValue getStackArgumentTokenFactor(SDValue Chain)
Compute a TokenFactor to force all the incoming stack arguments to be loaded from the stack.
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
LLVM_ABI SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=LocationSize::precise(0), const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
LLVM_ABI SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
const DataLayout & getDataLayout() const
void addHeapAllocSite(const SDNode *Node, MDNode *MD)
Set HeapAllocSite to be associated with Node.
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
const TargetMachine & getTarget() const
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
MachineFunction & getMachineFunction() const
LLVM_ABI SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
LLVM_ABI SDValue getRegisterMask(const uint32_t *RegMask)
void addCallSiteInfo(const SDNode *Node, CallSiteInfo &&CallInfo)
Set CallSiteInfo to be associated with Node.
LLVMContext * getContext() const
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void assign(size_type NumElts, ValueParamT Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
constexpr bool empty() const
Check if the string is empty.
Definition StringRef.h:141
Class to represent struct types.
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
virtual Value * getIRStackGuard(IRBuilderBase &IRB, const LibcallLoweringInfo &Libcalls) const
If the target has a standard location for the stack protector guard, returns the address of that loca...
const TargetMachine & getTargetMachine() const
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
virtual void insertSSPDeclarations(Module &M, const LibcallLoweringInfo &Libcalls) const
Inserts necessary declarations for SSP (stack protection) purpose.
virtual unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
virtual std::pair< const TargetRegisterClass *, uint8_t > findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const
Return the largest legal super-reg register class of the register class for the specified type and it...
static StringRef getLibcallImplName(RTLIB::LibcallImpl Call)
Get the libcall routine name for the specified libcall implementation.
virtual Value * getSafeStackPointerLocation(IRBuilderBase &IRB, const LibcallLoweringInfo &Libcalls) const
Returns the target-specific address of the unsafe stack pointer.
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
std::vector< ArgListEntry > ArgListTy
MVT getRegisterType(MVT VT) const
Return the type of registers that this ValueType will eventually require.
virtual const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
bool parametersInCSRMatch(const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask, const SmallVectorImpl< CCValAssign > &ArgLocs, const SmallVectorImpl< SDValue > &OutVals) const
Check whether parameters to a call that are passed in callee saved registers are the same as from the...
bool isPositionIndependent() const
virtual ArrayRef< MCPhysReg > getRoundingControlRegisters() const
Returns a 0-terminated array of rounding control registers that can be attached to a strict FP call.
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
void setTypeIdForCallsiteInfo(const CallBase *CB, MachineFunction &MF, MachineFunction::CallSiteInfo &CSInfo) const
TargetOptions Options
CodeModel::Model getCodeModel() const
Returns the code model.
unsigned GuaranteedTailCallOpt
GuaranteedTailCallOpt - This flag is enabled when -tailcallopt is specified on the commandline.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Triple - Helper class for working with autoconf configuration names.
Definition Triple.h:47
bool isAndroid() const
Tests whether the target is Android.
Definition Triple.h:826
bool isMusl() const
Tests whether the environment is musl-libc.
Definition Triple.h:841
bool isOSGlibc() const
Tests whether the OS uses glibc.
Definition Triple.h:764
bool isOSFuchsia() const
Definition Triple.h:672
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
Definition Type.cpp:314
bool isX86_FP80Ty() const
Return true if this is x86 long double.
Definition Type.h:161
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:313
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
Definition Type.cpp:286
Value * getOperand(unsigned i) const
Definition User.h:207
LLVM Value Representation.
Definition Value.h:75
void setBytesToPopOnReturn(unsigned bytes)
void setVarArgsGPOffset(unsigned Offset)
SmallVectorImpl< ForwardedRegister > & getForwardedMustTailRegParms()
void setVarArgsFPOffset(unsigned Offset)
const uint32_t * getCallPreservedMask(const MachineFunction &MF, CallingConv::ID) const override
Register getStackRegister() const
unsigned getSlotSize() const
Register getFramePtr() const
Returns physical register used as frame pointer.
Register getBaseRegister() const
const uint32_t * getNoPreservedMask() const override
bool hasSSE1() const
const Triple & getTargetTriple() const
bool useAVX512Regs() const
bool isCallingConvWin64(CallingConv::ID CC) const
std::pair< const TargetRegisterClass *, uint8_t > findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const override
Return the largest legal super-reg register class of the register class for the specified type and it...
SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const override
Returns relocation base for the given PIC jumptable.
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
bool isMemoryAccessFast(EVT VT, Align Alignment) const
Value * getIRStackGuard(IRBuilderBase &IRB, const LibcallLoweringInfo &Libcalls) const override
If the target has a standard location for the stack protector cookie, returns the address of that loc...
bool useSoftFloat() const override
Value * getSafeStackPointerLocation(IRBuilderBase &IRB, const LibcallLoweringInfo &Libcalls) const override
Return true if the target stores SafeStack pointer at a fixed offset in some non-standard address spa...
const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const override
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
bool isSafeMemOpType(MVT VT) const override
Returns true if it's safe to use load / store of the specified type to expand memcpy / memset inline.
bool functionArgumentNeedsConsecutiveRegisters(Type *Ty, CallingConv::ID CallConv, bool isVarArg, const DataLayout &DL) const override
For some targets, an LLVM struct type must be broken down into multiple simple types,...
Align getByValTypeAlignment(Type *Ty, const DataLayout &DL) const override
Return the desired alignment for ByVal aggregate function arguments in the caller parameter area.
MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment, MachineMemOperand::Flags Flags, unsigned *Fast) const override
Returns true if the target allows unaligned memory accesses of the specified type.
EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op, const AttributeList &FuncAttributes) const override
It returns EVT::Other if the type should be determined using generic target-independent logic.
unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const override
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
void markLibCallAttributes(MachineFunction *MF, unsigned CC, ArgListTy &Args) const override
void insertSSPDeclarations(Module &M, const LibcallLoweringInfo &Libcalls) const override
Inserts necessary declarations for SSP (stack protection) purpose.
bool isScalarFPTypeInSSEReg(EVT VT) const
Return true if the specified scalar FP type is computed in an SSE register, not on the X87 floating p...
unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Certain targets require unusual breakdowns of certain types.
bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
This function returns true if the memory access is aligned or if the target allows this specific unal...
SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const
SDValue unwrapAddress(SDValue N) const override
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the value type to use for ISD::SETCC.
EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const override
For types supported by the target, this is an identity function.
const MCExpr * LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB, unsigned uid, MCContext &Ctx) const override
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
CallingConv Namespace - This namespace contains an enum with a value for the well-known calling conve...
Definition CallingConv.h:21
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24
@ X86_64_SysV
The C convention as specified in the x86-64 supplement to the System V ABI, used on most non-Windows ...
@ HiPE
Used by the High-Performance Erlang Compiler (HiPE).
Definition CallingConv.h:53
@ Swift
Calling convention for Swift.
Definition CallingConv.h:69
@ PreserveMost
Used for runtime calls that preserve most registers.
Definition CallingConv.h:63
@ X86_INTR
x86 hardware interrupt context.
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition CallingConv.h:50
@ X86_ThisCall
Similar to X86_StdCall.
@ PreserveAll
Used for runtime calls that preserve (almost) all registers.
Definition CallingConv.h:66
@ X86_StdCall
stdcall is mostly used by the Win32 API.
Definition CallingConv.h:99
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ X86_VectorCall
MSVC calling convention that passes vectors and vector aggregates in SSE registers.
@ Intel_OCL_BI
Used for Intel OpenCL built-ins.
@ PreserveNone
Used for runtime calls that preserve no general registers.
Definition CallingConv.h:90
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition CallingConv.h:76
@ Win64
The C convention as implemented on Windows/x86-64 and AArch64.
@ SwiftTail
This follows the Swift calling convention in how arguments are passed but guarantees tail calls will ...
Definition CallingConv.h:87
@ X86_RegCall
Register calling convention used for parameters transfer optimization.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ X86_FastCall
'fast' analog of X86_StdCall.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:41
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:264
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:857
@ GlobalAddress
Definition ISDOpcodes.h:88
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:584
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition ISDOpcodes.h:997
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:848
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:665
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition ISDOpcodes.h:230
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:576
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition ISDOpcodes.h:224
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:854
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:982
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ ExternalSymbol
Definition ISDOpcodes.h:93
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:963
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:860
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:62
LLVM_ABI LegalityPredicate isVector(unsigned TypeIdx)
True iff the specified type index is a vector.
@ MO_NO_FLAG
MO_NO_FLAG - No flag for the operand.
@ GlobalBaseReg
On Darwin, this node represents the result of the popl at function entry, used for PIC code.
@ POP_FROM_X87_REG
The same as ISD::CopyFromReg except that this node makes it explicit that it may lower to an x87 FPU ...
bool isExtendedSwiftAsyncFrameSupported(const X86Subtarget &Subtarget, const MachineFunction &MF)
True if the target supports the extended frame for async Swift functions.
bool isCalleePop(CallingConv::ID CallingConv, bool is64Bit, bool IsVarArg, bool GuaranteeTCO)
Determines whether the callee is required to pop its own arguments.
std::optional< Function * > getAttachedARCFunction(const CallBase *CB)
This function returns operand bundle clang_arc_attachedcall's argument, which is the address of the A...
Definition ObjCARCUtil.h:43
bool hasAttachedCallOpBundle(const CallBase *CB)
Definition ObjCARCUtil.h:29
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:558
LLVM_ABI bool isCFGuardCall(const CallBase *CB)
Definition CFGuard.cpp:318
InstructionCost Cost
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI bool isCFGuardFunction(const GlobalValue *GV)
Definition CFGuard.cpp:323
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2207
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
constexpr uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
bool is_sorted(R &&Range, Compare C)
Wrapper function around std::is_sorted to check if elements in a range R are sorted with respect to a...
Definition STLExtras.h:1969
LLVM_ABI EHPersonality classifyEHPersonality(const Value *Pers)
See if the given exception handling personality function is one that we understand.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
bool isFuncletEHPersonality(EHPersonality Pers)
Returns true if this is a personality function that invokes handler funclets (which must return to it...
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
bool CC_X86(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
bool RetCC_X86(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:177
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
static constexpr Align Constant()
Allow constructions of constexpr Align.
Definition Alignment.h:88
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:90
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:418
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:70
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition ValueTypes.h:323
ElementCount getVectorElementCount() const
Definition ValueTypes.h:373
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:396
EVT changeVectorElementType(LLVMContext &Context, EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition ValueTypes.h:98
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:339
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition ValueTypes.h:230
bool is512BitVector() const
Return true if this is a 512-bit vector type.
Definition ValueTypes.h:240
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:176
bool is256BitVector() const
Return true if this is a 256-bit vector type.
Definition ValueTypes.h:235
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:351
bool isVectorOf(EVT EltVT) const
Return true if this is a vector with matching element type.
Definition ValueTypes.h:181
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:359
Describes a register that needs to be forwarded from the prologue to a musttail call.
SmallVector< ArgRegPair, 1 > ArgRegPairs
Vector of call argument and its forwarding register.
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
SmallVector< ISD::OutputArg, 32 > Outs
Type * RetTy
Same as OrigRetTy, or partially legalized for soft float libcalls.