60#define DEBUG_TYPE "expand-ir-insts"
71 cl::desc(
"fp convert instructions on integers with "
72 "more than <N> bits are expanded."));
77 cl::desc(
"div and rem instructions on integers with "
78 "more than <N> bits are expanded."));
92 return Opcode == Instruction::SDiv || Opcode == Instruction::SRem;
105 "ShiftAmt out of range; callers should handle ShiftAmt == 0");
107 Value *Bias = Builder.CreateLShr(Sign,
BitWidth - ShiftAmt,
"bias");
108 return Builder.CreateAdd(
X, Bias,
"adjusted");
124 bool IsDiv = (Opcode == Instruction::UDiv || Opcode == Instruction::SDiv);
127 bool IsExact = IsDiv && BO->
isExact();
130 "Expected power-of-2 constant divisor");
135 unsigned BitWidth = Ty->getIntegerBitWidth();
137 APInt DivisorVal =
C->getValue();
138 bool IsNegativeDivisor = IsSigned && DivisorVal.
isNegative();
151 Result = IsNegativeDivisor ? Builder.CreateNeg(
X) :
X;
153 Result = ConstantInt::get(Ty, 0);
154 }
else if (IsSigned) {
160 X = Builder.CreateFreeze(
X,
X->getName() +
".fr");
164 Value *Quotient = Builder.CreateAShr(
165 Dividend, ShiftAmt, IsDiv && IsNegativeDivisor ?
"pre.neg" :
"shifted",
168 Result = IsNegativeDivisor ? Builder.CreateNeg(Quotient) : Quotient;
172 Value *Truncated = Builder.CreateShl(Quotient, ShiftAmt,
"truncated");
173 Result = Builder.CreateSub(
X, Truncated);
177 Result = Builder.CreateLShr(
X, ShiftAmt,
"", IsExact);
180 Result = Builder.CreateAnd(
X, ConstantInt::get(Ty, Mask));
221 static constexpr std::array<MVT, 3> ExpandableTypes{MVT::f16, MVT::f32,
225 static bool canExpandType(
Type *Ty) {
232 static bool shouldExpandFremType(
const TargetLowering &TLI, EVT VT) {
233 assert(!VT.
isVector() &&
"Cannot handle vector type; must scalarize first");
235 TargetLowering::LegalizeAction::Expand;
238 static bool shouldExpandFremType(
const TargetLowering &TLI,
Type *Ty) {
247 static bool shouldExpandAnyFremType(
const TargetLowering &TLI) {
248 return any_of(ExpandableTypes,
249 [&](MVT V) {
return shouldExpandFremType(TLI, EVT(V)); });
253 assert(canExpandType(Ty) &&
"Expected supported floating point type");
257 Type *ComputeTy = Ty;
261 unsigned MaxIter = 2;
269 unsigned Precision = APFloat::semanticsPrecision(Ty->
getFltSemantics());
270 return FRemExpander{B, Ty, Precision / MaxIter, ComputeTy};
286 : B(B), FremTy(FremTy), ComputeFpTy(ComputeFpTy), ExTy(B.getInt32Ty()),
287 Bits(ConstantInt::
get(ExTy, Bits)), One(ConstantInt::
get(ExTy, 1)) {}
289 Value *createRcp(
Value *V,
const Twine &Name)
const {
292 return B.CreateFDiv(ConstantFP::get(ComputeFpTy, 1.0), V, Name);
304 Value *Q = B.CreateUnaryIntrinsic(Intrinsic::rint, B.CreateFMul(Ax, Ayinv),
306 Value *AxUpdate = B.CreateFMA(B.CreateFNeg(Q), Ay, Ax, {},
"ax");
309 Value *Axp = B.CreateFAdd(AxUpdate, Ay,
"axp");
310 return B.CreateSelect(Clt, Axp, AxUpdate,
"ax");
316 std::pair<Value *, Value *> buildExpAndPower(
Value *Src,
Value *NewExp,
318 const Twine &PowName)
const {
322 Type *Ty = Src->getType();
323 Type *ExTy = B.getInt32Ty();
324 Value *Frexp = B.CreateIntrinsic(Intrinsic::frexp, {Ty, ExTy}, Src);
325 Value *Mant = B.CreateExtractValue(Frexp, {0});
326 Value *
Exp = B.CreateExtractValue(Frexp, {1});
328 Exp = B.CreateSub(Exp, One, ExName);
329 Value *
Pow = B.CreateLdexp(Mant, NewExp, {}, PowName);
338 void buildRemainderComputation(
Value *AxInitial,
Value *AyInitial,
Value *
X,
339 PHINode *RetPhi, FastMathFlags FMF)
const {
340 IRBuilder<>::FastMathFlagGuard Guard(B);
341 B.setFastMathFlags(FMF);
348 auto [Ax, Ex] = buildExpAndPower(AxInitial, Bits,
"ex",
"ax");
349 auto [Ay, Ey] = buildExpAndPower(AyInitial, One,
"ey",
"ay");
354 Value *Nb = B.CreateSub(Ex, Ey,
"nb");
355 Value *Ayinv = createRcp(Ay,
"ayinv");
371 B.SetInsertPoint(LoopBB);
372 PHINode *NbIv = B.CreatePHI(Nb->
getType(), 2,
"nb_iv");
375 auto *AxPhi = B.CreatePHI(ComputeFpTy, 2,
"ax_loop_phi");
376 AxPhi->addIncoming(Ax, PreheaderBB);
378 Value *AxPhiUpdate = buildUpdateAx(AxPhi, Ay, Ayinv);
379 AxPhiUpdate = B.CreateLdexp(AxPhiUpdate, Bits, {},
"ax_update");
380 AxPhi->addIncoming(AxPhiUpdate, LoopBB);
381 NbIv->
addIncoming(B.CreateSub(NbIv, Bits,
"nb_update"), LoopBB);
388 B.SetInsertPoint(ExitBB);
390 auto *AxPhiExit = B.CreatePHI(ComputeFpTy, 2,
"ax_exit_phi");
391 AxPhiExit->addIncoming(Ax, PreheaderBB);
392 AxPhiExit->addIncoming(AxPhi, LoopBB);
393 auto *NbExitPhi = B.CreatePHI(Nb->
getType(), 2,
"nb_exit_phi");
394 NbExitPhi->addIncoming(NbIv, LoopBB);
395 NbExitPhi->addIncoming(Nb, PreheaderBB);
397 Value *AxFinal = B.CreateLdexp(
398 AxPhiExit, B.CreateAdd(B.CreateSub(NbExitPhi, Bits), One), {},
"ax");
399 AxFinal = buildUpdateAx(AxFinal, Ay, Ayinv);
404 AxFinal = B.CreateLdexp(AxFinal, Ey, {},
"ax");
405 if (ComputeFpTy != FremTy)
406 AxFinal = B.CreateFPTrunc(AxFinal, FremTy);
407 Value *Ret = B.CreateCopySign(AxFinal,
X);
416 void buildElseBranch(
Value *Ax,
Value *Ay,
Value *
X, PHINode *RetPhi)
const {
420 Value *Ret = B.CreateSelect(B.CreateFCmpOEQ(Ax, Ay), ZeroWithXSign,
X);
428 std::optional<SimplifyQuery> &SQ,
440 Ret = B.CreateSelect(XFinite, Ret, Nan);
448 IRBuilder<>::FastMathFlagGuard Guard(
B);
453 B.clearFastMathFlags();
456 Value *Trunc =
B.CreateUnaryIntrinsic(Intrinsic::trunc, Quot, {});
457 Value *Neg =
B.CreateFNeg(Trunc);
459 return B.CreateFMA(Neg,
Y,
X);
463 std::optional<SimplifyQuery> &SQ)
const {
464 assert(
X->getType() == FremTy &&
Y->getType() == FremTy);
466 FastMathFlags FMF =
B.getFastMathFlags();
475 Value *Ax =
B.CreateFAbs(
X, {},
"ax");
476 Value *Ay =
B.CreateFAbs(
Y, {},
"ay");
477 if (ComputeFpTy !=
X->getType()) {
478 Ax =
B.CreateFPExt(Ax, ComputeFpTy,
"ax");
479 Ay =
B.CreateFPExt(Ay, ComputeFpTy,
"ay");
481 Value *AxAyCmp =
B.CreateFCmpOGT(Ax, Ay);
483 PHINode *RetPhi =
B.CreatePHI(FremTy, 2,
"ret");
489 Ret = handleInputCornerCases(Ret,
X,
Y, SQ, FMF.
noInfs());
496 auto SavedInsertPt =
B.GetInsertPoint();
504 FastMathFlags ComputeFMF = FMF;
508 B.SetInsertPoint(ThenBB);
509 buildRemainderComputation(Ax, Ay,
X, RetPhi, FMF);
513 B.SetInsertPoint(ElseBB);
514 buildElseBranch(Ax, Ay,
X, RetPhi);
517 B.SetInsertPoint(SavedInsertPt);
525 Type *Ty =
I.getType();
526 assert(FRemExpander::canExpandType(Ty) &&
527 "Expected supported floating point type");
535 B.setFastMathFlags(FMF);
536 B.SetCurrentDebugLocation(
I.getDebugLoc());
538 const FRemExpander Expander = FRemExpander::create(
B, Ty);
540 ? Expander.buildApproxFRem(
I.getOperand(0),
I.getOperand(1))
541 : Expander.buildFRem(
I.getOperand(0),
I.getOperand(1), SQ);
543 I.replaceAllUsesWith(Ret);
609 unsigned FPMantissaWidth = FloatVal->getType()->getFPMantissaWidth() - 1;
614 if (FloatVal->getType()->isHalfTy() &&
BitWidth >= 32) {
615 if (FPToI->
getOpcode() == Instruction::FPToUI) {
616 Value *A0 = Builder.CreateFPToUI(FloatVal, Builder.getInt32Ty());
617 A1 = Builder.CreateZExt(A0, IntTy);
619 Value *A0 = Builder.CreateFPToSI(FloatVal, Builder.getInt32Ty());
620 A1 = Builder.CreateSExt(A0, IntTy);
630 FPMantissaWidth = FPMantissaWidth == 63 ? 112 : FPMantissaWidth;
631 unsigned FloatWidth =
632 PowerOf2Ceil(FloatVal->getType()->getScalarSizeInBits());
633 unsigned ExponentWidth = FloatWidth - FPMantissaWidth - 1;
634 unsigned ExponentBias = (1 << (ExponentWidth - 1)) - 1;
636 Value *ImplicitBit = ConstantInt::get(
638 Value *SignificandMask = ConstantInt::get(
643 Entry->setName(
Twine(Entry->getName(),
"fp-to-i-entry"));
649 "fp-to-i-if-check.saturate",
F, End);
654 Builder.getContext(),
"fp-to-i-if-check.exp.size",
F, End);
660 Entry->getTerminator()->eraseFromParent();
663 Builder.SetInsertPoint(Entry);
666 FloatVal = Builder.CreateFreeze(FloatVal);
669 if (FloatVal->getType()->isX86_FP80Ty())
672 Value *ARep = Builder.CreateBitCast(FloatVal, FloatIntTy);
673 Value *PosOrNeg, *Sign;
677 Sign = Builder.CreateSelectWithUnknownProfile(
682 Builder.CreateLShr(ARep, Builder.getIntN(FloatWidth, FPMantissaWidth));
683 Value *BiasedExp = Builder.CreateAnd(
684 And, Builder.getIntN(FloatWidth, (1 << ExponentWidth) - 1),
"biased.exp");
685 Value *Abs = Builder.CreateAnd(ARep, SignificandMask);
686 Value *Significand = Builder.CreateOr(Abs, ImplicitBit,
"significand");
687 Value *ZeroResultCond = Builder.CreateICmpULT(
688 BiasedExp, Builder.getIntN(FloatWidth, ExponentBias),
"exp.is.negative");
690 Value *IsNaN = Builder.CreateFCmpUNO(FloatVal, FloatVal,
"is.nan");
691 ZeroResultCond = Builder.CreateOr(ZeroResultCond, IsNaN);
693 Value *IsNeg = Builder.CreateIsNeg(ARep);
694 ZeroResultCond = Builder.CreateOr(ZeroResultCond, IsNeg);
698 ZeroResultCond, End, IsSaturating ? CheckSaturateBB : CheckExpSizeBB);
706 Builder.SetInsertPoint(CheckSaturateBB);
712 uint64_t MaxBiasedExp = (1ULL << ExponentWidth) - 1;
713 if (SaturatingBiasedExp > MaxBiasedExp)
714 SaturatingBiasedExp = MaxBiasedExp;
715 Value *Cmp3 = Builder.CreateICmpUGE(
716 BiasedExp, ConstantInt::get(FloatIntTy, SaturatingBiasedExp));
717 Value *CondBrSat = Builder.CreateCondBr(Cmp3, SaturateBB, CheckExpSizeBB);
721 LLVMContext::MD_prof,
726 Builder.SetInsertPoint(SaturateBB);
733 Saturated = Builder.CreateSelectWithUnknownProfile(
734 PosOrNeg, SignedMax, SignedMin,
"saturated");
738 Builder.CreateBr(End);
742 Builder.SetInsertPoint(CheckExpSizeBB);
743 Value *ExpSmallerMantissaWidth = Builder.CreateICmpULT(
744 BiasedExp, Builder.getIntN(FloatWidth, ExponentBias + FPMantissaWidth),
745 "exp.smaller.mantissa.width");
749 Builder.CreateCondBr(ExpSmallerMantissaWidth, ExpSmallBB, ExpLargeBB);
755 Builder.SetInsertPoint(ExpSmallBB);
756 Value *Sub13 = Builder.CreateSub(
757 Builder.getIntN(FloatWidth, ExponentBias + FPMantissaWidth), BiasedExp);
759 Builder.CreateZExtOrTrunc(Builder.CreateLShr(Significand, Sub13), IntTy);
761 ExpSmallRes = Builder.CreateMul(ExpSmallRes, Sign);
762 Builder.CreateBr(End);
765 Builder.SetInsertPoint(ExpLargeBB);
766 Value *Sub15 = Builder.CreateAdd(
769 FloatIntTy, -
static_cast<int64_t
>(ExponentBias + FPMantissaWidth)));
770 Value *SignificandCast = Builder.CreateZExtOrTrunc(Significand, IntTy);
771 Value *ExpLargeRes = Builder.CreateShl(
772 SignificandCast, Builder.CreateZExtOrTrunc(Sub15, IntTy));
774 ExpLargeRes = Builder.CreateMul(ExpLargeRes, Sign);
775 Builder.CreateBr(End);
778 Builder.SetInsertPoint(End, End->
begin());
779 PHINode *Retval0 = Builder.CreatePHI(FPToI->
getType(), 3 + IsSaturating);
882 unsigned BitWidth = IntVal->getType()->getIntegerBitWidth();
886 FPMantissaWidth = FPMantissaWidth == 63 ? 112 : FPMantissaWidth;
889 FPMantissaWidth = FPMantissaWidth == 10 ? 23 : FPMantissaWidth;
890 FPMantissaWidth = FPMantissaWidth == 7 ? 23 : FPMantissaWidth;
892 bool IsSigned = IToFP->
getOpcode() == Instruction::SIToFP;
896 IntVal = Builder.CreateFreeze(IntVal);
902 IntTy = Builder.getIntNTy(
BitWidth);
903 IntVal = Builder.CreateIntCast(IntVal, IntTy, IsSigned);
907 Builder.CreateShl(Builder.getIntN(
BitWidth, 1),
908 Builder.getIntN(
BitWidth, FPMantissaWidth + 3));
912 Entry->setName(
Twine(Entry->getName(),
"itofp-entry"));
932 Entry->getTerminator()->eraseFromParent();
939 Builder.SetInsertPoint(Entry);
943 Value *CondBrEntry = Builder.CreateCondBr(Cmp, End, IfEnd);
947 LLVMContext::MD_prof,
952 Builder.SetInsertPoint(IfEnd);
955 Value *
Xor = Builder.CreateXor(Shr, IntVal);
957 Value *
Call = Builder.CreateCall(CTLZ, {IsSigned ?
Sub : IntVal, True});
958 Value *Cast = Builder.CreateTrunc(
Call, Builder.getInt32Ty());
959 int BitWidthNew = FloatWidth == 128 ?
BitWidth : 32;
960 Value *Sub1 = Builder.CreateSub(Builder.getIntN(BitWidthNew,
BitWidth),
961 FloatWidth == 128 ?
Call : Cast);
962 Value *Sub2 = Builder.CreateSub(Builder.getIntN(BitWidthNew,
BitWidth - 1),
963 FloatWidth == 128 ?
Call : Cast);
964 Value *Cmp3 = Builder.CreateICmpSGT(
965 Sub1, Builder.getIntN(BitWidthNew, FPMantissaWidth + 1));
969 Value *CondBrIfEnd = Builder.CreateCondBr(Cmp3, IfThen4, IfElse);
973 LLVMContext::MD_prof,
978 Builder.SetInsertPoint(IfThen4);
980 SI->addCase(Builder.getIntN(BitWidthNew, FPMantissaWidth + 2), SwBB);
981 SI->addCase(Builder.getIntN(BitWidthNew, FPMantissaWidth + 3), SwEpilog);
989 LLVMContext::MD_prof,
992 llvm::MDBuilder::kUnlikelyBranchWeight,
993 llvm::MDBuilder::kUnlikelyBranchWeight}));
997 Builder.SetInsertPoint(SwBB);
999 Builder.CreateShl(IsSigned ?
Sub : IntVal, Builder.getIntN(
BitWidth, 1));
1000 Builder.CreateBr(SwEpilog);
1003 Builder.SetInsertPoint(SwDefault);
1004 Value *Sub5 = Builder.CreateSub(
1005 Builder.getIntN(BitWidthNew,
BitWidth - FPMantissaWidth - 3),
1006 FloatWidth == 128 ?
Call : Cast);
1007 Value *ShProm = Builder.CreateZExt(Sub5, IntTy);
1008 Value *Shr6 = Builder.CreateLShr(IsSigned ?
Sub : IntVal,
1009 FloatWidth == 128 ? Sub5 : ShProm);
1011 Builder.CreateAdd(FloatWidth == 128 ?
Call : Cast,
1012 Builder.getIntN(BitWidthNew, FPMantissaWidth + 3));
1013 Value *ShProm9 = Builder.CreateZExt(Sub8, IntTy);
1015 FloatWidth == 128 ? Sub8 : ShProm9);
1016 Value *
And = Builder.CreateAnd(Shr9, IsSigned ?
Sub : IntVal);
1018 Value *Conv11 = Builder.CreateZExt(Cmp10, IntTy);
1019 Value *
Or = Builder.CreateOr(Shr6, Conv11);
1020 Builder.CreateBr(SwEpilog);
1023 Builder.SetInsertPoint(SwEpilog);
1024 PHINode *AAddr0 = Builder.CreatePHI(IntTy, 3);
1028 Value *A0 = Builder.CreateTrunc(AAddr0, Builder.getInt32Ty());
1029 Value *A1 = Builder.CreateLShr(A0, Builder.getInt32(2));
1030 Value *A2 = Builder.CreateAnd(A1, Builder.getInt32(1));
1031 Value *Conv16 = Builder.CreateZExt(A2, IntTy);
1032 Value *Or17 = Builder.CreateOr(AAddr0, Conv16);
1033 Value *Inc = Builder.CreateAdd(Or17, Builder.getIntN(
BitWidth, 1));
1034 Value *Shr18 =
nullptr;
1036 Shr18 = Builder.CreateAShr(Inc, Builder.getIntN(
BitWidth, 2));
1038 Shr18 = Builder.CreateLShr(Inc, Builder.getIntN(
BitWidth, 2));
1039 Value *A3 = Builder.CreateAnd(Inc, Temp1,
"a3");
1040 Value *PosOrNeg = Builder.CreateICmpEQ(A3, Builder.getIntN(
BitWidth, 0));
1041 Value *ExtractT60 = Builder.CreateTrunc(Shr18, Builder.getIntNTy(FloatWidth));
1042 Value *Extract63 = Builder.CreateLShr(Shr18, Builder.getIntN(
BitWidth, 32));
1043 Value *ExtractT64 =
nullptr;
1044 if (FloatWidth > 80)
1045 ExtractT64 = Builder.CreateTrunc(Sub2, Builder.getInt64Ty());
1047 ExtractT64 = Builder.CreateTrunc(Extract63, Builder.getInt32Ty());
1050 Value *CondBrSwEpilog = Builder.CreateCondBr(PosOrNeg, IfEnd26, IfThen20);
1054 LLVMContext::MD_prof,
1059 Builder.SetInsertPoint(IfThen20);
1060 Value *Shr21 =
nullptr;
1062 Shr21 = Builder.CreateAShr(Inc, Builder.getIntN(
BitWidth, 3));
1064 Shr21 = Builder.CreateLShr(Inc, Builder.getIntN(
BitWidth, 3));
1065 Value *ExtractT = Builder.CreateTrunc(Shr21, Builder.getIntNTy(FloatWidth));
1066 Value *Extract = Builder.CreateLShr(Shr21, Builder.getIntN(
BitWidth, 32));
1067 Value *ExtractT62 =
nullptr;
1068 if (FloatWidth > 80)
1069 ExtractT62 = Builder.CreateTrunc(Sub1, Builder.getInt64Ty());
1071 ExtractT62 = Builder.CreateTrunc(Extract, Builder.getInt32Ty());
1072 Builder.CreateBr(IfEnd26);
1075 Builder.SetInsertPoint(IfElse);
1076 Value *Sub24 = Builder.CreateAdd(
1077 FloatWidth == 128 ?
Call : Cast,
1079 -(
int)(
BitWidth - FPMantissaWidth - 1)));
1080 Value *ShProm25 = Builder.CreateZExt(Sub24, IntTy);
1081 Value *Shl26 = Builder.CreateShl(IsSigned ?
Sub : IntVal,
1082 FloatWidth == 128 ? Sub24 : ShProm25);
1083 Value *ExtractT61 = Builder.CreateTrunc(Shl26, Builder.getIntNTy(FloatWidth));
1084 Value *Extract65 = Builder.CreateLShr(Shl26, Builder.getIntN(
BitWidth, 32));
1085 Value *ExtractT66 =
nullptr;
1086 if (FloatWidth > 80)
1087 ExtractT66 = Builder.CreateTrunc(Sub2, Builder.getInt64Ty());
1089 ExtractT66 = Builder.CreateTrunc(Extract65, Builder.getInt32Ty());
1090 Builder.CreateBr(IfEnd26);
1093 Builder.SetInsertPoint(IfEnd26);
1094 PHINode *AAddr1Off0 = Builder.CreatePHI(Builder.getIntNTy(FloatWidth), 3);
1098 PHINode *AAddr1Off32 =
nullptr;
1099 if (FloatWidth > 32) {
1101 Builder.CreatePHI(Builder.getIntNTy(FloatWidth > 80 ? 64 : 32), 3);
1107 if (FloatWidth <= 80) {
1108 E0 = Builder.CreatePHI(Builder.getIntNTy(BitWidthNew), 3);
1113 Value *And29 =
nullptr;
1114 if (FloatWidth > 80) {
1115 Value *Temp2 = Builder.CreateShl(Builder.getIntN(
BitWidth, 1),
1117 And29 = Builder.CreateAnd(Shr, Temp2,
"and29");
1119 Value *Conv28 = Builder.CreateTrunc(Shr, Builder.getInt32Ty());
1120 And29 = Builder.CreateAnd(
1123 unsigned TempMod = FPMantissaWidth % 32;
1124 Value *And34 =
nullptr;
1125 Value *Shl30 =
nullptr;
1126 if (FloatWidth > 80) {
1128 Value *
Add = Builder.CreateShl(AAddr1Off32, Builder.getInt64(TempMod));
1129 Shl30 = Builder.CreateAdd(
1130 Add, Builder.getInt64(((1ull << (62ull - TempMod)) - 1ull) << TempMod));
1131 And34 = Builder.CreateZExt(Shl30, Builder.getInt128Ty());
1133 Value *
Add = Builder.CreateShl(E0, Builder.getInt32(TempMod));
1134 Shl30 = Builder.CreateAdd(
1135 Add, Builder.getInt32(((1 << (30 - TempMod)) - 1) << TempMod));
1136 And34 = Builder.CreateAnd(FloatWidth > 32 ? AAddr1Off32 : AAddr1Off0,
1137 Builder.getInt32((1 << TempMod) - 1));
1139 Value *Or35 =
nullptr;
1140 if (FloatWidth > 80) {
1141 Value *And29Trunc = Builder.CreateTrunc(And29, Builder.getInt128Ty());
1142 Value *Or31 = Builder.CreateOr(And29Trunc, And34);
1143 Value *Or34 = Builder.CreateShl(Or31, Builder.getIntN(128, 64));
1144 Value *Temp3 = Builder.CreateShl(Builder.getIntN(128, 1),
1145 Builder.getIntN(128, FPMantissaWidth));
1146 Value *Temp4 = Builder.CreateSub(Temp3, Builder.getIntN(128, 1));
1147 Value *A6 = Builder.CreateAnd(AAddr1Off0, Temp4);
1148 Or35 = Builder.CreateOr(Or34, A6);
1150 Value *Or31 = Builder.CreateOr(And34, And29);
1151 Or35 = Builder.CreateOr(IsSigned ? Or31 : And34, Shl30);
1153 Value *A4 =
nullptr;
1155 Value *ZExt1 = Builder.CreateZExt(Or35, Builder.getIntNTy(FloatWidth));
1156 Value *Shl1 = Builder.CreateShl(ZExt1, Builder.getIntN(FloatWidth, 32));
1158 Builder.CreateAnd(AAddr1Off0, Builder.getIntN(FloatWidth, 0xFFFFFFFF));
1159 Value *Or1 = Builder.CreateOr(Shl1, And1);
1160 A4 = Builder.CreateBitCast(Or1, IToFP->
getType());
1164 A4 = Builder.CreateFPTrunc(A40, IToFP->
getType());
1170 A4 = Builder.CreateFPTrunc(A40, IToFP->
getType());
1172 A4 = Builder.CreateBitCast(Or35, IToFP->
getType());
1182 unsigned ExponentWidth = FloatWidth - FPMantissaWidth - 1;
1183 uint64_t MinInfExp = 1ULL << (ExponentWidth - 1);
1185 Value *MinInfExpVal = Builder.getIntN(BitWidthNew, MinInfExp);
1186 Value *Overflow = Builder.CreateICmpUGE(Sub2, MinInfExpVal);
1193 Inf = Builder.CreateSelect(IsNeg, NegInf, Inf);
1195 A4 = Builder.CreateSelect(Overflow, Inf, A4);
1197 Builder.CreateBr(End);
1200 Builder.SetInsertPoint(End, End->
begin());
1216 unsigned NumElements = VTy->getElementCount().getFixedValue();
1218 for (
unsigned Idx = 0; Idx < NumElements; ++Idx) {
1219 Value *Ext = Builder.CreateExtractElement(
I->getOperand(0), Idx);
1221 Value *NewOp =
nullptr;
1223 NewOp = Builder.CreateBinOp(
1224 BinOp->getOpcode(), Ext,
1225 Builder.CreateExtractElement(
I->getOperand(1), Idx));
1227 NewOp = Builder.CreateCast(CastI->getOpcode(), Ext,
1228 I->getType()->getScalarType());
1230 assert(
II->getIntrinsicID() == Intrinsic::fptoui_sat ||
1231 II->getIntrinsicID() == Intrinsic::fptosi_sat);
1232 NewOp = Builder.CreateIntrinsic(
I->getType()->getScalarType(),
1233 II->getIntrinsicID(), {Ext});
1237 Result = Builder.CreateInsertElement(Result, NewOp, Idx);
1239 ScalarizedI->copyIRFlags(
I,
true);
1244 I->replaceAllUsesWith(Result);
1245 I->dropAllReferences();
1246 I->eraseFromParent();
1251 if (
I.getOperand(0)->getType()->isVectorTy())
1261 unsigned MaxLegalFpConvertBitWidth =
1270 bool DisableExpandLargeFp =
1272 bool DisableExpandLargeDivRem =
1274 bool DisableFrem = !FRemExpander::shouldExpandAnyFremType(TLI);
1276 if (DisableExpandLargeFp && DisableFrem && DisableExpandLargeDivRem)
1280 Type *Ty =
I.getType();
1282 if (Ty->isScalableTy())
1285 switch (
I.getOpcode()) {
1286 case Instruction::FRem:
1287 return !DisableFrem && FRemExpander::shouldExpandFremType(TLI, Ty);
1288 case Instruction::FPToUI:
1289 case Instruction::FPToSI:
1290 return !DisableExpandLargeFp &&
1292 MaxLegalFpConvertBitWidth;
1293 case Instruction::UIToFP:
1294 case Instruction::SIToFP:
1295 return !DisableExpandLargeFp &&
1297 ->getIntegerBitWidth() > MaxLegalFpConvertBitWidth;
1298 case Instruction::UDiv:
1299 case Instruction::SDiv:
1300 case Instruction::URem:
1301 case Instruction::SRem:
1306 return !DisableExpandLargeDivRem &&
1308 MaxLegalDivRemBitWidth;
1309 case Instruction::Call: {
1311 if (
II && (
II->getIntrinsicID() == Intrinsic::fptoui_sat ||
1312 II->getIntrinsicID() == Intrinsic::fptosi_sat)) {
1313 return !DisableExpandLargeFp &&
1315 MaxLegalFpConvertBitWidth;
1327 if (!ShouldHandleInst(
I))
1334 while (!Worklist.
empty()) {
1337 switch (
I->getOpcode()) {
1338 case Instruction::FRem: {
1339 auto SQ = [&]() -> std::optional<SimplifyQuery> {
1341 auto Res = std::make_optional<SimplifyQuery>(
1342 I->getModule()->getDataLayout(),
I);
1353 case Instruction::FPToUI:
1356 case Instruction::FPToSI:
1360 case Instruction::UIToFP:
1361 case Instruction::SIToFP:
1365 case Instruction::UDiv:
1366 case Instruction::SDiv:
1367 case Instruction::URem:
1368 case Instruction::SRem: {
1375 unsigned Opc = BO->getOpcode();
1376 if (
Opc == Instruction::UDiv ||
Opc == Instruction::SDiv)
1383 case Instruction::Call: {
1385 assert(
II->getIntrinsicID() == Intrinsic::fptoui_sat ||
1386 II->getIntrinsicID() == Intrinsic::fptosi_sat);
1388 II->getIntrinsicID() == Intrinsic::fptosi_sat);
1398class ExpandIRInstsLegacyPass :
public FunctionPass {
1405 : FunctionPass(
ID), OptLevel(OptLevel) {}
1410 auto *TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
1411 const TargetSubtargetInfo *Subtarget = TM->getSubtargetImpl(
F);
1412 auto *TLI = Subtarget->getTargetLowering();
1413 AssumptionCache *AC =
nullptr;
1415 const LibcallLoweringInfo &Libcalls =
1416 getAnalysis<LibcallLoweringInfoWrapper>().getLibcallLowering(
1417 *
F.getParent(), *Subtarget);
1419 if (OptLevel != CodeGenOptLevel::None && !
F.hasOptNone())
1420 AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
F);
1421 return runImpl(
F, *TLI, Libcalls, AC);
1424 void getAnalysisUsage(AnalysisUsage &AU)
const override {
1427 if (OptLevel != CodeGenOptLevel::None)
1438 : TM(&TM), OptLevel(OptLevel) {}
1443 OS, MapClassName2PassName);
1445 OS <<
"O" << (int)OptLevel;
1462 if (!LibcallLowering) {
1464 "' analysis required");
1469 LibcallLowering->getLibcallLowering(*STI);
1475char ExpandIRInstsLegacyPass::ID = 0;
1477 "Expand certain fp instructions",
false,
false)
1483 return new ExpandIRInstsLegacyPass(OptLevel);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static bool runOnFunction(Function &F, bool PostInlining)
static bool expandFRem(BinaryOperator &I, std::optional< SimplifyQuery > &SQ)
static void expandIToFP(Instruction *IToFP)
Generate code to convert an integer to a fp number, replacing S(U)IToFP with the generated code.
static cl::opt< unsigned > ExpandDivRemBits("expand-div-rem-bits", cl::Hidden, cl::init(IntegerType::MAX_INT_BITS), cl::desc("div and rem instructions on integers with " "more than <N> bits are expanded."))
static bool runImpl(Function &F, const TargetLowering &TLI, const LibcallLoweringInfo &Libcalls, AssumptionCache *AC)
static void expandPow2DivRem(BinaryOperator *BO)
Expand division or remainder by a power-of-2 constant.
static bool isSigned(unsigned Opcode)
static void addToWorklist(Instruction &I, SmallVector< Instruction *, 4 > &Worklist)
static Value * addSignedBias(IRBuilder<> &Builder, Value *X, unsigned BitWidth, unsigned ShiftAmt)
For signed div/rem by a power of 2, compute the bias-adjusted dividend: Sign = ashr X,...
static cl::opt< unsigned > ExpandFpConvertBits("expand-fp-convert-bits", cl::Hidden, cl::init(IntegerType::MAX_INT_BITS), cl::desc("fp convert instructions on integers with " "more than <N> bits are expanded."))
static void expandFPToI(Instruction *FPToI, bool IsSaturating, bool IsSigned)
Generate code to convert a fp number to integer, replacing FPToS(U)I with the generated code.
static bool isConstantPowerOfTwo(Value *V, bool SignedOp)
static void scalarize(Instruction *I, SmallVectorImpl< Instruction * > &Worklist)
This is the interface for a simple mod/ref and alias analysis over globals.
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
uint64_t IntrinsicInst * II
FunctionAnalysisManager FAM
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file contains the declarations for profiling metadata utility functions.
This file defines the SmallVector class.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
This file describes how to lower LLVM code to machine code.
Target-Independent Code Generator Pass Configuration Options pass.
Class for arbitrary precision integers.
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
bool isNegative() const
Determine sign of this APInt.
unsigned countr_zero() const
Count the number of trailing zero bits.
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
LLVM_ABI BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="")
Split the basic block into two basic blocks at the specified instruction.
const Function * getParent() const
Return the enclosing method, or null if none.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
BinaryOps getOpcode() const
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ ICMP_SGT
signed greater than
static LLVM_ABI ConstantFP * getZero(Type *Ty, bool Negative=false)
static LLVM_ABI ConstantFP * getQNaN(Type *Ty, bool Negative=false, APInt *Payload=nullptr)
static LLVM_ABI ConstantFP * getInfinity(Type *Ty, bool Negative=false)
This is the shared class of boolean and integer constants.
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static ConstantInt * getSigned(IntegerType *Ty, int64_t V, bool ImplicitTrunc=false)
Return a ConstantInt with the specified value for the specified type.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
LLVM_ABI ExpandIRInstsPass(const TargetMachine &TM, CodeGenOptLevel OptLevel)
LLVM_ABI PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
LLVM_ABI void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
Convenience struct for specifying and reasoning about fast-math flags.
void setAllowContract(bool B=true)
void setAllowReciprocal(bool B=true)
void setNoNaNs(bool B=true)
void setNoInfs(bool B=true)
FunctionPass class - This class is used to implement most global optimizations.
Module * getParent()
Get the module that this global value is contained inside of...
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI bool isExact() const LLVM_READONLY
Determine whether the exact flag is set.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Class to represent integer types.
@ MAX_INT_BITS
Maximum number of bits that can be specified.
Tracks which library functions to use for a particular subtarget.
Record a mapping from subtarget to LibcallLoweringInfo.
LLVM_ABI MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
LLVM_ABI MDNode * createLikelyBranchWeights()
Return metadata containing two branch weights, with significant bias towards true destination.
LLVM_ABI MDNode * createUnlikelyBranchWeights()
Return metadata containing two branch weights, with significant bias towards false destination.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Represent a constant reference to a string, i.e.
unsigned getMaxDivRemBitWidthSupported() const
Returns the size in bits of the maximum div/rem the backend supports.
unsigned getMaxLargeFPConvertBitWidthSupported() const
Returns the size in bits of the maximum fp to/from int conversion the backend supports.
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Primary interface to the complete machine description for the target machine.
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetLowering * getTargetLowering() const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
The instances of the Type class are immutable: once they are created, they are never changed.
LLVM_ABI unsigned getIntegerBitWidth() const
bool isX86_FP80Ty() const
Return true if this is x86 long double.
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
static LLVM_ABI Type * getFP128Ty(LLVMContext &C)
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
LLVM_ABI int getFPMantissaWidth() const
Return the width of the mantissa of this type.
LLVM_ABI const fltSemantics & getFltSemantics() const
void dropAllReferences()
Drop all references to operands.
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVMContext & getContext() const
All values hold a context through their type.
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
This class implements an extremely fast bulk output stream that can only output to a stream.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
@ BasicBlock
Various leaf nodes.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
LLVM_ABI bool expandDivision(BinaryOperator *Div)
Generate code to divide two integers, replacing Div with the generated code.
cl::opt< bool > ProfcheckDisableMetadataFixes
LLVM_ABI bool isKnownNeverInfinity(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if the floating-point scalar value is not an infinity or if the floating-point vector value has no infinities.
LLVM_ABI void setExplicitlyUnknownBranchWeightsIfProfiled(Instruction &I, StringRef PassName, const Function *F=nullptr)
Like setExplicitlyUnknownBranchWeights(...), but only sets unknown branch weights in the new instruction if the parent function of the original instruction has an entry count.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
OuterAnalysisManagerProxy< ModuleAnalysisManager, Function > ModuleAnalysisManagerFunctionProxy
Provide the ModuleAnalysisManager to Function proxy.
LLVM_ABI void applyProfMetadataIfEnabled(Value *V, llvm::function_ref< void(Instruction *)> setMetadataCallback)
inst_iterator inst_begin(Function *F)
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
LLVM_ABI FunctionPass * createExpandIRInstsPass(CodeGenOptLevel)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
LLVM_ABI void SplitBlockAndInsertIfThenElse(Value *Cond, BasicBlock::iterator SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr)
SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, but also creates the ElseBlock.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
CodeGenOptLevel
Code generation optimization level.
inst_iterator inst_end(Function *F)
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
@ Xor
Bitwise or logical XOR of integers.
@ Sub
Subtraction of integers.
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
LLVM_ABI bool expandRemainder(BinaryOperator *Rem)
Generate code to calculate the remainder of two integers, replacing Rem with the generated code.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
static LLVM_ABI EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
bool isVector() const
Return true if this is a vector value type.
A CRTP mix-in to automatically provide informational APIs needed for passes.