LLVM 23.0.0git
ExpandIRInsts.cpp
Go to the documentation of this file.
1//===--- ExpandIRInsts.cpp - Expand IR instructions -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8// This pass expands certain instructions at the IR level.
9//
10// The following expansions are implemented:
11// - Expansion of 'fptoui .. to', 'fptosi .. to', 'uitofp .. to', 'sitofp
12// .. to' instructions with a bitwidth above a threshold. This is
13// useful for targets like x86_64 that cannot lower fp conversions
14// with more than 128 bits.
15//
16// - Expansion of 'frem' for types MVT::f16, MVT::f32, and MVT::f64 for
17// targets which use "Expand" as the legalization action for the
18// corresponding type.
19//
20// - Expansion of 'udiv', 'sdiv', 'urem', and 'srem' instructions with
21// a bitwidth above a threshold into a call to auto-generated
22// functions. This is useful for targets like x86_64 that cannot
23// lower divisions with more than 128 bits or targets like x86_32 that
24// cannot lower divisions with more than 64 bits.
25//
26// Instructions with vector types are scalarized first if their scalar
27// types can be expanded. Scalable vector types are not supported.
28//===----------------------------------------------------------------------===//
29
37#include "llvm/CodeGen/Passes.h"
41#include "llvm/IR/IRBuilder.h"
43#include "llvm/IR/Instruction.h"
46#include "llvm/IR/MDBuilder.h"
47#include "llvm/IR/Module.h"
48#include "llvm/IR/PassManager.h"
51#include "llvm/Pass.h"
58#include <optional>
59
60#define DEBUG_TYPE "expand-ir-insts"
61
62using namespace llvm;
63
64namespace llvm {
66}
67
69 ExpandFpConvertBits("expand-fp-convert-bits", cl::Hidden,
71 cl::desc("fp convert instructions on integers with "
72 "more than <N> bits are expanded."));
73
75 ExpandDivRemBits("expand-div-rem-bits", cl::Hidden,
77 cl::desc("div and rem instructions on integers with "
78 "more than <N> bits are expanded."));
79
80static bool isConstantPowerOfTwo(Value *V, bool SignedOp) {
81 auto *C = dyn_cast<ConstantInt>(V);
82 if (!C)
83 return false;
84
85 APInt Val = C->getValue();
86 if (SignedOp && Val.isNegative())
87 Val = -Val;
88 return Val.isPowerOf2();
89}
90
91static bool isSigned(unsigned Opcode) {
92 return Opcode == Instruction::SDiv || Opcode == Instruction::SRem;
93}
94
95/// For signed div/rem by a power of 2, compute the bias-adjusted dividend:
96/// Sign = ashr X, (BitWidth - 1) -- 0 or -1
97/// Bias = lshr Sign, (BitWidth - ShiftAmt) -- 0 or 2^ShiftAmt - 1
98/// Adjusted = add X, Bias
99/// The bias adds (2^ShiftAmt - 1) for negative X, correcting rounding towards
100/// zero (instead of towards -inf that a plain ashr would give).
101/// The lshr form is used instead of 'and' to avoid large immediate constants.
102static Value *addSignedBias(IRBuilder<> &Builder, Value *X, unsigned BitWidth,
103 unsigned ShiftAmt) {
104 assert(ShiftAmt > 0 && ShiftAmt < BitWidth &&
105 "ShiftAmt out of range; callers should handle ShiftAmt == 0");
106 Value *Sign = Builder.CreateAShr(X, BitWidth - 1, "sign");
107 Value *Bias = Builder.CreateLShr(Sign, BitWidth - ShiftAmt, "bias");
108 return Builder.CreateAdd(X, Bias, "adjusted");
109}
110
111/// Expand division or remainder by a power-of-2 constant.
112/// Division (let C = log2(|divisor|)):
113/// udiv X, 2^C -> lshr X, C
114/// sdiv X, 2^C -> ashr (add X, Bias), C (Bias corrects rounding)
115/// sdiv exact X, 2^C -> ashr exact X, C (no bias needed)
116/// For negative power-of-2 divisors, the division result is negated.
117/// Remainder (let C = log2(|divisor|)):
118/// urem X, 2^C -> and X, (2^C - 1)
119/// srem X, 2^C -> sub X, (shl (ashr (add X, Bias), C), C)
121 LLVM_DEBUG(dbgs() << "Expanding instruction: " << *BO << '\n');
122
123 unsigned Opcode = BO->getOpcode();
124 bool IsDiv = (Opcode == Instruction::UDiv || Opcode == Instruction::SDiv);
125 bool IsSigned = isSigned(Opcode);
126 // isExact() is only valid for div.
127 bool IsExact = IsDiv && BO->isExact();
128
129 assert(isConstantPowerOfTwo(BO->getOperand(1), IsSigned) &&
130 "Expected power-of-2 constant divisor");
131
132 Value *X = BO->getOperand(0);
133 auto *C = cast<ConstantInt>(BO->getOperand(1));
134 Type *Ty = BO->getType();
135 unsigned BitWidth = Ty->getIntegerBitWidth();
136
137 APInt DivisorVal = C->getValue();
138 bool IsNegativeDivisor = IsSigned && DivisorVal.isNegative();
139 // Use countr_zero() to get the shift amount directly from the bit pattern.
140 // This works correctly for both positive and negative powers of 2, including
141 // INT_MIN, without needing to negate the value first.
142 unsigned ShiftAmt = DivisorVal.countr_zero();
143
144 IRBuilder<> Builder(BO);
145 Value *Result;
146
147 if (ShiftAmt == 0) {
148 // Div by 1/-1: X / 1 = X, X / -1 = -X.
149 // Rem by 1/-1: always 0.
150 if (IsDiv)
151 Result = IsNegativeDivisor ? Builder.CreateNeg(X) : X;
152 else
153 Result = ConstantInt::get(Ty, 0);
154 } else if (IsSigned) {
155 // The signed expansion uses X multiple times (bias computation, shift,
156 // and sub for remainder). Freeze X to ensure consistent behavior if it is
157 // undef/poison. For exact division, no bias is needed and X is used only
158 // once, so freeze is unnecessary.
159 if (!IsExact && !isGuaranteedNotToBeUndefOrPoison(X))
160 X = Builder.CreateFreeze(X, X->getName() + ".fr");
161 // For exact division, no bias is needed since there's no rounding.
162 Value *Dividend =
163 IsExact ? X : addSignedBias(Builder, X, BitWidth, ShiftAmt);
164 Value *Quotient = Builder.CreateAShr(
165 Dividend, ShiftAmt, IsDiv && IsNegativeDivisor ? "pre.neg" : "shifted",
166 IsExact);
167 if (IsDiv) {
168 Result = IsNegativeDivisor ? Builder.CreateNeg(Quotient) : Quotient;
169 } else {
170 // Rem = X - (Quotient << ShiftAmt):
171 // clear lower ShiftAmt bits via round-trip shift, then subtract.
172 Value *Truncated = Builder.CreateShl(Quotient, ShiftAmt, "truncated");
173 Result = Builder.CreateSub(X, Truncated);
174 }
175 } else {
176 if (IsDiv) {
177 Result = Builder.CreateLShr(X, ShiftAmt, "", IsExact);
178 } else {
179 APInt Mask = APInt::getLowBitsSet(BitWidth, ShiftAmt);
180 Result = Builder.CreateAnd(X, ConstantInt::get(Ty, Mask));
181 }
182 }
183
184 BO->replaceAllUsesWith(Result);
185 if (Result != X)
186 if (auto *RI = dyn_cast<Instruction>(Result))
187 RI->takeName(BO);
188 BO->dropAllReferences();
189 BO->eraseFromParent();
190}
191
192/// This class implements a precise expansion of the frem instruction.
193/// The generated code is based on the fmod implementation in the AMD device
194/// libs.
195namespace {
196class FRemExpander {
197 /// The IRBuilder to use for the expansion.
198 IRBuilder<> &B;
199
200 /// Floating point type of the return value and the arguments of the FRem
201 /// instructions that should be expanded.
202 Type *FremTy;
203
204 /// Floating point type to use for the computation. This may be
205 /// wider than the \p FremTy.
206 Type *ComputeFpTy;
207
208 /// Integer type used to hold the exponents returned by frexp.
209 Type *ExTy;
210
211 /// How many bits of the quotient to compute per iteration of the
212 /// algorithm, stored as a value of type \p ExTy.
213 Value *Bits;
214
215 /// Constant 1 of type \p ExTy.
216 Value *One;
217
218 /// The frem argument/return types that can be expanded by this class.
219 // TODO: The expansion could work for other floating point types
220 // as well, but this would require additional testing.
221 static constexpr std::array<MVT, 3> ExpandableTypes{MVT::f16, MVT::f32,
222 MVT::f64};
223
224public:
225 static bool canExpandType(Type *Ty) {
226 EVT VT = EVT::getEVT(Ty);
227 assert(VT.isSimple() && "Can expand only simple types");
228
229 return is_contained(ExpandableTypes, VT.getSimpleVT());
230 }
231
232 static bool shouldExpandFremType(const TargetLowering &TLI, EVT VT) {
233 assert(!VT.isVector() && "Cannot handle vector type; must scalarize first");
234 return TLI.getOperationAction(ISD::FREM, VT) ==
235 TargetLowering::LegalizeAction::Expand;
236 }
237
238 static bool shouldExpandFremType(const TargetLowering &TLI, Type *Ty) {
239 // Consider scalar type for simplicity. It seems unlikely that a
240 // vector type can be legalized without expansion if the scalar
241 // type cannot.
242 return shouldExpandFremType(TLI, EVT::getEVT(Ty->getScalarType()));
243 }
244
245 /// Return true if the pass should expand frem instructions of any type
246 /// for the target represented by \p TLI.
247 static bool shouldExpandAnyFremType(const TargetLowering &TLI) {
248 return any_of(ExpandableTypes,
249 [&](MVT V) { return shouldExpandFremType(TLI, EVT(V)); });
250 }
251
252 static FRemExpander create(IRBuilder<> &B, Type *Ty) {
253 assert(canExpandType(Ty) && "Expected supported floating point type");
254
255 // The type to use for the computation of the remainder. This may be
256 // wider than the input/result type which affects the ...
257 Type *ComputeTy = Ty;
258 // ... maximum number of iterations of the remainder computation loop
259 // to use. This value is for the case in which the computation
260 // uses the same input/result type.
261 unsigned MaxIter = 2;
262
263 if (Ty->isHalfTy()) {
264 // Use the wider type and less iterations.
265 ComputeTy = B.getFloatTy();
266 MaxIter = 1;
267 }
268
269 unsigned Precision = APFloat::semanticsPrecision(Ty->getFltSemantics());
270 return FRemExpander{B, Ty, Precision / MaxIter, ComputeTy};
271 }
272
273 /// Build the FRem expansion for the numerator \p X and the
274 /// denumerator \p Y. The type of X and Y must match \p FremTy. The
275 /// code will be generated at the insertion point of \p B and the
276 /// insertion point will be reset at exit.
277 Value *buildFRem(Value *X, Value *Y, std::optional<SimplifyQuery> &SQ) const;
278
279 /// Build an approximate FRem expansion for the numerator \p X and
280 /// the denumerator \p Y at the insertion point of builder \p B.
281 /// The type of X and Y must match \p FremTy.
282 Value *buildApproxFRem(Value *X, Value *Y) const;
283
284private:
285 FRemExpander(IRBuilder<> &B, Type *FremTy, unsigned Bits, Type *ComputeFpTy)
286 : B(B), FremTy(FremTy), ComputeFpTy(ComputeFpTy), ExTy(B.getInt32Ty()),
287 Bits(ConstantInt::get(ExTy, Bits)), One(ConstantInt::get(ExTy, 1)) {}
288
289 Value *createRcp(Value *V, const Twine &Name) const {
290 // Leave it to later optimizations to turn this into an rcp
291 // instruction if available.
292 return B.CreateFDiv(ConstantFP::get(ComputeFpTy, 1.0), V, Name);
293 }
294
295 // Helper function to build the UPDATE_AX code which is common to the
296 // loop body and the "final iteration".
297 Value *buildUpdateAx(Value *Ax, Value *Ay, Value *Ayinv) const {
298 // Build:
299 // float q = rint(ax * ayinv);
300 // ax = fma(-q, ay, ax);
301 // int clt = ax < 0.0f;
302 // float axp = ax + ay;
303 // ax = clt ? axp : ax;
304 Value *Q = B.CreateUnaryIntrinsic(Intrinsic::rint, B.CreateFMul(Ax, Ayinv),
305 {}, "q");
306 Value *AxUpdate = B.CreateFMA(B.CreateFNeg(Q), Ay, Ax, {}, "ax");
307 Value *Clt = B.CreateFCmp(CmpInst::FCMP_OLT, AxUpdate,
308 ConstantFP::getZero(ComputeFpTy), "clt");
309 Value *Axp = B.CreateFAdd(AxUpdate, Ay, "axp");
310 return B.CreateSelect(Clt, Axp, AxUpdate, "ax");
311 }
312
313 /// Build code to extract the exponent and mantissa of \p Src.
314 /// Return the exponent minus one for use as a loop bound and
315 /// the mantissa taken to the given \p NewExp power.
316 std::pair<Value *, Value *> buildExpAndPower(Value *Src, Value *NewExp,
317 const Twine &ExName,
318 const Twine &PowName) const {
319 // Build:
320 // ExName = frexp_exp(Src) - 1;
321 // PowName = fldexp(frexp_mant(ExName), NewExp);
322 Type *Ty = Src->getType();
323 Type *ExTy = B.getInt32Ty();
324 Value *Frexp = B.CreateIntrinsic(Intrinsic::frexp, {Ty, ExTy}, Src);
325 Value *Mant = B.CreateExtractValue(Frexp, {0});
326 Value *Exp = B.CreateExtractValue(Frexp, {1});
327
328 Exp = B.CreateSub(Exp, One, ExName);
329 Value *Pow = B.CreateLdexp(Mant, NewExp, {}, PowName);
330
331 return {Pow, Exp};
332 }
333
334 /// Build the main computation of the remainder for the case in which
335 /// Ax > Ay, where Ax = |X|, Ay = |Y|, and X is the numerator and Y the
336 /// denumerator. Add the incoming edge from the computation result
337 /// to \p RetPhi.
338 void buildRemainderComputation(Value *AxInitial, Value *AyInitial, Value *X,
339 PHINode *RetPhi, FastMathFlags FMF) const {
340 IRBuilder<>::FastMathFlagGuard Guard(B);
341 B.setFastMathFlags(FMF);
342
343 // Build:
344 // ex = frexp_exp(ax) - 1;
345 // ax = fldexp(frexp_mant(ax), bits);
346 // ey = frexp_exp(ay) - 1;
347 // ay = fledxp(frexp_mant(ay), 1);
348 auto [Ax, Ex] = buildExpAndPower(AxInitial, Bits, "ex", "ax");
349 auto [Ay, Ey] = buildExpAndPower(AyInitial, One, "ey", "ay");
350
351 // Build:
352 // int nb = ex - ey;
353 // float ayinv = 1.0/ay;
354 Value *Nb = B.CreateSub(Ex, Ey, "nb");
355 Value *Ayinv = createRcp(Ay, "ayinv");
356
357 // Build: while (nb > bits)
358 BasicBlock *PreheaderBB = B.GetInsertBlock();
359 Function *Fun = PreheaderBB->getParent();
360 auto *LoopBB = BasicBlock::Create(B.getContext(), "frem.loop_body", Fun);
361 auto *ExitBB = BasicBlock::Create(B.getContext(), "frem.loop_exit", Fun);
362
363 B.CreateCondBr(B.CreateICmp(CmpInst::ICMP_SGT, Nb, Bits), LoopBB, ExitBB);
364
365 // Build loop body:
366 // UPDATE_AX
367 // ax = fldexp(ax, bits);
368 // nb -= bits;
369 // One iteration of the loop is factored out. The code shared by
370 // the loop and this "iteration" is denoted by UPDATE_AX.
371 B.SetInsertPoint(LoopBB);
372 PHINode *NbIv = B.CreatePHI(Nb->getType(), 2, "nb_iv");
373 NbIv->addIncoming(Nb, PreheaderBB);
374
375 auto *AxPhi = B.CreatePHI(ComputeFpTy, 2, "ax_loop_phi");
376 AxPhi->addIncoming(Ax, PreheaderBB);
377
378 Value *AxPhiUpdate = buildUpdateAx(AxPhi, Ay, Ayinv);
379 AxPhiUpdate = B.CreateLdexp(AxPhiUpdate, Bits, {}, "ax_update");
380 AxPhi->addIncoming(AxPhiUpdate, LoopBB);
381 NbIv->addIncoming(B.CreateSub(NbIv, Bits, "nb_update"), LoopBB);
382
383 B.CreateCondBr(B.CreateICmp(CmpInst::ICMP_SGT, NbIv, Bits), LoopBB, ExitBB);
384
385 // Build final iteration
386 // ax = fldexp(ax, nb - bits + 1);
387 // UPDATE_AX
388 B.SetInsertPoint(ExitBB);
389
390 auto *AxPhiExit = B.CreatePHI(ComputeFpTy, 2, "ax_exit_phi");
391 AxPhiExit->addIncoming(Ax, PreheaderBB);
392 AxPhiExit->addIncoming(AxPhi, LoopBB);
393 auto *NbExitPhi = B.CreatePHI(Nb->getType(), 2, "nb_exit_phi");
394 NbExitPhi->addIncoming(NbIv, LoopBB);
395 NbExitPhi->addIncoming(Nb, PreheaderBB);
396
397 Value *AxFinal = B.CreateLdexp(
398 AxPhiExit, B.CreateAdd(B.CreateSub(NbExitPhi, Bits), One), {}, "ax");
399 AxFinal = buildUpdateAx(AxFinal, Ay, Ayinv);
400
401 // Build:
402 // ax = fldexp(ax, ey);
403 // ret = copysign(ax,x);
404 AxFinal = B.CreateLdexp(AxFinal, Ey, {}, "ax");
405 if (ComputeFpTy != FremTy)
406 AxFinal = B.CreateFPTrunc(AxFinal, FremTy);
407 Value *Ret = B.CreateCopySign(AxFinal, X);
408
409 RetPhi->addIncoming(Ret, ExitBB);
410 }
411
412 /// Build the else-branch of the conditional in the FRem
413 /// expansion, i.e. the case in wich Ax <= Ay, where Ax = |X|, Ay
414 /// = |Y|, and X is the numerator and Y the denumerator. Add the
415 /// incoming edge from the result to \p RetPhi.
416 void buildElseBranch(Value *Ax, Value *Ay, Value *X, PHINode *RetPhi) const {
417 // Build:
418 // ret = ax == ay ? copysign(0.0f, x) : x;
419 Value *ZeroWithXSign = B.CreateCopySign(ConstantFP::getZero(FremTy), X);
420 Value *Ret = B.CreateSelect(B.CreateFCmpOEQ(Ax, Ay), ZeroWithXSign, X);
421
422 RetPhi->addIncoming(Ret, B.GetInsertBlock());
423 }
424
425 /// Return a value that is NaN if one of the corner cases concerning
426 /// the inputs \p X and \p Y is detected, and \p Ret otherwise.
427 Value *handleInputCornerCases(Value *Ret, Value *X, Value *Y,
428 std::optional<SimplifyQuery> &SQ,
429 bool NoInfs) const {
430 // Build:
431 // ret = (y == 0.0f || isnan(y)) ? QNAN : ret;
432 // ret = isfinite(x) ? ret : QNAN;
433 Value *Nan = ConstantFP::getQNaN(FremTy);
434 Ret = B.CreateSelect(B.CreateFCmpUEQ(Y, ConstantFP::getZero(FremTy)), Nan,
435 Ret);
436 Value *XFinite =
437 NoInfs || (SQ && isKnownNeverInfinity(X, *SQ))
438 ? B.getTrue()
439 : B.CreateFCmpULT(B.CreateFAbs(X), ConstantFP::getInfinity(FremTy));
440 Ret = B.CreateSelect(XFinite, Ret, Nan);
441
442 return Ret;
443 }
444};
445} // namespace
446
447Value *FRemExpander::buildApproxFRem(Value *X, Value *Y) const {
448 IRBuilder<>::FastMathFlagGuard Guard(B);
449 // Propagating the approximate functions flag to the
450 // division leads to an unacceptable drop in precision
451 // on AMDGPU.
452 // TODO Find out if any flags might be worth propagating.
453 B.clearFastMathFlags();
454
455 Value *Quot = B.CreateFDiv(X, Y);
456 Value *Trunc = B.CreateUnaryIntrinsic(Intrinsic::trunc, Quot, {});
457 Value *Neg = B.CreateFNeg(Trunc);
458
459 return B.CreateFMA(Neg, Y, X);
460}
461
462Value *FRemExpander::buildFRem(Value *X, Value *Y,
463 std::optional<SimplifyQuery> &SQ) const {
464 assert(X->getType() == FremTy && Y->getType() == FremTy);
465
466 FastMathFlags FMF = B.getFastMathFlags();
467
468 // This function generates the following code structure:
469 // if (abs(x) > abs(y))
470 // { ret = compute remainder }
471 // else
472 // { ret = x or 0 with sign of x }
473 // Adjust ret to NaN/inf in input
474 // return ret
475 Value *Ax = B.CreateFAbs(X, {}, "ax");
476 Value *Ay = B.CreateFAbs(Y, {}, "ay");
477 if (ComputeFpTy != X->getType()) {
478 Ax = B.CreateFPExt(Ax, ComputeFpTy, "ax");
479 Ay = B.CreateFPExt(Ay, ComputeFpTy, "ay");
480 }
481 Value *AxAyCmp = B.CreateFCmpOGT(Ax, Ay);
482
483 PHINode *RetPhi = B.CreatePHI(FremTy, 2, "ret");
484 Value *Ret = RetPhi;
485
486 // We would return NaN in all corner cases handled here.
487 // Hence, if NaNs are excluded, keep the result as it is.
488 if (!FMF.noNaNs())
489 Ret = handleInputCornerCases(Ret, X, Y, SQ, FMF.noInfs());
490
491 Function *Fun = B.GetInsertBlock()->getParent();
492 auto *ThenBB = BasicBlock::Create(B.getContext(), "frem.compute", Fun);
493 auto *ElseBB = BasicBlock::Create(B.getContext(), "frem.else", Fun);
494 SplitBlockAndInsertIfThenElse(AxAyCmp, RetPhi, &ThenBB, &ElseBB);
495
496 auto SavedInsertPt = B.GetInsertPoint();
497
498 // Build remainder computation for "then" branch
499 //
500 // The ordered comparison ensures that ax and ay are not NaNs
501 // in the then-branch. Furthermore, y cannot be an infinity and the
502 // check at the end of the function ensures that the result will not
503 // be used if x is an infinity.
504 FastMathFlags ComputeFMF = FMF;
505 ComputeFMF.setNoInfs();
506 ComputeFMF.setNoNaNs();
507
508 B.SetInsertPoint(ThenBB);
509 buildRemainderComputation(Ax, Ay, X, RetPhi, FMF);
510 B.CreateBr(RetPhi->getParent());
511
512 // Build "else"-branch
513 B.SetInsertPoint(ElseBB);
514 buildElseBranch(Ax, Ay, X, RetPhi);
515 B.CreateBr(RetPhi->getParent());
516
517 B.SetInsertPoint(SavedInsertPt);
518
519 return Ret;
520}
521
522static bool expandFRem(BinaryOperator &I, std::optional<SimplifyQuery> &SQ) {
523 LLVM_DEBUG(dbgs() << "Expanding instruction: " << I << '\n');
524
525 Type *Ty = I.getType();
526 assert(FRemExpander::canExpandType(Ty) &&
527 "Expected supported floating point type");
528
529 FastMathFlags FMF = I.getFastMathFlags();
530 // TODO Make use of those flags for optimization?
531 FMF.setAllowReciprocal(false);
532 FMF.setAllowContract(false);
533
534 IRBuilder<> B(&I);
535 B.setFastMathFlags(FMF);
536 B.SetCurrentDebugLocation(I.getDebugLoc());
537
538 const FRemExpander Expander = FRemExpander::create(B, Ty);
539 Value *Ret = FMF.approxFunc()
540 ? Expander.buildApproxFRem(I.getOperand(0), I.getOperand(1))
541 : Expander.buildFRem(I.getOperand(0), I.getOperand(1), SQ);
542
543 I.replaceAllUsesWith(Ret);
544 Ret->takeName(&I);
545 I.eraseFromParent();
546
547 return true;
548}
549// clang-format off: preserve formatting of the following example
550
551/// Generate code to convert a fp number to integer, replacing FPToS(U)I with
552/// the generated code. This currently generates code similarly to compiler-rt's
553/// implementations.
554///
555/// An example IR generated from compiler-rt/fixsfdi.c looks like below:
556/// define dso_local i64 @foo(float noundef %a) local_unnamed_addr #0 {
557/// entry:
558/// %0 = bitcast float %a to i32
559/// %conv.i = zext i32 %0 to i64
560/// %tobool.not = icmp sgt i32 %0, -1
561/// %conv = select i1 %tobool.not, i64 1, i64 -1
562/// %and = lshr i64 %conv.i, 23
563/// %shr = and i64 %and, 255
564/// %and2 = and i64 %conv.i, 8388607
565/// %or = or i64 %and2, 8388608
566/// %cmp = icmp ult i64 %shr, 127
567/// br i1 %cmp, label %cleanup, label %if.end
568///
569/// if.end: ; preds = %entry
570/// %sub = add nuw nsw i64 %shr, 4294967169
571/// %conv5 = and i64 %sub, 4294967232
572/// %cmp6.not = icmp eq i64 %conv5, 0
573/// br i1 %cmp6.not, label %if.end12, label %if.then8
574///
575/// if.then8: ; preds = %if.end
576/// %cond11 = select i1 %tobool.not, i64 9223372036854775807, i64
577/// -9223372036854775808 br label %cleanup
578///
579/// if.end12: ; preds = %if.end
580/// %cmp13 = icmp ult i64 %shr, 150
581/// br i1 %cmp13, label %if.then15, label %if.else
582///
583/// if.then15: ; preds = %if.end12
584/// %sub16 = sub nuw nsw i64 150, %shr
585/// %shr17 = lshr i64 %or, %sub16
586/// %mul = mul nsw i64 %shr17, %conv
587/// br label %cleanup
588///
589/// if.else: ; preds = %if.end12
590/// %sub18 = add nsw i64 %shr, -150
591/// %shl = shl i64 %or, %sub18
592/// %mul19 = mul nsw i64 %shl, %conv
593/// br label %cleanup
594///
595/// cleanup: ; preds = %entry,
596/// %if.else, %if.then15, %if.then8
597/// %retval.0 = phi i64 [ %cond11, %if.then8 ], [ %mul, %if.then15 ], [
598/// %mul19, %if.else ], [ 0, %entry ] ret i64 %retval.0
599/// }
600///
601/// Replace fp to integer with generated code.
602static void expandFPToI(Instruction *FPToI, bool IsSaturating, bool IsSigned) {
603 // clang-format on
604 IRBuilder<> Builder(FPToI);
605 auto *FloatVal = FPToI->getOperand(0);
606 IntegerType *IntTy = cast<IntegerType>(FPToI->getType());
607
608 unsigned BitWidth = FPToI->getType()->getIntegerBitWidth();
609 unsigned FPMantissaWidth = FloatVal->getType()->getFPMantissaWidth() - 1;
610
611 // FIXME: fp16's range is covered by i32. So `fptoi half` can convert
612 // to i32 first following a sext/zext to target integer type.
613 Value *A1 = nullptr;
614 if (FloatVal->getType()->isHalfTy() && BitWidth >= 32) {
615 if (FPToI->getOpcode() == Instruction::FPToUI) {
616 Value *A0 = Builder.CreateFPToUI(FloatVal, Builder.getInt32Ty());
617 A1 = Builder.CreateZExt(A0, IntTy);
618 } else { // FPToSI
619 Value *A0 = Builder.CreateFPToSI(FloatVal, Builder.getInt32Ty());
620 A1 = Builder.CreateSExt(A0, IntTy);
621 }
622 FPToI->replaceAllUsesWith(A1);
623 FPToI->dropAllReferences();
624 FPToI->eraseFromParent();
625 return;
626 }
627
628 // fp80 conversion is implemented by fpext to fp128 first then do the
629 // conversion.
630 FPMantissaWidth = FPMantissaWidth == 63 ? 112 : FPMantissaWidth;
631 unsigned FloatWidth =
632 PowerOf2Ceil(FloatVal->getType()->getScalarSizeInBits());
633 unsigned ExponentWidth = FloatWidth - FPMantissaWidth - 1;
634 unsigned ExponentBias = (1 << (ExponentWidth - 1)) - 1;
635 IntegerType *FloatIntTy = Builder.getIntNTy(FloatWidth);
636 Value *ImplicitBit = ConstantInt::get(
637 FloatIntTy, APInt::getOneBitSet(FloatWidth, FPMantissaWidth));
638 Value *SignificandMask = ConstantInt::get(
639 FloatIntTy, APInt::getLowBitsSet(FloatWidth, FPMantissaWidth));
640
641 BasicBlock *Entry = Builder.GetInsertBlock();
642 Function *F = Entry->getParent();
643 Entry->setName(Twine(Entry->getName(), "fp-to-i-entry"));
644 BasicBlock *CheckSaturateBB, *SaturateBB;
645 BasicBlock *End =
646 Entry->splitBasicBlock(Builder.GetInsertPoint(), "fp-to-i-cleanup");
647 if (IsSaturating) {
648 CheckSaturateBB = BasicBlock::Create(Builder.getContext(),
649 "fp-to-i-if-check.saturate", F, End);
650 SaturateBB =
651 BasicBlock::Create(Builder.getContext(), "fp-to-i-if-saturate", F, End);
652 }
653 BasicBlock *CheckExpSizeBB = BasicBlock::Create(
654 Builder.getContext(), "fp-to-i-if-check.exp.size", F, End);
655 BasicBlock *ExpSmallBB =
656 BasicBlock::Create(Builder.getContext(), "fp-to-i-if-exp.small", F, End);
657 BasicBlock *ExpLargeBB =
658 BasicBlock::Create(Builder.getContext(), "fp-to-i-if-exp.large", F, End);
659
660 Entry->getTerminator()->eraseFromParent();
661
662 // entry:
663 Builder.SetInsertPoint(Entry);
664 // We're going to introduce branches on the value, so freeze it.
666 FloatVal = Builder.CreateFreeze(FloatVal);
667 // fp80 conversion is implemented by fpext to fp128 first then do the
668 // conversion.
669 if (FloatVal->getType()->isX86_FP80Ty())
670 FloatVal =
671 Builder.CreateFPExt(FloatVal, Type::getFP128Ty(Builder.getContext()));
672 Value *ARep = Builder.CreateBitCast(FloatVal, FloatIntTy);
673 Value *PosOrNeg, *Sign;
674 if (IsSigned) {
675 PosOrNeg =
676 Builder.CreateICmpSGT(ARep, ConstantInt::getSigned(FloatIntTy, -1));
677 Sign = Builder.CreateSelectWithUnknownProfile(
678 PosOrNeg, ConstantInt::getSigned(IntTy, 1),
679 ConstantInt::getSigned(IntTy, -1), "sign");
680 }
681 Value *And =
682 Builder.CreateLShr(ARep, Builder.getIntN(FloatWidth, FPMantissaWidth));
683 Value *BiasedExp = Builder.CreateAnd(
684 And, Builder.getIntN(FloatWidth, (1 << ExponentWidth) - 1), "biased.exp");
685 Value *Abs = Builder.CreateAnd(ARep, SignificandMask);
686 Value *Significand = Builder.CreateOr(Abs, ImplicitBit, "significand");
687 Value *ZeroResultCond = Builder.CreateICmpULT(
688 BiasedExp, Builder.getIntN(FloatWidth, ExponentBias), "exp.is.negative");
689 if (IsSaturating) {
690 Value *IsNaN = Builder.CreateFCmpUNO(FloatVal, FloatVal, "is.nan");
691 ZeroResultCond = Builder.CreateOr(ZeroResultCond, IsNaN);
692 if (!IsSigned) {
693 Value *IsNeg = Builder.CreateIsNeg(ARep);
694 ZeroResultCond = Builder.CreateOr(ZeroResultCond, IsNeg);
695 }
696 }
697 Instruction *CondBr = Builder.CreateCondBr(
698 ZeroResultCond, End, IsSaturating ? CheckSaturateBB : CheckExpSizeBB);
699 // We do not have any information on the value of the exponent, so mark the
700 // branch weights as unknown.
702
703 Value *Saturated;
704 if (IsSaturating) {
705 // check.saturate:
706 Builder.SetInsertPoint(CheckSaturateBB);
707 uint64_t SaturatingBiasedExp =
708 static_cast<uint64_t>(ExponentBias) + BitWidth - IsSigned;
709 // Clamp to the all-ones (inf/NaN) exponent. Without this, when the integer
710 // is wide enough to hold every finite float the threshold exceeds any
711 // possible biased exponent, so +/-inf would never saturate.
712 uint64_t MaxBiasedExp = (1ULL << ExponentWidth) - 1;
713 if (SaturatingBiasedExp > MaxBiasedExp)
714 SaturatingBiasedExp = MaxBiasedExp;
715 Value *Cmp3 = Builder.CreateICmpUGE(
716 BiasedExp, ConstantInt::get(FloatIntTy, SaturatingBiasedExp));
717 Value *CondBrSat = Builder.CreateCondBr(Cmp3, SaturateBB, CheckExpSizeBB);
718 // Saturation is considered an unlikely event.
719 applyProfMetadataIfEnabled(CondBrSat, [&](Instruction *Inst) {
720 Inst->setMetadata(
721 LLVMContext::MD_prof,
723 });
724
725 // saturate:
726 Builder.SetInsertPoint(SaturateBB);
727 if (IsSigned) {
728 Value *SignedMax =
729 ConstantInt::get(IntTy, APInt::getSignedMaxValue(BitWidth));
730 Value *SignedMin =
731 ConstantInt::get(IntTy, APInt::getSignedMinValue(BitWidth));
732 // Select between the signed max and min values for saturation.
733 Saturated = Builder.CreateSelectWithUnknownProfile(
734 PosOrNeg, SignedMax, SignedMin, "saturated");
735 } else {
736 Saturated = ConstantInt::getAllOnesValue(IntTy);
737 }
738 Builder.CreateBr(End);
739 }
740
741 // if.end9:
742 Builder.SetInsertPoint(CheckExpSizeBB);
743 Value *ExpSmallerMantissaWidth = Builder.CreateICmpULT(
744 BiasedExp, Builder.getIntN(FloatWidth, ExponentBias + FPMantissaWidth),
745 "exp.smaller.mantissa.width");
746 // We cannot determine whether this is a left shift or a right shift,
747 // so we mark the branch weights as unknown.
748 Value *CondBr2 =
749 Builder.CreateCondBr(ExpSmallerMantissaWidth, ExpSmallBB, ExpLargeBB);
750 applyProfMetadataIfEnabled(CondBr2, [&](Instruction *Inst) {
752 });
753
754 // exp.small:
755 Builder.SetInsertPoint(ExpSmallBB);
756 Value *Sub13 = Builder.CreateSub(
757 Builder.getIntN(FloatWidth, ExponentBias + FPMantissaWidth), BiasedExp);
758 Value *ExpSmallRes =
759 Builder.CreateZExtOrTrunc(Builder.CreateLShr(Significand, Sub13), IntTy);
760 if (IsSigned)
761 ExpSmallRes = Builder.CreateMul(ExpSmallRes, Sign);
762 Builder.CreateBr(End);
763
764 // exp.large:
765 Builder.SetInsertPoint(ExpLargeBB);
766 Value *Sub15 = Builder.CreateAdd(
767 BiasedExp,
769 FloatIntTy, -static_cast<int64_t>(ExponentBias + FPMantissaWidth)));
770 Value *SignificandCast = Builder.CreateZExtOrTrunc(Significand, IntTy);
771 Value *ExpLargeRes = Builder.CreateShl(
772 SignificandCast, Builder.CreateZExtOrTrunc(Sub15, IntTy));
773 if (IsSigned)
774 ExpLargeRes = Builder.CreateMul(ExpLargeRes, Sign);
775 Builder.CreateBr(End);
776
777 // cleanup:
778 Builder.SetInsertPoint(End, End->begin());
779 PHINode *Retval0 = Builder.CreatePHI(FPToI->getType(), 3 + IsSaturating);
780
781 if (IsSaturating)
782 Retval0->addIncoming(Saturated, SaturateBB);
783 Retval0->addIncoming(ExpSmallRes, ExpSmallBB);
784 Retval0->addIncoming(ExpLargeRes, ExpLargeBB);
785 Retval0->addIncoming(Builder.getIntN(BitWidth, 0), Entry);
786
787 FPToI->replaceAllUsesWith(Retval0);
788 FPToI->dropAllReferences();
789 FPToI->eraseFromParent();
790}
791
792// clang-format off: preserve formatting of the following example
793
794/// Generate code to convert an integer to a fp number, replacing S(U)IToFP with
795/// the generated code. This currently generates code similarly to compiler-rt's
796/// implementations. This implementation has an implicit assumption that integer
797/// width is larger than fp.
798///
799/// An example IR generated from compiler-rt/floatdisf.c looks like below:
800/// define dso_local float @__floatdisf(i64 noundef %a) local_unnamed_addr #0 {
801/// entry:
802/// %cmp = icmp eq i64 %a, 0
803/// br i1 %cmp, label %return, label %if.end
804///
805/// if.end: ; preds = %entry
806/// %shr = ashr i64 %a, 63
807/// %xor = xor i64 %shr, %a
808/// %sub = sub nsw i64 %xor, %shr
809/// %0 = tail call i64 @llvm.ctlz.i64(i64 %sub, i1 true), !range !5
810/// %cast = trunc i64 %0 to i32
811/// %sub1 = sub nuw nsw i32 64, %cast
812/// %sub2 = xor i32 %cast, 63
813/// %cmp3 = icmp ult i32 %cast, 40
814/// br i1 %cmp3, label %if.then4, label %if.else
815///
816/// if.then4: ; preds = %if.end
817/// switch i32 %sub1, label %sw.default [
818/// i32 25, label %sw.bb
819/// i32 26, label %sw.epilog
820/// ]
821///
822/// sw.bb: ; preds = %if.then4
823/// %shl = shl i64 %sub, 1
824/// br label %sw.epilog
825///
826/// sw.default: ; preds = %if.then4
827/// %sub5 = sub nsw i64 38, %0
828/// %sh_prom = and i64 %sub5, 4294967295
829/// %shr6 = lshr i64 %sub, %sh_prom
830/// %shr9 = lshr i64 274877906943, %0
831/// %and = and i64 %shr9, %sub
832/// %cmp10 = icmp ne i64 %and, 0
833/// %conv11 = zext i1 %cmp10 to i64
834/// %or = or i64 %shr6, %conv11
835/// br label %sw.epilog
836///
837/// sw.epilog: ; preds = %sw.default,
838/// %if.then4, %sw.bb
839/// %a.addr.0 = phi i64 [ %or, %sw.default ], [ %sub, %if.then4 ], [ %shl,
840/// %sw.bb ] %1 = lshr i64 %a.addr.0, 2 %2 = and i64 %1, 1 %or16 = or i64 %2,
841/// %a.addr.0 %inc = add nsw i64 %or16, 1 %3 = and i64 %inc, 67108864
842/// %tobool.not = icmp eq i64 %3, 0
843/// %spec.select.v = select i1 %tobool.not, i64 2, i64 3
844/// %spec.select = ashr i64 %inc, %spec.select.v
845/// %spec.select56 = select i1 %tobool.not, i32 %sub2, i32 %sub1
846/// br label %if.end26
847///
848/// if.else: ; preds = %if.end
849/// %sub23 = add nuw nsw i64 %0, 4294967256
850/// %sh_prom24 = and i64 %sub23, 4294967295
851/// %shl25 = shl i64 %sub, %sh_prom24
852/// br label %if.end26
853///
854/// if.end26: ; preds = %sw.epilog,
855/// %if.else
856/// %a.addr.1 = phi i64 [ %shl25, %if.else ], [ %spec.select, %sw.epilog ]
857/// %e.0 = phi i32 [ %sub2, %if.else ], [ %spec.select56, %sw.epilog ]
858/// %conv27 = trunc i64 %shr to i32
859/// %and28 = and i32 %conv27, -2147483648
860/// %add = shl nuw nsw i32 %e.0, 23
861/// %shl29 = add nuw nsw i32 %add, 1065353216
862/// %conv31 = trunc i64 %a.addr.1 to i32
863/// %and32 = and i32 %conv31, 8388607
864/// %or30 = or i32 %and32, %and28
865/// %or33 = or i32 %or30, %shl29
866/// %4 = bitcast i32 %or33 to float
867/// br label %return
868///
869/// return: ; preds = %entry,
870/// %if.end26
871/// %retval.0 = phi float [ %4, %if.end26 ], [ 0.000000e+00, %entry ]
872/// ret float %retval.0
873/// }
874///
875/// Replace integer to fp with generated code.
876static void expandIToFP(Instruction *IToFP) {
877 // clang-format on
878 IRBuilder<> Builder(IToFP);
879 auto *IntVal = IToFP->getOperand(0);
880 IntegerType *IntTy = cast<IntegerType>(IntVal->getType());
881
882 unsigned BitWidth = IntVal->getType()->getIntegerBitWidth();
883 unsigned FPMantissaWidth = IToFP->getType()->getFPMantissaWidth() - 1;
884 // fp80 conversion is implemented by conversion to fp128 first, followed by
885 // an fptrunc to fp80.
886 FPMantissaWidth = FPMantissaWidth == 63 ? 112 : FPMantissaWidth;
887 // FIXME: As there are no related builtins in compiler-rt, this currently
888 // uses the fp32 <-> fp16 lib calls for the implementation.
889 FPMantissaWidth = FPMantissaWidth == 10 ? 23 : FPMantissaWidth;
890 FPMantissaWidth = FPMantissaWidth == 7 ? 23 : FPMantissaWidth;
891 unsigned FloatWidth = PowerOf2Ceil(FPMantissaWidth);
892 bool IsSigned = IToFP->getOpcode() == Instruction::SIToFP;
893
894 // We're going to introduce branches on the value, so freeze it.
896 IntVal = Builder.CreateFreeze(IntVal);
897
898 // The expansion below assumes that int width >= float width. Zero or sign
899 // extend the integer accordingly.
900 if (BitWidth < FloatWidth) {
901 BitWidth = FloatWidth;
902 IntTy = Builder.getIntNTy(BitWidth);
903 IntVal = Builder.CreateIntCast(IntVal, IntTy, IsSigned);
904 }
905
906 Value *Temp1 =
907 Builder.CreateShl(Builder.getIntN(BitWidth, 1),
908 Builder.getIntN(BitWidth, FPMantissaWidth + 3));
909
910 BasicBlock *Entry = Builder.GetInsertBlock();
911 Function *F = Entry->getParent();
912 Entry->setName(Twine(Entry->getName(), "itofp-entry"));
913 BasicBlock *End =
914 Entry->splitBasicBlock(Builder.GetInsertPoint(), "itofp-return");
915 BasicBlock *IfEnd =
916 BasicBlock::Create(Builder.getContext(), "itofp-if-end", F, End);
917 BasicBlock *IfThen4 =
918 BasicBlock::Create(Builder.getContext(), "itofp-if-then4", F, End);
919 BasicBlock *SwBB =
920 BasicBlock::Create(Builder.getContext(), "itofp-sw-bb", F, End);
921 BasicBlock *SwDefault =
922 BasicBlock::Create(Builder.getContext(), "itofp-sw-default", F, End);
923 BasicBlock *SwEpilog =
924 BasicBlock::Create(Builder.getContext(), "itofp-sw-epilog", F, End);
925 BasicBlock *IfThen20 =
926 BasicBlock::Create(Builder.getContext(), "itofp-if-then20", F, End);
927 BasicBlock *IfElse =
928 BasicBlock::Create(Builder.getContext(), "itofp-if-else", F, End);
929 BasicBlock *IfEnd26 =
930 BasicBlock::Create(Builder.getContext(), "itofp-if-end26", F, End);
931
932 Entry->getTerminator()->eraseFromParent();
933
934 Function *CTLZ =
935 Intrinsic::getOrInsertDeclaration(F->getParent(), Intrinsic::ctlz, IntTy);
936 ConstantInt *True = Builder.getTrue();
937
938 // entry:
939 Builder.SetInsertPoint(Entry);
940 // We assume that the zero is an unlikely input case, so the branch to 'End'
941 // is the unlikely path.
942 Value *Cmp = Builder.CreateICmpEQ(IntVal, ConstantInt::getSigned(IntTy, 0));
943 Value *CondBrEntry = Builder.CreateCondBr(Cmp, End, IfEnd);
944 applyProfMetadataIfEnabled(CondBrEntry, [&](Instruction *Inst) {
946 Inst->setMetadata(
947 LLVMContext::MD_prof,
949 });
950
951 // if.end:
952 Builder.SetInsertPoint(IfEnd);
953 Value *Shr =
954 Builder.CreateAShr(IntVal, Builder.getIntN(BitWidth, BitWidth - 1));
955 Value *Xor = Builder.CreateXor(Shr, IntVal);
956 Value *Sub = Builder.CreateSub(Xor, Shr);
957 Value *Call = Builder.CreateCall(CTLZ, {IsSigned ? Sub : IntVal, True});
958 Value *Cast = Builder.CreateTrunc(Call, Builder.getInt32Ty());
959 int BitWidthNew = FloatWidth == 128 ? BitWidth : 32;
960 Value *Sub1 = Builder.CreateSub(Builder.getIntN(BitWidthNew, BitWidth),
961 FloatWidth == 128 ? Call : Cast);
962 Value *Sub2 = Builder.CreateSub(Builder.getIntN(BitWidthNew, BitWidth - 1),
963 FloatWidth == 128 ? Call : Cast);
964 Value *Cmp3 = Builder.CreateICmpSGT(
965 Sub1, Builder.getIntN(BitWidthNew, FPMantissaWidth + 1));
966 // This branch handles the rare case where rounding the mantissa causes a
967 // carry-out at the most significant bit, necessitating an increment of the
968 // exponent. This is a rare case, so the True path is marked as likely.
969 Value *CondBrIfEnd = Builder.CreateCondBr(Cmp3, IfThen4, IfElse);
970 applyProfMetadataIfEnabled(CondBrIfEnd, [&](Instruction *Inst) {
972 Inst->setMetadata(
973 LLVMContext::MD_prof,
975 });
976
977 // if.then4:
978 Builder.SetInsertPoint(IfThen4);
979 SwitchInst *SI = Builder.CreateSwitch(Sub1, SwDefault);
980 SI->addCase(Builder.getIntN(BitWidthNew, FPMantissaWidth + 2), SwBB);
981 SI->addCase(Builder.getIntN(BitWidthNew, FPMantissaWidth + 3), SwEpilog);
982 // Add branch weights to the SwitchInst. The weights are provided for the
983 // default case first (SwDefault), followed by each explicit case in the
984 // order they were added (SwBB, then SwEpilog). Because the following cases
985 // are rare, the default case is given a likely weight.
988 SI->setMetadata(
989 LLVMContext::MD_prof,
990 MDBuilder(SI->getContext())
991 .createBranchWeights({llvm::MDBuilder::kLikelyBranchWeight,
992 llvm::MDBuilder::kUnlikelyBranchWeight,
993 llvm::MDBuilder::kUnlikelyBranchWeight}));
994 }
995
996 // sw.bb:
997 Builder.SetInsertPoint(SwBB);
998 Value *Shl =
999 Builder.CreateShl(IsSigned ? Sub : IntVal, Builder.getIntN(BitWidth, 1));
1000 Builder.CreateBr(SwEpilog);
1001
1002 // sw.default:
1003 Builder.SetInsertPoint(SwDefault);
1004 Value *Sub5 = Builder.CreateSub(
1005 Builder.getIntN(BitWidthNew, BitWidth - FPMantissaWidth - 3),
1006 FloatWidth == 128 ? Call : Cast);
1007 Value *ShProm = Builder.CreateZExt(Sub5, IntTy);
1008 Value *Shr6 = Builder.CreateLShr(IsSigned ? Sub : IntVal,
1009 FloatWidth == 128 ? Sub5 : ShProm);
1010 Value *Sub8 =
1011 Builder.CreateAdd(FloatWidth == 128 ? Call : Cast,
1012 Builder.getIntN(BitWidthNew, FPMantissaWidth + 3));
1013 Value *ShProm9 = Builder.CreateZExt(Sub8, IntTy);
1014 Value *Shr9 = Builder.CreateLShr(ConstantInt::getSigned(IntTy, -1),
1015 FloatWidth == 128 ? Sub8 : ShProm9);
1016 Value *And = Builder.CreateAnd(Shr9, IsSigned ? Sub : IntVal);
1017 Value *Cmp10 = Builder.CreateICmpNE(And, Builder.getIntN(BitWidth, 0));
1018 Value *Conv11 = Builder.CreateZExt(Cmp10, IntTy);
1019 Value *Or = Builder.CreateOr(Shr6, Conv11);
1020 Builder.CreateBr(SwEpilog);
1021
1022 // sw.epilog:
1023 Builder.SetInsertPoint(SwEpilog);
1024 PHINode *AAddr0 = Builder.CreatePHI(IntTy, 3);
1025 AAddr0->addIncoming(Or, SwDefault);
1026 AAddr0->addIncoming(IsSigned ? Sub : IntVal, IfThen4);
1027 AAddr0->addIncoming(Shl, SwBB);
1028 Value *A0 = Builder.CreateTrunc(AAddr0, Builder.getInt32Ty());
1029 Value *A1 = Builder.CreateLShr(A0, Builder.getInt32(2));
1030 Value *A2 = Builder.CreateAnd(A1, Builder.getInt32(1));
1031 Value *Conv16 = Builder.CreateZExt(A2, IntTy);
1032 Value *Or17 = Builder.CreateOr(AAddr0, Conv16);
1033 Value *Inc = Builder.CreateAdd(Or17, Builder.getIntN(BitWidth, 1));
1034 Value *Shr18 = nullptr;
1035 if (IsSigned)
1036 Shr18 = Builder.CreateAShr(Inc, Builder.getIntN(BitWidth, 2));
1037 else
1038 Shr18 = Builder.CreateLShr(Inc, Builder.getIntN(BitWidth, 2));
1039 Value *A3 = Builder.CreateAnd(Inc, Temp1, "a3");
1040 Value *PosOrNeg = Builder.CreateICmpEQ(A3, Builder.getIntN(BitWidth, 0));
1041 Value *ExtractT60 = Builder.CreateTrunc(Shr18, Builder.getIntNTy(FloatWidth));
1042 Value *Extract63 = Builder.CreateLShr(Shr18, Builder.getIntN(BitWidth, 32));
1043 Value *ExtractT64 = nullptr;
1044 if (FloatWidth > 80)
1045 ExtractT64 = Builder.CreateTrunc(Sub2, Builder.getInt64Ty());
1046 else
1047 ExtractT64 = Builder.CreateTrunc(Extract63, Builder.getInt32Ty());
1048 // Rounding usually keeps the exponent within its current magnitude and
1049 // overflow is rare. The False path is unlikely to be taken.
1050 Value *CondBrSwEpilog = Builder.CreateCondBr(PosOrNeg, IfEnd26, IfThen20);
1051 applyProfMetadataIfEnabled(CondBrSwEpilog, [&](Instruction *Inst) {
1053 Inst->setMetadata(
1054 LLVMContext::MD_prof,
1056 });
1057
1058 // if.then20
1059 Builder.SetInsertPoint(IfThen20);
1060 Value *Shr21 = nullptr;
1061 if (IsSigned)
1062 Shr21 = Builder.CreateAShr(Inc, Builder.getIntN(BitWidth, 3));
1063 else
1064 Shr21 = Builder.CreateLShr(Inc, Builder.getIntN(BitWidth, 3));
1065 Value *ExtractT = Builder.CreateTrunc(Shr21, Builder.getIntNTy(FloatWidth));
1066 Value *Extract = Builder.CreateLShr(Shr21, Builder.getIntN(BitWidth, 32));
1067 Value *ExtractT62 = nullptr;
1068 if (FloatWidth > 80)
1069 ExtractT62 = Builder.CreateTrunc(Sub1, Builder.getInt64Ty());
1070 else
1071 ExtractT62 = Builder.CreateTrunc(Extract, Builder.getInt32Ty());
1072 Builder.CreateBr(IfEnd26);
1073
1074 // if.else:
1075 Builder.SetInsertPoint(IfElse);
1076 Value *Sub24 = Builder.CreateAdd(
1077 FloatWidth == 128 ? Call : Cast,
1078 ConstantInt::getSigned(Builder.getIntNTy(BitWidthNew),
1079 -(int)(BitWidth - FPMantissaWidth - 1)));
1080 Value *ShProm25 = Builder.CreateZExt(Sub24, IntTy);
1081 Value *Shl26 = Builder.CreateShl(IsSigned ? Sub : IntVal,
1082 FloatWidth == 128 ? Sub24 : ShProm25);
1083 Value *ExtractT61 = Builder.CreateTrunc(Shl26, Builder.getIntNTy(FloatWidth));
1084 Value *Extract65 = Builder.CreateLShr(Shl26, Builder.getIntN(BitWidth, 32));
1085 Value *ExtractT66 = nullptr;
1086 if (FloatWidth > 80)
1087 ExtractT66 = Builder.CreateTrunc(Sub2, Builder.getInt64Ty());
1088 else
1089 ExtractT66 = Builder.CreateTrunc(Extract65, Builder.getInt32Ty());
1090 Builder.CreateBr(IfEnd26);
1091
1092 // if.end26:
1093 Builder.SetInsertPoint(IfEnd26);
1094 PHINode *AAddr1Off0 = Builder.CreatePHI(Builder.getIntNTy(FloatWidth), 3);
1095 AAddr1Off0->addIncoming(ExtractT, IfThen20);
1096 AAddr1Off0->addIncoming(ExtractT60, SwEpilog);
1097 AAddr1Off0->addIncoming(ExtractT61, IfElse);
1098 PHINode *AAddr1Off32 = nullptr;
1099 if (FloatWidth > 32) {
1100 AAddr1Off32 =
1101 Builder.CreatePHI(Builder.getIntNTy(FloatWidth > 80 ? 64 : 32), 3);
1102 AAddr1Off32->addIncoming(ExtractT62, IfThen20);
1103 AAddr1Off32->addIncoming(ExtractT64, SwEpilog);
1104 AAddr1Off32->addIncoming(ExtractT66, IfElse);
1105 }
1106 PHINode *E0 = nullptr;
1107 if (FloatWidth <= 80) {
1108 E0 = Builder.CreatePHI(Builder.getIntNTy(BitWidthNew), 3);
1109 E0->addIncoming(Sub1, IfThen20);
1110 E0->addIncoming(Sub2, SwEpilog);
1111 E0->addIncoming(Sub2, IfElse);
1112 }
1113 Value *And29 = nullptr;
1114 if (FloatWidth > 80) {
1115 Value *Temp2 = Builder.CreateShl(Builder.getIntN(BitWidth, 1),
1116 Builder.getIntN(BitWidth, 63));
1117 And29 = Builder.CreateAnd(Shr, Temp2, "and29");
1118 } else {
1119 Value *Conv28 = Builder.CreateTrunc(Shr, Builder.getInt32Ty());
1120 And29 = Builder.CreateAnd(
1121 Conv28, ConstantInt::get(Builder.getContext(), APInt::getSignMask(32)));
1122 }
1123 unsigned TempMod = FPMantissaWidth % 32;
1124 Value *And34 = nullptr;
1125 Value *Shl30 = nullptr;
1126 if (FloatWidth > 80) {
1127 TempMod += 32;
1128 Value *Add = Builder.CreateShl(AAddr1Off32, Builder.getInt64(TempMod));
1129 Shl30 = Builder.CreateAdd(
1130 Add, Builder.getInt64(((1ull << (62ull - TempMod)) - 1ull) << TempMod));
1131 And34 = Builder.CreateZExt(Shl30, Builder.getInt128Ty());
1132 } else {
1133 Value *Add = Builder.CreateShl(E0, Builder.getInt32(TempMod));
1134 Shl30 = Builder.CreateAdd(
1135 Add, Builder.getInt32(((1 << (30 - TempMod)) - 1) << TempMod));
1136 And34 = Builder.CreateAnd(FloatWidth > 32 ? AAddr1Off32 : AAddr1Off0,
1137 Builder.getInt32((1 << TempMod) - 1));
1138 }
1139 Value *Or35 = nullptr;
1140 if (FloatWidth > 80) {
1141 Value *And29Trunc = Builder.CreateTrunc(And29, Builder.getInt128Ty());
1142 Value *Or31 = Builder.CreateOr(And29Trunc, And34);
1143 Value *Or34 = Builder.CreateShl(Or31, Builder.getIntN(128, 64));
1144 Value *Temp3 = Builder.CreateShl(Builder.getIntN(128, 1),
1145 Builder.getIntN(128, FPMantissaWidth));
1146 Value *Temp4 = Builder.CreateSub(Temp3, Builder.getIntN(128, 1));
1147 Value *A6 = Builder.CreateAnd(AAddr1Off0, Temp4);
1148 Or35 = Builder.CreateOr(Or34, A6);
1149 } else {
1150 Value *Or31 = Builder.CreateOr(And34, And29);
1151 Or35 = Builder.CreateOr(IsSigned ? Or31 : And34, Shl30);
1152 }
1153 Value *A4 = nullptr;
1154 if (IToFP->getType()->isDoubleTy()) {
1155 Value *ZExt1 = Builder.CreateZExt(Or35, Builder.getIntNTy(FloatWidth));
1156 Value *Shl1 = Builder.CreateShl(ZExt1, Builder.getIntN(FloatWidth, 32));
1157 Value *And1 =
1158 Builder.CreateAnd(AAddr1Off0, Builder.getIntN(FloatWidth, 0xFFFFFFFF));
1159 Value *Or1 = Builder.CreateOr(Shl1, And1);
1160 A4 = Builder.CreateBitCast(Or1, IToFP->getType());
1161 } else if (IToFP->getType()->isX86_FP80Ty()) {
1162 Value *A40 =
1163 Builder.CreateBitCast(Or35, Type::getFP128Ty(Builder.getContext()));
1164 A4 = Builder.CreateFPTrunc(A40, IToFP->getType());
1165 } else if (IToFP->getType()->isHalfTy() || IToFP->getType()->isBFloatTy()) {
1166 // Deal with the "half" situation. This is a workaround since there is
1167 // currently no floattihf.c to refer to.
1168 Value *A40 =
1169 Builder.CreateBitCast(Or35, Type::getFloatTy(Builder.getContext()));
1170 A4 = Builder.CreateFPTrunc(A40, IToFP->getType());
1171 } else // float type
1172 A4 = Builder.CreateBitCast(Or35, IToFP->getType());
1173
1174 // Sub2 is the unbiased exponent (the index of the top set bit in the input).
1175 // The exponent arithmetic above wraps to garbage instead of inf once it
1176 // overflows the exponent field, so saturate to a correctly-signed infinity
1177 // when Sub2 reaches 1 << (ExponentWidth - 1). Sub2 is at most BitWidth - 1,
1178 // so skip the check entirely when even that can't reach the threshold.
1179 // (Values that round *up* into inf, e.g. 2^n - 1, keep Sub2 = BitWidth - 1;
1180 // these are handled by the conversion's own rounding, not by this
1181 // saturation.)
1182 unsigned ExponentWidth = FloatWidth - FPMantissaWidth - 1;
1183 uint64_t MinInfExp = 1ULL << (ExponentWidth - 1);
1184 if (BitWidth - 1 >= MinInfExp) {
1185 Value *MinInfExpVal = Builder.getIntN(BitWidthNew, MinInfExp);
1186 Value *Overflow = Builder.CreateICmpUGE(Sub2, MinInfExpVal);
1187 Value *Inf = ConstantFP::getInfinity(IToFP->getType(), /*Negative=*/false);
1188 if (IsSigned) {
1189 Value *NegInf =
1190 ConstantFP::getInfinity(IToFP->getType(), /*Negative=*/true);
1191 Value *IsNeg =
1192 Builder.CreateICmpSLT(IntVal, ConstantInt::getNullValue(IntTy));
1193 Inf = Builder.CreateSelect(IsNeg, NegInf, Inf);
1194 }
1195 A4 = Builder.CreateSelect(Overflow, Inf, A4);
1196 }
1197 Builder.CreateBr(End);
1198
1199 // return:
1200 Builder.SetInsertPoint(End, End->begin());
1201 PHINode *Retval0 = Builder.CreatePHI(IToFP->getType(), 2);
1202 Retval0->addIncoming(A4, IfEnd26);
1203 Retval0->addIncoming(ConstantFP::getZero(IToFP->getType(), false), Entry);
1204
1205 IToFP->replaceAllUsesWith(Retval0);
1206 IToFP->dropAllReferences();
1207 IToFP->eraseFromParent();
1208}
1209
1212 VectorType *VTy = cast<FixedVectorType>(I->getType());
1213
1214 IRBuilder<> Builder(I);
1215
1216 unsigned NumElements = VTy->getElementCount().getFixedValue();
1217 Value *Result = PoisonValue::get(VTy);
1218 for (unsigned Idx = 0; Idx < NumElements; ++Idx) {
1219 Value *Ext = Builder.CreateExtractElement(I->getOperand(0), Idx);
1220
1221 Value *NewOp = nullptr;
1222 if (auto *BinOp = dyn_cast<BinaryOperator>(I))
1223 NewOp = Builder.CreateBinOp(
1224 BinOp->getOpcode(), Ext,
1225 Builder.CreateExtractElement(I->getOperand(1), Idx));
1226 else if (auto *CastI = dyn_cast<CastInst>(I))
1227 NewOp = Builder.CreateCast(CastI->getOpcode(), Ext,
1228 I->getType()->getScalarType());
1229 else if (auto *II = dyn_cast<IntrinsicInst>(I)) {
1230 assert(II->getIntrinsicID() == Intrinsic::fptoui_sat ||
1231 II->getIntrinsicID() == Intrinsic::fptosi_sat);
1232 NewOp = Builder.CreateIntrinsic(I->getType()->getScalarType(),
1233 II->getIntrinsicID(), {Ext});
1234 } else
1235 llvm_unreachable("Unsupported instruction type");
1236
1237 Result = Builder.CreateInsertElement(Result, NewOp, Idx);
1238 if (auto *ScalarizedI = dyn_cast<Instruction>(NewOp)) {
1239 ScalarizedI->copyIRFlags(I, true);
1240 Worklist.push_back(ScalarizedI);
1241 }
1242 }
1243
1244 I->replaceAllUsesWith(Result);
1245 I->dropAllReferences();
1246 I->eraseFromParent();
1247}
1248
1251 if (I.getOperand(0)->getType()->isVectorTy())
1252 scalarize(&I, Worklist);
1253 else
1254 Worklist.push_back(&I);
1255}
1256
1257static bool runImpl(Function &F, const TargetLowering &TLI,
1258 const LibcallLoweringInfo &Libcalls, AssumptionCache *AC) {
1260
1261 unsigned MaxLegalFpConvertBitWidth =
1264 MaxLegalFpConvertBitWidth = ExpandFpConvertBits;
1265
1266 unsigned MaxLegalDivRemBitWidth = TLI.getMaxDivRemBitWidthSupported();
1268 MaxLegalDivRemBitWidth = ExpandDivRemBits;
1269
1270 bool DisableExpandLargeFp =
1271 MaxLegalFpConvertBitWidth >= IntegerType::MAX_INT_BITS;
1272 bool DisableExpandLargeDivRem =
1273 MaxLegalDivRemBitWidth >= IntegerType::MAX_INT_BITS;
1274 bool DisableFrem = !FRemExpander::shouldExpandAnyFremType(TLI);
1275
1276 if (DisableExpandLargeFp && DisableFrem && DisableExpandLargeDivRem)
1277 return false;
1278
1279 auto ShouldHandleInst = [&](Instruction &I) {
1280 Type *Ty = I.getType();
1281 // TODO: This pass doesn't handle scalable vectors.
1282 if (Ty->isScalableTy())
1283 return false;
1284
1285 switch (I.getOpcode()) {
1286 case Instruction::FRem:
1287 return !DisableFrem && FRemExpander::shouldExpandFremType(TLI, Ty);
1288 case Instruction::FPToUI:
1289 case Instruction::FPToSI:
1290 return !DisableExpandLargeFp &&
1291 cast<IntegerType>(Ty->getScalarType())->getIntegerBitWidth() >
1292 MaxLegalFpConvertBitWidth;
1293 case Instruction::UIToFP:
1294 case Instruction::SIToFP:
1295 return !DisableExpandLargeFp &&
1296 cast<IntegerType>(I.getOperand(0)->getType()->getScalarType())
1297 ->getIntegerBitWidth() > MaxLegalFpConvertBitWidth;
1298 case Instruction::UDiv:
1299 case Instruction::SDiv:
1300 case Instruction::URem:
1301 case Instruction::SRem:
1302 // Power-of-2 divisors are handled inside the expansion (via efficient
1303 // shift/mask sequences) rather than being excluded here, so that
1304 // backends that cannot lower wide div/rem even for powers of two
1305 // (e.g. when DAGCombiner is disabled) still get valid lowered code.
1306 return !DisableExpandLargeDivRem &&
1307 cast<IntegerType>(Ty->getScalarType())->getIntegerBitWidth() >
1308 MaxLegalDivRemBitWidth;
1309 case Instruction::Call: {
1310 auto *II = dyn_cast<IntrinsicInst>(&I);
1311 if (II && (II->getIntrinsicID() == Intrinsic::fptoui_sat ||
1312 II->getIntrinsicID() == Intrinsic::fptosi_sat)) {
1313 return !DisableExpandLargeFp &&
1314 cast<IntegerType>(Ty->getScalarType())->getIntegerBitWidth() >
1315 MaxLegalFpConvertBitWidth;
1316 }
1317 return false;
1318 }
1319 }
1320
1321 return false;
1322 };
1323
1324 bool Modified = false;
1325 for (auto It = inst_begin(&F), End = inst_end(F); It != End;) {
1326 Instruction &I = *It++;
1327 if (!ShouldHandleInst(I))
1328 continue;
1329
1330 addToWorklist(I, Worklist);
1331 Modified = true;
1332 }
1333
1334 while (!Worklist.empty()) {
1335 Instruction *I = Worklist.pop_back_val();
1336
1337 switch (I->getOpcode()) {
1338 case Instruction::FRem: {
1339 auto SQ = [&]() -> std::optional<SimplifyQuery> {
1340 if (AC) {
1341 auto Res = std::make_optional<SimplifyQuery>(
1342 I->getModule()->getDataLayout(), I);
1343 Res->AC = AC;
1344 return Res;
1345 }
1346 return {};
1347 }();
1348
1350 break;
1351 }
1352
1353 case Instruction::FPToUI:
1354 expandFPToI(I, /*IsSaturating=*/false, /*IsSigned=*/false);
1355 break;
1356 case Instruction::FPToSI:
1357 expandFPToI(I, /*IsSaturating=*/false, /*IsSigned=*/true);
1358 break;
1359
1360 case Instruction::UIToFP:
1361 case Instruction::SIToFP:
1362 expandIToFP(I);
1363 break;
1364
1365 case Instruction::UDiv:
1366 case Instruction::SDiv:
1367 case Instruction::URem:
1368 case Instruction::SRem: {
1369 auto *BO = cast<BinaryOperator>(I);
1370 // TODO: isConstantPowerOfTwo does not handle vector constants, so
1371 // vector div/rem by a power-of-2 splat goes through the generic path.
1372 if (isConstantPowerOfTwo(BO->getOperand(1), isSigned(BO->getOpcode()))) {
1373 expandPow2DivRem(BO);
1374 } else {
1375 unsigned Opc = BO->getOpcode();
1376 if (Opc == Instruction::UDiv || Opc == Instruction::SDiv)
1377 expandDivision(BO);
1378 else
1379 expandRemainder(BO);
1380 }
1381 break;
1382 }
1383 case Instruction::Call: {
1384 auto *II = cast<IntrinsicInst>(I);
1385 assert(II->getIntrinsicID() == Intrinsic::fptoui_sat ||
1386 II->getIntrinsicID() == Intrinsic::fptosi_sat);
1387 expandFPToI(I, /*IsSaturating=*/true,
1388 /*IsSigned=*/II->getIntrinsicID() == Intrinsic::fptosi_sat);
1389 break;
1390 }
1391 }
1392 }
1393
1394 return Modified;
1395}
1396
1397namespace {
1398class ExpandIRInstsLegacyPass : public FunctionPass {
1399 CodeGenOptLevel OptLevel;
1400
1401public:
1402 static char ID;
1403
1404 ExpandIRInstsLegacyPass(CodeGenOptLevel OptLevel)
1405 : FunctionPass(ID), OptLevel(OptLevel) {}
1406
1407 ExpandIRInstsLegacyPass() : ExpandIRInstsLegacyPass(CodeGenOptLevel::None) {}
1408
1409 bool runOnFunction(Function &F) override {
1410 auto *TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
1411 const TargetSubtargetInfo *Subtarget = TM->getSubtargetImpl(F);
1412 auto *TLI = Subtarget->getTargetLowering();
1413 AssumptionCache *AC = nullptr;
1414
1415 const LibcallLoweringInfo &Libcalls =
1416 getAnalysis<LibcallLoweringInfoWrapper>().getLibcallLowering(
1417 *F.getParent(), *Subtarget);
1418
1419 if (OptLevel != CodeGenOptLevel::None && !F.hasOptNone())
1420 AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
1421 return runImpl(F, *TLI, Libcalls, AC);
1422 }
1423
1424 void getAnalysisUsage(AnalysisUsage &AU) const override {
1425 AU.addRequired<LibcallLoweringInfoWrapper>();
1426 AU.addRequired<TargetPassConfig>();
1427 if (OptLevel != CodeGenOptLevel::None)
1428 AU.addRequired<AssumptionCacheTracker>();
1429 AU.addPreserved<AAResultsWrapperPass>();
1430 AU.addPreserved<GlobalsAAWrapperPass>();
1431 AU.addRequired<LibcallLoweringInfoWrapper>();
1432 }
1433};
1434} // namespace
1435
1437 CodeGenOptLevel OptLevel)
1438 : TM(&TM), OptLevel(OptLevel) {}
1439
1441 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
1442 static_cast<PassInfoMixin<ExpandIRInstsPass> *>(this)->printPipeline(
1443 OS, MapClassName2PassName);
1444 OS << '<';
1445 OS << "O" << (int)OptLevel;
1446 OS << '>';
1447}
1448
1451 const TargetSubtargetInfo *STI = TM->getSubtargetImpl(F);
1452 auto &TLI = *STI->getTargetLowering();
1453 AssumptionCache *AC = nullptr;
1454 if (OptLevel != CodeGenOptLevel::None)
1455 AC = &FAM.getResult<AssumptionAnalysis>(F);
1456
1457 auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
1458
1459 const LibcallLoweringModuleAnalysisResult *LibcallLowering =
1460 MAMProxy.getCachedResult<LibcallLoweringModuleAnalysis>(*F.getParent());
1461
1462 if (!LibcallLowering) {
1463 F.getContext().emitError("'" + LibcallLoweringModuleAnalysis::name() +
1464 "' analysis required");
1465 return PreservedAnalyses::all();
1466 }
1467
1468 const LibcallLoweringInfo &Libcalls =
1469 LibcallLowering->getLibcallLowering(*STI);
1470
1471 return runImpl(F, TLI, Libcalls, AC) ? PreservedAnalyses::none()
1473}
1474
1475char ExpandIRInstsLegacyPass::ID = 0;
1476INITIALIZE_PASS_BEGIN(ExpandIRInstsLegacyPass, "expand-ir-insts",
1477 "Expand certain fp instructions", false, false)
1479INITIALIZE_PASS_END(ExpandIRInstsLegacyPass, "expand-ir-insts",
1480 "Expand IR instructions", false, false)
1481
1483 return new ExpandIRInstsLegacyPass(OptLevel);
1484}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
#define X(NUM, ENUM, NAME)
Definition ELF.h:853
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static bool runOnFunction(Function &F, bool PostInlining)
static bool expandFRem(BinaryOperator &I, std::optional< SimplifyQuery > &SQ)
static void expandIToFP(Instruction *IToFP)
Generate code to convert an integer to a fp number, replacing S(U)IToFP with the generated code.
static cl::opt< unsigned > ExpandDivRemBits("expand-div-rem-bits", cl::Hidden, cl::init(IntegerType::MAX_INT_BITS), cl::desc("div and rem instructions on integers with " "more than <N> bits are expanded."))
static bool runImpl(Function &F, const TargetLowering &TLI, const LibcallLoweringInfo &Libcalls, AssumptionCache *AC)
static void expandPow2DivRem(BinaryOperator *BO)
Expand division or remainder by a power-of-2 constant.
static bool isSigned(unsigned Opcode)
static void addToWorklist(Instruction &I, SmallVector< Instruction *, 4 > &Worklist)
static Value * addSignedBias(IRBuilder<> &Builder, Value *X, unsigned BitWidth, unsigned ShiftAmt)
For signed div/rem by a power of 2, compute the bias-adjusted dividend: Sign = ashr X,...
static cl::opt< unsigned > ExpandFpConvertBits("expand-fp-convert-bits", cl::Hidden, cl::init(IntegerType::MAX_INT_BITS), cl::desc("fp convert instructions on integers with " "more than <N> bits are expanded."))
static void expandFPToI(Instruction *FPToI, bool IsSaturating, bool IsSigned)
Generate code to convert a fp number to integer, replacing FPToS(U)I with the generated code.
static bool isConstantPowerOfTwo(Value *V, bool SignedOp)
static void scalarize(Instruction *I, SmallVectorImpl< Instruction * > &Worklist)
#define DEBUG_TYPE
This is the interface for a simple mod/ref and alias analysis over globals.
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
uint64_t IntrinsicInst * II
FunctionAnalysisManager FAM
Function * Fun
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
This file contains the declarations for profiling metadata utility functions.
This file defines the SmallVector class.
#define LLVM_DEBUG(...)
Definition Debug.h:119
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
This file describes how to lower LLVM code to machine code.
Target-Independent Code Generator Pass Configuration Options pass.
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:230
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition APInt.h:210
bool isNegative() const
Determine sign of this APInt.
Definition APInt.h:330
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1662
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:220
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:441
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:307
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition APInt.h:240
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:461
LLVM_ABI BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="")
Split the basic block into two basic blocks at the specified instruction.
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206
BinaryOps getOpcode() const
Definition InstrTypes.h:409
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition InstrTypes.h:746
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:767
static LLVM_ABI ConstantFP * getZero(Type *Ty, bool Negative=false)
static LLVM_ABI ConstantFP * getQNaN(Type *Ty, bool Negative=false, APInt *Payload=nullptr)
static LLVM_ABI ConstantFP * getInfinity(Type *Ty, bool Negative=false)
This is the shared class of boolean and integer constants.
Definition Constants.h:87
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static ConstantInt * getSigned(IntegerType *Ty, int64_t V, bool ImplicitTrunc=false)
Return a ConstantInt with the specified value for the specified type.
Definition Constants.h:135
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
LLVM_ABI ExpandIRInstsPass(const TargetMachine &TM, CodeGenOptLevel OptLevel)
LLVM_ABI PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
LLVM_ABI void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:23
void setAllowContract(bool B=true)
Definition FMF.h:93
bool noInfs() const
Definition FMF.h:69
void setAllowReciprocal(bool B=true)
Definition FMF.h:90
bool approxFunc() const
Definition FMF.h:73
void setNoNaNs(bool B=true)
Definition FMF.h:81
bool noNaNs() const
Definition FMF.h:68
void setNoInfs(bool B=true)
Definition FMF.h:84
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
Module * getParent()
Get the module that this global value is contained inside of...
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2858
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI bool isExact() const LLVM_READONLY
Determine whether the exact flag is set.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Class to represent integer types.
@ MAX_INT_BITS
Maximum number of bits that can be specified.
Tracks which library functions to use for a particular subtarget.
Record a mapping from subtarget to LibcallLoweringInfo.
LLVM_ABI MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
Definition MDBuilder.cpp:38
LLVM_ABI MDNode * createLikelyBranchWeights()
Return metadata containing two branch weights, with significant bias towards true destination.
Definition MDBuilder.cpp:43
LLVM_ABI MDNode * createUnlikelyBranchWeights()
Return metadata containing two branch weights, with significant bias towards false destination.
Definition MDBuilder.cpp:48
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition Analysis.h:115
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Definition StringRef.h:56
Multiway switch.
unsigned getMaxDivRemBitWidthSupported() const
Returns the size in bits of the maximum div/rem the backend supports.
unsigned getMaxLargeFPConvertBitWidthSupported() const
Returns the size in bits of the maximum fp to/from int conversion the backend supports.
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger size, needs to be expanded to some other code sequence, or the target has a custom expander for it.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the target instruction selector can accept natively.
Primary interface to the complete machine description for the target machine.
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetLowering * getTargetLowering() const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary values as strings.
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
LLVM_ABI unsigned getIntegerBitWidth() const
bool isX86_FP80Ty() const
Return true if this is x86 long double.
Definition Type.h:161
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
Definition Type.h:147
static LLVM_ABI Type * getFP128Ty(LLVMContext &C)
Definition Type.cpp:295
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:370
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
Definition Type.h:144
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition Type.h:158
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:317
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
Definition Type.cpp:290
LLVM_ABI int getFPMantissaWidth() const
Return the width of the mantissa of this type.
Definition Type.cpp:241
LLVM_ABI const fltSemantics & getFltSemantics() const
Definition Type.cpp:110
void dropAllReferences()
Drop all references to operands.
Definition User.h:324
Value * getOperand(unsigned i) const
Definition User.h:207
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:549
LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.h:258
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:399
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
Definition ilist_node.h:34
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
CallInst * Call
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
LLVM_ABI bool expandDivision(BinaryOperator *Div)
Generate code to divide two integers, replacing Div with the generated code.
cl::opt< bool > ProfcheckDisableMetadataFixes
Definition LoopInfo.cpp:60
LLVM_ABI bool isKnownNeverInfinity(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if the floating-point scalar value is not an infinity or if the floating-point vector value has no infinities.
LLVM_ABI void setExplicitlyUnknownBranchWeightsIfProfiled(Instruction &I, StringRef PassName, const Function *F=nullptr)
Like setExplicitlyUnknownBranchWeights(...), but only sets unknown branch weights in the new instruct...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
OuterAnalysisManagerProxy< ModuleAnalysisManager, Function > ModuleAnalysisManagerFunctionProxy
Provide the ModuleAnalysisManager to Function proxy.
LLVM_ABI void applyProfMetadataIfEnabled(Value *V, llvm::function_ref< void(Instruction *)> setMetadataCallback)
inst_iterator inst_begin(Function *F)
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition MathExtras.h:385
LLVM_ABI FunctionPass * createExpandIRInstsPass(CodeGenOptLevel)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
LLVM_ABI void SplitBlockAndInsertIfThenElse(Value *Cond, BasicBlock::iterator SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr)
SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, but also creates the ElseBlock...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
CodeGenOptLevel
Code generation optimization level.
Definition CodeGen.h:82
inst_iterator inst_end(Function *F)
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
@ Xor
Bitwise or logical XOR of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1946
LLVM_ABI bool expandRemainder(BinaryOperator *Rem)
Generate code to calculate the remainder of two integers, replacing Rem with the generated code.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:145
static LLVM_ABI EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:339
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:176
Matching combinators.
A CRTP mix-in to automatically provide informational APIs needed for passes.
Definition PassManager.h:89