AMDGPUAsmParser.cpp
1//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "AMDKernelCodeT.h"
16#include "SIDefines.h"
17#include "SIInstrInfo.h"
22#include "llvm/ADT/APFloat.h"
24#include "llvm/ADT/StringSet.h"
25#include "llvm/ADT/Twine.h"
28#include "llvm/MC/MCAsmInfo.h"
29#include "llvm/MC/MCContext.h"
30#include "llvm/MC/MCExpr.h"
31#include "llvm/MC/MCInst.h"
32#include "llvm/MC/MCInstrDesc.h"
38#include "llvm/MC/MCSymbol.h"
46#include <optional>
47
48using namespace llvm;
49using namespace llvm::AMDGPU;
50using namespace llvm::amdhsa;
51
52namespace {
53
54class AMDGPUAsmParser;
55
56enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
57
58//===----------------------------------------------------------------------===//
59// Operand
60//===----------------------------------------------------------------------===//
61
62class AMDGPUOperand : public MCParsedAsmOperand {
63 enum KindTy {
64 Token,
65 Immediate,
66 Register,
67 Expression
68 } Kind;
69
70 SMLoc StartLoc, EndLoc;
71 const AMDGPUAsmParser *AsmParser;
72
73public:
74 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
75 : Kind(Kind_), AsmParser(AsmParser_) {}
76
77 using Ptr = std::unique_ptr<AMDGPUOperand>;
78
79 struct Modifiers {
80 bool Abs = false;
81 bool Neg = false;
82 bool Sext = false;
83 bool Lit = false;
84 bool Lit64 = false;
85
86 bool hasFPModifiers() const { return Abs || Neg; }
87 bool hasIntModifiers() const { return Sext; }
88 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
89
90 int64_t getFPModifiersOperand() const {
91 int64_t Operand = 0;
92 Operand |= Abs ? SISrcMods::ABS : 0u;
93 Operand |= Neg ? SISrcMods::NEG : 0u;
94 return Operand;
95 }
96
97 int64_t getIntModifiersOperand() const {
98 int64_t Operand = 0;
99 Operand |= Sext ? SISrcMods::SEXT : 0u;
100 return Operand;
101 }
102
103 int64_t getModifiersOperand() const {
104 assert(!(hasFPModifiers() && hasIntModifiers())
105 && "fp and int modifiers should not be used simultaneously");
106 if (hasFPModifiers())
107 return getFPModifiersOperand();
108 if (hasIntModifiers())
109 return getIntModifiersOperand();
110 return 0;
111 }
112
113 friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
114 };
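  // As an illustration of the mapping above: an operand written as "-|v0|"
  // is recorded with Abs and Neg set, so getModifiersOperand() returns
  // SISrcMods::ABS | SISrcMods::NEG, while "sext(v0)" sets only Sext and
  // yields SISrcMods::SEXT. FP (abs/neg) and integer (sext) modifiers are
  // never combined on a single operand, which the assert above enforces.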
115
116 enum ImmTy {
117 ImmTyNone,
118 ImmTyGDS,
119 ImmTyLDS,
120 ImmTyOffen,
121 ImmTyIdxen,
122 ImmTyAddr64,
123 ImmTyOffset,
124 ImmTyInstOffset,
125 ImmTyOffset0,
126 ImmTyOffset1,
127 ImmTySMEMOffsetMod,
128 ImmTyCPol,
129 ImmTyTFE,
130 ImmTyD16,
131 ImmTyClamp,
132 ImmTyOModSI,
133 ImmTySDWADstSel,
134 ImmTySDWASrc0Sel,
135 ImmTySDWASrc1Sel,
136 ImmTySDWADstUnused,
137 ImmTyDMask,
138 ImmTyDim,
139 ImmTyUNorm,
140 ImmTyDA,
141 ImmTyR128A16,
142 ImmTyA16,
143 ImmTyLWE,
144 ImmTyExpTgt,
145 ImmTyExpCompr,
146 ImmTyExpVM,
147 ImmTyFORMAT,
148 ImmTyHwreg,
149 ImmTyOff,
150 ImmTySendMsg,
151 ImmTyInterpSlot,
152 ImmTyInterpAttr,
153 ImmTyInterpAttrChan,
154 ImmTyOpSel,
155 ImmTyOpSelHi,
156 ImmTyNegLo,
157 ImmTyNegHi,
158 ImmTyIndexKey8bit,
159 ImmTyIndexKey16bit,
160 ImmTyIndexKey32bit,
161 ImmTyDPP8,
162 ImmTyDppCtrl,
163 ImmTyDppRowMask,
164 ImmTyDppBankMask,
165 ImmTyDppBoundCtrl,
166 ImmTyDppFI,
167 ImmTySwizzle,
168 ImmTyGprIdxMode,
169 ImmTyHigh,
170 ImmTyBLGP,
171 ImmTyCBSZ,
172 ImmTyABID,
173 ImmTyEndpgm,
174 ImmTyWaitVDST,
175 ImmTyWaitEXP,
176 ImmTyWaitVAVDst,
177 ImmTyWaitVMVSrc,
178 ImmTyBitOp3,
179 ImmTyMatrixAFMT,
180 ImmTyMatrixBFMT,
181 ImmTyMatrixAScale,
182 ImmTyMatrixBScale,
183 ImmTyMatrixAScaleFmt,
184 ImmTyMatrixBScaleFmt,
185 ImmTyMatrixAReuse,
186 ImmTyMatrixBReuse,
187 ImmTyScaleSel,
188 ImmTyByteSel,
189 };
190
191private:
192 struct TokOp {
193 const char *Data;
194 unsigned Length;
195 };
196
197 struct ImmOp {
198 int64_t Val;
199 ImmTy Type;
200 bool IsFPImm;
201 Modifiers Mods;
202 };
203
204 struct RegOp {
205 MCRegister RegNo;
206 Modifiers Mods;
207 };
208
209 union {
210 TokOp Tok;
211 ImmOp Imm;
212 RegOp Reg;
213 const MCExpr *Expr;
214 };
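  // The active union member is selected by Kind: Tok for Token, Imm for
  // Immediate, Reg for Register, and Expr for Expression.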
215
216 // The index of the associated MCInst operand.
217 mutable int MCOpIdx = -1;
218
219public:
220 bool isToken() const override { return Kind == Token; }
221
222 bool isSymbolRefExpr() const {
223 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
224 }
225
226 bool isImm() const override {
227 return Kind == Immediate;
228 }
229
230 bool isInlinableImm(MVT type) const;
231 bool isLiteralImm(MVT type) const;
232
233 bool isRegKind() const {
234 return Kind == Register;
235 }
236
237 bool isReg() const override {
238 return isRegKind() && !hasModifiers();
239 }
240
241 bool isRegOrInline(unsigned RCID, MVT type) const {
242 return isRegClass(RCID) || isInlinableImm(type);
243 }
244
245 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
246 return isRegOrInline(RCID, type) || isLiteralImm(type);
247 }
248
249 bool isRegOrImmWithInt16InputMods() const {
250 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
251 }
252
253 template <bool IsFake16> bool isRegOrImmWithIntT16InputMods() const {
 254 return isRegOrImmWithInputMods(
 255 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
256 }
257
258 bool isRegOrImmWithInt32InputMods() const {
259 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
260 }
261
262 bool isRegOrInlineImmWithInt16InputMods() const {
263 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
264 }
265
266 template <bool IsFake16> bool isRegOrInlineImmWithIntT16InputMods() const {
267 return isRegOrInline(
268 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
269 }
270
271 bool isRegOrInlineImmWithInt32InputMods() const {
272 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
273 }
274
275 bool isRegOrImmWithInt64InputMods() const {
276 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
277 }
278
279 bool isRegOrImmWithFP16InputMods() const {
280 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
281 }
282
283 template <bool IsFake16> bool isRegOrImmWithFPT16InputMods() const {
 284 return isRegOrImmWithInputMods(
 285 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
286 }
287
288 bool isRegOrImmWithFP32InputMods() const {
289 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
290 }
291
292 bool isRegOrImmWithFP64InputMods() const {
293 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
294 }
295
296 template <bool IsFake16> bool isRegOrInlineImmWithFP16InputMods() const {
297 return isRegOrInline(
298 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
299 }
300
301 bool isRegOrInlineImmWithFP32InputMods() const {
302 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
303 }
304
305 bool isRegOrInlineImmWithFP64InputMods() const {
306 return isRegOrInline(AMDGPU::VS_64RegClassID, MVT::f64);
307 }
308
309 bool isVRegWithInputMods(unsigned RCID) const { return isRegClass(RCID); }
310
311 bool isVRegWithFP32InputMods() const {
312 return isVRegWithInputMods(AMDGPU::VGPR_32RegClassID);
313 }
314
315 bool isVRegWithFP64InputMods() const {
316 return isVRegWithInputMods(AMDGPU::VReg_64RegClassID);
317 }
318
319 bool isPackedFP16InputMods() const {
320 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::v2f16);
321 }
322
323 bool isPackedVGPRFP32InputMods() const {
324 return isRegOrImmWithInputMods(AMDGPU::VReg_64RegClassID, MVT::v2f32);
325 }
326
327 bool isVReg() const {
328 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
329 isRegClass(AMDGPU::VReg_64RegClassID) ||
330 isRegClass(AMDGPU::VReg_96RegClassID) ||
331 isRegClass(AMDGPU::VReg_128RegClassID) ||
332 isRegClass(AMDGPU::VReg_160RegClassID) ||
333 isRegClass(AMDGPU::VReg_192RegClassID) ||
334 isRegClass(AMDGPU::VReg_256RegClassID) ||
335 isRegClass(AMDGPU::VReg_512RegClassID) ||
336 isRegClass(AMDGPU::VReg_1024RegClassID);
337 }
338
339 bool isVReg32() const {
340 return isRegClass(AMDGPU::VGPR_32RegClassID);
341 }
342
343 bool isVReg32OrOff() const {
344 return isOff() || isVReg32();
345 }
346
347 bool isNull() const {
348 return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
349 }
350
351 bool isVRegWithInputMods() const;
352 template <bool IsFake16> bool isT16_Lo128VRegWithInputMods() const;
353 template <bool IsFake16> bool isT16VRegWithInputMods() const;
354
355 bool isSDWAOperand(MVT type) const;
356 bool isSDWAFP16Operand() const;
357 bool isSDWAFP32Operand() const;
358 bool isSDWAInt16Operand() const;
359 bool isSDWAInt32Operand() const;
360
361 bool isImmTy(ImmTy ImmT) const {
362 return isImm() && Imm.Type == ImmT;
363 }
364
365 template <ImmTy Ty> bool isImmTy() const { return isImmTy(Ty); }
366
367 bool isImmLiteral() const { return isImmTy(ImmTyNone); }
368
369 bool isImmModifier() const {
370 return isImm() && Imm.Type != ImmTyNone;
371 }
372
373 bool isOModSI() const { return isImmTy(ImmTyOModSI); }
374 bool isDim() const { return isImmTy(ImmTyDim); }
375 bool isR128A16() const { return isImmTy(ImmTyR128A16); }
376 bool isOff() const { return isImmTy(ImmTyOff); }
377 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
378 bool isOffen() const { return isImmTy(ImmTyOffen); }
379 bool isIdxen() const { return isImmTy(ImmTyIdxen); }
380 bool isAddr64() const { return isImmTy(ImmTyAddr64); }
381 bool isSMEMOffsetMod() const { return isImmTy(ImmTySMEMOffsetMod); }
382 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
383 bool isGDS() const { return isImmTy(ImmTyGDS); }
384 bool isLDS() const { return isImmTy(ImmTyLDS); }
385 bool isCPol() const { return isImmTy(ImmTyCPol); }
386 bool isIndexKey8bit() const { return isImmTy(ImmTyIndexKey8bit); }
387 bool isIndexKey16bit() const { return isImmTy(ImmTyIndexKey16bit); }
388 bool isIndexKey32bit() const { return isImmTy(ImmTyIndexKey32bit); }
389 bool isMatrixAFMT() const { return isImmTy(ImmTyMatrixAFMT); }
390 bool isMatrixBFMT() const { return isImmTy(ImmTyMatrixBFMT); }
391 bool isMatrixAScale() const { return isImmTy(ImmTyMatrixAScale); }
392 bool isMatrixBScale() const { return isImmTy(ImmTyMatrixBScale); }
393 bool isMatrixAScaleFmt() const { return isImmTy(ImmTyMatrixAScaleFmt); }
394 bool isMatrixBScaleFmt() const { return isImmTy(ImmTyMatrixBScaleFmt); }
395 bool isMatrixAReuse() const { return isImmTy(ImmTyMatrixAReuse); }
396 bool isMatrixBReuse() const { return isImmTy(ImmTyMatrixBReuse); }
397 bool isTFE() const { return isImmTy(ImmTyTFE); }
398 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
399 bool isDppFI() const { return isImmTy(ImmTyDppFI); }
400 bool isSDWADstSel() const { return isImmTy(ImmTySDWADstSel); }
401 bool isSDWASrc0Sel() const { return isImmTy(ImmTySDWASrc0Sel); }
402 bool isSDWASrc1Sel() const { return isImmTy(ImmTySDWASrc1Sel); }
403 bool isSDWADstUnused() const { return isImmTy(ImmTySDWADstUnused); }
404 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
405 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
406 bool isInterpAttrChan() const { return isImmTy(ImmTyInterpAttrChan); }
407 bool isOpSel() const { return isImmTy(ImmTyOpSel); }
408 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
409 bool isNegLo() const { return isImmTy(ImmTyNegLo); }
410 bool isNegHi() const { return isImmTy(ImmTyNegHi); }
411 bool isBitOp3() const { return isImmTy(ImmTyBitOp3) && isUInt<8>(getImm()); }
412
413 bool isRegOrImm() const {
414 return isReg() || isImm();
415 }
416
417 bool isRegClass(unsigned RCID) const;
418
419 bool isInlineValue() const;
420
421 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
422 return isRegOrInline(RCID, type) && !hasModifiers();
423 }
424
425 bool isSCSrcB16() const {
426 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
427 }
428
429 bool isSCSrcV2B16() const {
430 return isSCSrcB16();
431 }
432
433 bool isSCSrc_b32() const {
434 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
435 }
436
437 bool isSCSrc_b64() const {
438 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
439 }
440
441 bool isBoolReg() const;
442
443 bool isSCSrcF16() const {
444 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
445 }
446
447 bool isSCSrcV2F16() const {
448 return isSCSrcF16();
449 }
450
451 bool isSCSrcF32() const {
452 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
453 }
454
455 bool isSCSrcF64() const {
456 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
457 }
458
459 bool isSSrc_b32() const {
460 return isSCSrc_b32() || isLiteralImm(MVT::i32) || isExpr();
461 }
462
463 bool isSSrc_b16() const { return isSCSrcB16() || isLiteralImm(MVT::i16); }
464
465 bool isSSrcV2B16() const {
466 llvm_unreachable("cannot happen");
467 return isSSrc_b16();
468 }
469
470 bool isSSrc_b64() const {
471 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
472 // See isVSrc64().
473 return isSCSrc_b64() || isLiteralImm(MVT::i64) ||
474 (((const MCTargetAsmParser *)AsmParser)
475 ->getAvailableFeatures()[AMDGPU::Feature64BitLiterals] &&
476 isExpr());
477 }
478
479 bool isSSrc_f32() const {
480 return isSCSrc_b32() || isLiteralImm(MVT::f32) || isExpr();
481 }
482
483 bool isSSrcF64() const { return isSCSrc_b64() || isLiteralImm(MVT::f64); }
484
485 bool isSSrc_bf16() const { return isSCSrcB16() || isLiteralImm(MVT::bf16); }
486
487 bool isSSrc_f16() const { return isSCSrcB16() || isLiteralImm(MVT::f16); }
488
489 bool isSSrcV2F16() const {
490 llvm_unreachable("cannot happen");
491 return isSSrc_f16();
492 }
493
494 bool isSSrcV2FP32() const {
495 llvm_unreachable("cannot happen");
496 return isSSrc_f32();
497 }
498
499 bool isSCSrcV2FP32() const {
500 llvm_unreachable("cannot happen");
501 return isSCSrcF32();
502 }
503
504 bool isSSrcV2INT32() const {
505 llvm_unreachable("cannot happen");
506 return isSSrc_b32();
507 }
508
509 bool isSCSrcV2INT32() const {
510 llvm_unreachable("cannot happen");
511 return isSCSrc_b32();
512 }
513
514 bool isSSrcOrLds_b32() const {
515 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
516 isLiteralImm(MVT::i32) || isExpr();
517 }
518
519 bool isVCSrc_b32() const {
520 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
521 }
522
523 bool isVCSrc_b32_Lo256() const {
524 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo256RegClassID, MVT::i32);
525 }
526
527 bool isVCSrc_b64_Lo256() const {
528 return isRegOrInlineNoMods(AMDGPU::VS_64_Lo256RegClassID, MVT::i64);
529 }
530
531 bool isVCSrc_b64() const {
532 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
533 }
534
535 bool isVCSrcT_b16() const {
536 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::i16);
537 }
538
539 bool isVCSrcTB16_Lo128() const {
540 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::i16);
541 }
542
543 bool isVCSrcFake16B16_Lo128() const {
544 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16);
545 }
546
547 bool isVCSrc_b16() const {
548 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
549 }
550
551 bool isVCSrc_v2b16() const { return isVCSrc_b16(); }
552
553 bool isVCSrc_f32() const {
554 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
555 }
556
557 bool isVCSrc_f64() const {
558 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
559 }
560
561 bool isVCSrcTBF16() const {
562 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::bf16);
563 }
564
565 bool isVCSrcT_f16() const {
566 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
567 }
568
569 bool isVCSrcT_bf16() const {
 570 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::bf16);
571 }
572
573 bool isVCSrcTBF16_Lo128() const {
574 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::bf16);
575 }
576
577 bool isVCSrcTF16_Lo128() const {
578 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::f16);
579 }
580
581 bool isVCSrcFake16BF16_Lo128() const {
582 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::bf16);
583 }
584
585 bool isVCSrcFake16F16_Lo128() const {
586 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
587 }
588
589 bool isVCSrc_bf16() const {
590 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::bf16);
591 }
592
593 bool isVCSrc_f16() const {
594 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
595 }
596
597 bool isVCSrc_v2bf16() const { return isVCSrc_bf16(); }
598
599 bool isVCSrc_v2f16() const { return isVCSrc_f16(); }
600
601 bool isVSrc_b32() const {
602 return isVCSrc_f32() || isLiteralImm(MVT::i32) || isExpr();
603 }
604
605 bool isVSrc_b64() const { return isVCSrc_f64() || isLiteralImm(MVT::i64); }
606
607 bool isVSrcT_b16() const { return isVCSrcT_b16() || isLiteralImm(MVT::i16); }
608
609 bool isVSrcT_b16_Lo128() const {
610 return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
611 }
612
613 bool isVSrcFake16_b16_Lo128() const {
614 return isVCSrcFake16B16_Lo128() || isLiteralImm(MVT::i16);
615 }
616
617 bool isVSrc_b16() const { return isVCSrc_b16() || isLiteralImm(MVT::i16); }
618
619 bool isVSrc_v2b16() const { return isVSrc_b16() || isLiteralImm(MVT::v2i16); }
620
621 bool isVCSrcV2FP32() const { return isVCSrc_f64(); }
622
623 bool isVSrc_v2f32() const { return isVSrc_f64() || isLiteralImm(MVT::v2f32); }
624
625 bool isVCSrc_v2b32() const { return isVCSrc_b64(); }
626
627 bool isVSrc_v2b32() const { return isVSrc_b64() || isLiteralImm(MVT::v2i32); }
628
629 bool isVSrc_f32() const {
630 return isVCSrc_f32() || isLiteralImm(MVT::f32) || isExpr();
631 }
632
633 bool isVSrc_f64() const { return isVCSrc_f64() || isLiteralImm(MVT::f64); }
634
635 bool isVSrcT_bf16() const { return isVCSrcTBF16() || isLiteralImm(MVT::bf16); }
636
637 bool isVSrcT_f16() const { return isVCSrcT_f16() || isLiteralImm(MVT::f16); }
638
639 bool isVSrcT_bf16_Lo128() const {
640 return isVCSrcTBF16_Lo128() || isLiteralImm(MVT::bf16);
641 }
642
643 bool isVSrcT_f16_Lo128() const {
644 return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
645 }
646
647 bool isVSrcFake16_bf16_Lo128() const {
648 return isVCSrcFake16BF16_Lo128() || isLiteralImm(MVT::bf16);
649 }
650
651 bool isVSrcFake16_f16_Lo128() const {
652 return isVCSrcFake16F16_Lo128() || isLiteralImm(MVT::f16);
653 }
654
655 bool isVSrc_bf16() const { return isVCSrc_bf16() || isLiteralImm(MVT::bf16); }
656
657 bool isVSrc_f16() const { return isVCSrc_f16() || isLiteralImm(MVT::f16); }
658
659 bool isVSrc_v2bf16() const {
660 return isVSrc_bf16() || isLiteralImm(MVT::v2bf16);
661 }
662
663 bool isVSrc_v2f16() const { return isVSrc_f16() || isLiteralImm(MVT::v2f16); }
664
665 bool isVSrc_NoInline_v2f16() const { return isVSrc_v2f16(); }
666
667 bool isVISrcB32() const {
668 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
669 }
670
671 bool isVISrcB16() const {
672 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
673 }
674
675 bool isVISrcV2B16() const {
676 return isVISrcB16();
677 }
678
679 bool isVISrcF32() const {
680 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
681 }
682
683 bool isVISrcF16() const {
684 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
685 }
686
687 bool isVISrcV2F16() const {
688 return isVISrcF16() || isVISrcB32();
689 }
690
691 bool isVISrc_64_bf16() const {
692 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::bf16);
693 }
694
695 bool isVISrc_64_f16() const {
696 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f16);
697 }
698
699 bool isVISrc_64_b32() const {
700 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
701 }
702
703 bool isVISrc_64B64() const {
704 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
705 }
706
707 bool isVISrc_64_f64() const {
708 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
709 }
710
711 bool isVISrc_64V2FP32() const {
712 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
713 }
714
715 bool isVISrc_64V2INT32() const {
716 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
717 }
718
719 bool isVISrc_256_b32() const {
720 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
721 }
722
723 bool isVISrc_256_f32() const {
724 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
725 }
726
727 bool isVISrc_256B64() const {
728 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
729 }
730
731 bool isVISrc_256_f64() const {
732 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
733 }
734
735 bool isVISrc_512_f64() const {
736 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f64);
737 }
738
739 bool isVISrc_128B16() const {
740 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
741 }
742
743 bool isVISrc_128V2B16() const {
744 return isVISrc_128B16();
745 }
746
747 bool isVISrc_128_b32() const {
748 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
749 }
750
751 bool isVISrc_128_f32() const {
752 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
753 }
754
755 bool isVISrc_256V2FP32() const {
756 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
757 }
758
759 bool isVISrc_256V2INT32() const {
760 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
761 }
762
763 bool isVISrc_512_b32() const {
764 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
765 }
766
767 bool isVISrc_512B16() const {
768 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
769 }
770
771 bool isVISrc_512V2B16() const {
772 return isVISrc_512B16();
773 }
774
775 bool isVISrc_512_f32() const {
776 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
777 }
778
779 bool isVISrc_512F16() const {
780 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
781 }
782
783 bool isVISrc_512V2F16() const {
784 return isVISrc_512F16() || isVISrc_512_b32();
785 }
786
787 bool isVISrc_1024_b32() const {
788 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
789 }
790
791 bool isVISrc_1024B16() const {
792 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
793 }
794
795 bool isVISrc_1024V2B16() const {
796 return isVISrc_1024B16();
797 }
798
799 bool isVISrc_1024_f32() const {
800 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
801 }
802
803 bool isVISrc_1024F16() const {
804 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
805 }
806
807 bool isVISrc_1024V2F16() const {
808 return isVISrc_1024F16() || isVISrc_1024_b32();
809 }
810
811 bool isAISrcB32() const {
812 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
813 }
814
815 bool isAISrcB16() const {
816 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
817 }
818
819 bool isAISrcV2B16() const {
820 return isAISrcB16();
821 }
822
823 bool isAISrcF32() const {
824 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
825 }
826
827 bool isAISrcF16() const {
828 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
829 }
830
831 bool isAISrcV2F16() const {
832 return isAISrcF16() || isAISrcB32();
833 }
834
835 bool isAISrc_64B64() const {
836 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
837 }
838
839 bool isAISrc_64_f64() const {
840 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
841 }
842
843 bool isAISrc_128_b32() const {
844 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
845 }
846
847 bool isAISrc_128B16() const {
848 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
849 }
850
851 bool isAISrc_128V2B16() const {
852 return isAISrc_128B16();
853 }
854
855 bool isAISrc_128_f32() const {
856 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
857 }
858
859 bool isAISrc_128F16() const {
860 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
861 }
862
863 bool isAISrc_128V2F16() const {
864 return isAISrc_128F16() || isAISrc_128_b32();
865 }
866
867 bool isVISrc_128_bf16() const {
868 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::bf16);
869 }
870
871 bool isVISrc_128_f16() const {
872 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
873 }
874
875 bool isVISrc_128V2F16() const {
876 return isVISrc_128_f16() || isVISrc_128_b32();
877 }
878
879 bool isAISrc_256B64() const {
880 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
881 }
882
883 bool isAISrc_256_f64() const {
884 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
885 }
886
887 bool isAISrc_512_b32() const {
888 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
889 }
890
891 bool isAISrc_512B16() const {
892 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
893 }
894
895 bool isAISrc_512V2B16() const {
896 return isAISrc_512B16();
897 }
898
899 bool isAISrc_512_f32() const {
900 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
901 }
902
903 bool isAISrc_512F16() const {
904 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
905 }
906
907 bool isAISrc_512V2F16() const {
908 return isAISrc_512F16() || isAISrc_512_b32();
909 }
910
911 bool isAISrc_1024_b32() const {
912 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
913 }
914
915 bool isAISrc_1024B16() const {
916 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
917 }
918
919 bool isAISrc_1024V2B16() const {
920 return isAISrc_1024B16();
921 }
922
923 bool isAISrc_1024_f32() const {
924 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
925 }
926
927 bool isAISrc_1024F16() const {
928 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
929 }
930
931 bool isAISrc_1024V2F16() const {
932 return isAISrc_1024F16() || isAISrc_1024_b32();
933 }
934
935 bool isKImmFP32() const {
936 return isLiteralImm(MVT::f32);
937 }
938
939 bool isKImmFP16() const {
940 return isLiteralImm(MVT::f16);
941 }
942
943 bool isKImmFP64() const { return isLiteralImm(MVT::f64); }
944
945 bool isMem() const override {
946 return false;
947 }
948
949 bool isExpr() const {
950 return Kind == Expression;
951 }
952
953 bool isSOPPBrTarget() const { return isExpr() || isImm(); }
954
955 bool isSWaitCnt() const;
956 bool isDepCtr() const;
957 bool isSDelayALU() const;
958 bool isHwreg() const;
959 bool isSendMsg() const;
960 bool isSplitBarrier() const;
961 bool isSwizzle() const;
962 bool isSMRDOffset8() const;
963 bool isSMEMOffset() const;
964 bool isSMRDLiteralOffset() const;
965 bool isDPP8() const;
966 bool isDPPCtrl() const;
967 bool isBLGP() const;
968 bool isGPRIdxMode() const;
969 bool isS16Imm() const;
970 bool isU16Imm() const;
971 bool isEndpgm() const;
972
973 auto getPredicate(std::function<bool(const AMDGPUOperand &Op)> P) const {
974 return [this, P]() { return P(*this); };
975 }
976
977 StringRef getToken() const {
978 assert(isToken());
979 return StringRef(Tok.Data, Tok.Length);
980 }
981
982 int64_t getImm() const {
983 assert(isImm());
984 return Imm.Val;
985 }
986
987 void setImm(int64_t Val) {
988 assert(isImm());
989 Imm.Val = Val;
990 }
991
992 ImmTy getImmTy() const {
993 assert(isImm());
994 return Imm.Type;
995 }
996
997 MCRegister getReg() const override {
998 assert(isRegKind());
999 return Reg.RegNo;
1000 }
1001
1002 SMLoc getStartLoc() const override {
1003 return StartLoc;
1004 }
1005
1006 SMLoc getEndLoc() const override {
1007 return EndLoc;
1008 }
1009
1010 SMRange getLocRange() const {
1011 return SMRange(StartLoc, EndLoc);
1012 }
1013
1014 int getMCOpIdx() const { return MCOpIdx; }
1015
1016 Modifiers getModifiers() const {
1017 assert(isRegKind() || isImmTy(ImmTyNone));
1018 return isRegKind() ? Reg.Mods : Imm.Mods;
1019 }
1020
1021 void setModifiers(Modifiers Mods) {
1022 assert(isRegKind() || isImmTy(ImmTyNone));
1023 if (isRegKind())
1024 Reg.Mods = Mods;
1025 else
1026 Imm.Mods = Mods;
1027 }
1028
1029 bool hasModifiers() const {
1030 return getModifiers().hasModifiers();
1031 }
1032
1033 bool hasFPModifiers() const {
1034 return getModifiers().hasFPModifiers();
1035 }
1036
1037 bool hasIntModifiers() const {
1038 return getModifiers().hasIntModifiers();
1039 }
1040
1041 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
1042
1043 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
1044
1045 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
1046
1047 void addRegOperands(MCInst &Inst, unsigned N) const;
1048
1049 void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
1050 if (isRegKind())
1051 addRegOperands(Inst, N);
1052 else
1053 addImmOperands(Inst, N);
1054 }
1055
1056 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
1057 Modifiers Mods = getModifiers();
1058 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
1059 if (isRegKind()) {
1060 addRegOperands(Inst, N);
1061 } else {
1062 addImmOperands(Inst, N, false);
1063 }
1064 }
1065
1066 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
1067 assert(!hasIntModifiers());
1068 addRegOrImmWithInputModsOperands(Inst, N);
1069 }
1070
1071 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
1072 assert(!hasFPModifiers());
1073 addRegOrImmWithInputModsOperands(Inst, N);
1074 }
1075
1076 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
1077 Modifiers Mods = getModifiers();
1078 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
1079 assert(isRegKind());
1080 addRegOperands(Inst, N);
1081 }
1082
1083 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
1084 assert(!hasIntModifiers());
1085 addRegWithInputModsOperands(Inst, N);
1086 }
1087
1088 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
1089 assert(!hasFPModifiers());
1090 addRegWithInputModsOperands(Inst, N);
1091 }
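  // Each of the *WithInputModsOperands helpers above emits two MCInst
  // operands: first the src_modifiers immediate built from Modifiers, then
  // the register or (unmodified) immediate itself.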
1092
1093 static void printImmTy(raw_ostream& OS, ImmTy Type) {
1094 // clang-format off
1095 switch (Type) {
1096 case ImmTyNone: OS << "None"; break;
1097 case ImmTyGDS: OS << "GDS"; break;
1098 case ImmTyLDS: OS << "LDS"; break;
1099 case ImmTyOffen: OS << "Offen"; break;
1100 case ImmTyIdxen: OS << "Idxen"; break;
1101 case ImmTyAddr64: OS << "Addr64"; break;
1102 case ImmTyOffset: OS << "Offset"; break;
1103 case ImmTyInstOffset: OS << "InstOffset"; break;
1104 case ImmTyOffset0: OS << "Offset0"; break;
1105 case ImmTyOffset1: OS << "Offset1"; break;
1106 case ImmTySMEMOffsetMod: OS << "SMEMOffsetMod"; break;
1107 case ImmTyCPol: OS << "CPol"; break;
1108 case ImmTyIndexKey8bit: OS << "index_key"; break;
1109 case ImmTyIndexKey16bit: OS << "index_key"; break;
1110 case ImmTyIndexKey32bit: OS << "index_key"; break;
1111 case ImmTyTFE: OS << "TFE"; break;
1112 case ImmTyD16: OS << "D16"; break;
1113 case ImmTyFORMAT: OS << "FORMAT"; break;
1114 case ImmTyClamp: OS << "Clamp"; break;
1115 case ImmTyOModSI: OS << "OModSI"; break;
1116 case ImmTyDPP8: OS << "DPP8"; break;
1117 case ImmTyDppCtrl: OS << "DppCtrl"; break;
1118 case ImmTyDppRowMask: OS << "DppRowMask"; break;
1119 case ImmTyDppBankMask: OS << "DppBankMask"; break;
1120 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1121 case ImmTyDppFI: OS << "DppFI"; break;
1122 case ImmTySDWADstSel: OS << "SDWADstSel"; break;
1123 case ImmTySDWASrc0Sel: OS << "SDWASrc0Sel"; break;
1124 case ImmTySDWASrc1Sel: OS << "SDWASrc1Sel"; break;
1125 case ImmTySDWADstUnused: OS << "SDWADstUnused"; break;
1126 case ImmTyDMask: OS << "DMask"; break;
1127 case ImmTyDim: OS << "Dim"; break;
1128 case ImmTyUNorm: OS << "UNorm"; break;
1129 case ImmTyDA: OS << "DA"; break;
1130 case ImmTyR128A16: OS << "R128A16"; break;
1131 case ImmTyA16: OS << "A16"; break;
1132 case ImmTyLWE: OS << "LWE"; break;
1133 case ImmTyOff: OS << "Off"; break;
1134 case ImmTyExpTgt: OS << "ExpTgt"; break;
1135 case ImmTyExpCompr: OS << "ExpCompr"; break;
1136 case ImmTyExpVM: OS << "ExpVM"; break;
1137 case ImmTyHwreg: OS << "Hwreg"; break;
1138 case ImmTySendMsg: OS << "SendMsg"; break;
1139 case ImmTyInterpSlot: OS << "InterpSlot"; break;
1140 case ImmTyInterpAttr: OS << "InterpAttr"; break;
1141 case ImmTyInterpAttrChan: OS << "InterpAttrChan"; break;
1142 case ImmTyOpSel: OS << "OpSel"; break;
1143 case ImmTyOpSelHi: OS << "OpSelHi"; break;
1144 case ImmTyNegLo: OS << "NegLo"; break;
1145 case ImmTyNegHi: OS << "NegHi"; break;
1146 case ImmTySwizzle: OS << "Swizzle"; break;
1147 case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1148 case ImmTyHigh: OS << "High"; break;
1149 case ImmTyBLGP: OS << "BLGP"; break;
1150 case ImmTyCBSZ: OS << "CBSZ"; break;
1151 case ImmTyABID: OS << "ABID"; break;
1152 case ImmTyEndpgm: OS << "Endpgm"; break;
1153 case ImmTyWaitVDST: OS << "WaitVDST"; break;
1154 case ImmTyWaitEXP: OS << "WaitEXP"; break;
1155 case ImmTyWaitVAVDst: OS << "WaitVAVDst"; break;
1156 case ImmTyWaitVMVSrc: OS << "WaitVMVSrc"; break;
1157 case ImmTyBitOp3: OS << "BitOp3"; break;
1158 case ImmTyMatrixAFMT: OS << "ImmTyMatrixAFMT"; break;
1159 case ImmTyMatrixBFMT: OS << "ImmTyMatrixBFMT"; break;
1160 case ImmTyMatrixAScale: OS << "ImmTyMatrixAScale"; break;
1161 case ImmTyMatrixBScale: OS << "ImmTyMatrixBScale"; break;
1162 case ImmTyMatrixAScaleFmt: OS << "ImmTyMatrixAScaleFmt"; break;
1163 case ImmTyMatrixBScaleFmt: OS << "ImmTyMatrixBScaleFmt"; break;
1164 case ImmTyMatrixAReuse: OS << "ImmTyMatrixAReuse"; break;
1165 case ImmTyMatrixBReuse: OS << "ImmTyMatrixBReuse"; break;
1166 case ImmTyScaleSel: OS << "ScaleSel" ; break;
1167 case ImmTyByteSel: OS << "ByteSel" ; break;
1168 }
1169 // clang-format on
1170 }
1171
1172 void print(raw_ostream &OS, const MCAsmInfo &MAI) const override {
1173 switch (Kind) {
1174 case Register:
1175 OS << "<register " << AMDGPUInstPrinter::getRegisterName(getReg())
1176 << " mods: " << Reg.Mods << '>';
1177 break;
1178 case Immediate:
1179 OS << '<' << getImm();
1180 if (getImmTy() != ImmTyNone) {
1181 OS << " type: "; printImmTy(OS, getImmTy());
1182 }
1183 OS << " mods: " << Imm.Mods << '>';
1184 break;
1185 case Token:
1186 OS << '\'' << getToken() << '\'';
1187 break;
1188 case Expression:
1189 OS << "<expr ";
1190 MAI.printExpr(OS, *Expr);
1191 OS << '>';
1192 break;
1193 }
1194 }
1195
1196 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1197 int64_t Val, SMLoc Loc,
1198 ImmTy Type = ImmTyNone,
1199 bool IsFPImm = false) {
1200 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1201 Op->Imm.Val = Val;
1202 Op->Imm.IsFPImm = IsFPImm;
1203 Op->Imm.Type = Type;
1204 Op->Imm.Mods = Modifiers();
1205 Op->StartLoc = Loc;
1206 Op->EndLoc = Loc;
1207 return Op;
1208 }
1209
1210 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1211 StringRef Str, SMLoc Loc,
1212 bool HasExplicitEncodingSize = true) {
1213 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1214 Res->Tok.Data = Str.data();
1215 Res->Tok.Length = Str.size();
1216 Res->StartLoc = Loc;
1217 Res->EndLoc = Loc;
1218 return Res;
1219 }
1220
1221 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1222 MCRegister Reg, SMLoc S, SMLoc E) {
1223 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1224 Op->Reg.RegNo = Reg;
1225 Op->Reg.Mods = Modifiers();
1226 Op->StartLoc = S;
1227 Op->EndLoc = E;
1228 return Op;
1229 }
1230
1231 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1232 const class MCExpr *Expr, SMLoc S) {
1233 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1234 Op->Expr = Expr;
1235 Op->StartLoc = S;
1236 Op->EndLoc = S;
1237 return Op;
1238 }
1239};
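// Typical use of the factory methods above once the parser has recognized an
// operand at source location S (an illustrative sketch, not a verbatim
// excerpt from later in this file):
//
//   Operands.push_back(
//       AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyOffset));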
1240
1241raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1242 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
1243 return OS;
1244}
1245
1246//===----------------------------------------------------------------------===//
1247// AsmParser
1248//===----------------------------------------------------------------------===//
1249
1250// Holds info related to the current kernel, e.g. the count of SGPRs used.
1251// A kernel scope begins at the .amdgpu_hsa_kernel directive and ends at the
1252// next .amdgpu_hsa_kernel directive or at EOF.
1253class KernelScopeInfo {
1254 int SgprIndexUnusedMin = -1;
1255 int VgprIndexUnusedMin = -1;
1256 int AgprIndexUnusedMin = -1;
1257 MCContext *Ctx = nullptr;
1258 MCSubtargetInfo const *MSTI = nullptr;
1259
1260 void usesSgprAt(int i) {
1261 if (i >= SgprIndexUnusedMin) {
1262 SgprIndexUnusedMin = ++i;
1263 if (Ctx) {
1264 MCSymbol* const Sym =
1265 Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1266 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1267 }
1268 }
1269 }
1270
1271 void usesVgprAt(int i) {
1272 if (i >= VgprIndexUnusedMin) {
1273 VgprIndexUnusedMin = ++i;
1274 if (Ctx) {
1275 MCSymbol* const Sym =
1276 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1277 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1278 VgprIndexUnusedMin);
1279 Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1280 }
1281 }
1282 }
1283
1284 void usesAgprAt(int i) {
1285 // Instruction will error in AMDGPUAsmParser::matchAndEmitInstruction
1286 if (!hasMAIInsts(*MSTI))
1287 return;
1288
1289 if (i >= AgprIndexUnusedMin) {
1290 AgprIndexUnusedMin = ++i;
1291 if (Ctx) {
1292 MCSymbol* const Sym =
1293 Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
1294 Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));
1295
1296 // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
1297 MCSymbol* const vSym =
1298 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1299 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1300 VgprIndexUnusedMin);
1301 vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1302 }
1303 }
1304 }
1305
1306public:
1307 KernelScopeInfo() = default;
1308
1309 void initialize(MCContext &Context) {
1310 Ctx = &Context;
1311 MSTI = Ctx->getSubtargetInfo();
1312
1313 usesSgprAt(SgprIndexUnusedMin = -1);
1314 usesVgprAt(VgprIndexUnusedMin = -1);
1315 if (hasMAIInsts(*MSTI)) {
1316 usesAgprAt(AgprIndexUnusedMin = -1);
1317 }
1318 }
1319
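  // Records the highest register index touched by an operand. For example, a
  // use of s[4:7] has DwordRegIndex = 4 and RegWidth = 128, i.e.
  // divideCeil(128, 32) = 4 dwords, so the highest SGPR index used is
  // 4 + 4 - 1 = 7 and SgprIndexUnusedMin becomes 8.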
1320 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
1321 unsigned RegWidth) {
1322 switch (RegKind) {
1323 case IS_SGPR:
1324 usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1325 break;
1326 case IS_AGPR:
1327 usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1328 break;
1329 case IS_VGPR:
1330 usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1331 break;
1332 default:
1333 break;
1334 }
1335 }
1336};
1337
1338class AMDGPUAsmParser : public MCTargetAsmParser {
1339 MCAsmParser &Parser;
1340
1341 unsigned ForcedEncodingSize = 0;
1342 bool ForcedDPP = false;
1343 bool ForcedSDWA = false;
1344 KernelScopeInfo KernelScope;
1345
1346 /// @name Auto-generated Match Functions
1347 /// {
1348
1349#define GET_ASSEMBLER_HEADER
1350#include "AMDGPUGenAsmMatcher.inc"
1351
1352 /// }
1353
1354private:
1355 void createConstantSymbol(StringRef Id, int64_t Val);
1356
1357 bool ParseAsAbsoluteExpression(uint32_t &Ret);
1358 bool OutOfRangeError(SMRange Range);
1359 /// Calculate VGPR/SGPR blocks required for the given target, reserved
1360 /// registers, and user-specified NextFreeXGPR values.
1361 ///
1362 /// \param Features [in] Target features, used for bug corrections.
1363 /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1364 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1365 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1366 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1367 /// descriptor field, if valid.
1368 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1369 /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1370 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1371 /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1372 /// \param VGPRBlocks [out] Result VGPR block count.
1373 /// \param SGPRBlocks [out] Result SGPR block count.
1374 bool calculateGPRBlocks(const FeatureBitset &Features, const MCExpr *VCCUsed,
1375 const MCExpr *FlatScrUsed, bool XNACKUsed,
1376 std::optional<bool> EnableWavefrontSize32,
1377 const MCExpr *NextFreeVGPR, SMRange VGPRRange,
1378 const MCExpr *NextFreeSGPR, SMRange SGPRRange,
1379 const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks);
1380 bool ParseDirectiveAMDGCNTarget();
1381 bool ParseDirectiveAMDHSACodeObjectVersion();
1382 bool ParseDirectiveAMDHSAKernel();
1383 bool ParseAMDKernelCodeTValue(StringRef ID, AMDGPUMCKernelCodeT &Header);
1384 bool ParseDirectiveAMDKernelCodeT();
1385 // TODO: Possibly make subtargetHasRegister const.
1386 bool subtargetHasRegister(const MCRegisterInfo &MRI, MCRegister Reg);
1387 bool ParseDirectiveAMDGPUHsaKernel();
1388
1389 bool ParseDirectiveISAVersion();
1390 bool ParseDirectiveHSAMetadata();
1391 bool ParseDirectivePALMetadataBegin();
1392 bool ParseDirectivePALMetadata();
1393 bool ParseDirectiveAMDGPULDS();
1394
1395 /// Common code to parse out a block of text (typically YAML) between start and
1396 /// end directives.
1397 bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1398 const char *AssemblerDirectiveEnd,
1399 std::string &CollectString);
1400
1401 bool AddNextRegisterToList(MCRegister &Reg, unsigned &RegWidth,
1402 RegisterKind RegKind, MCRegister Reg1, SMLoc Loc);
1403 bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &Reg,
1404 unsigned &RegNum, unsigned &RegWidth,
1405 bool RestoreOnFailure = false);
1406 bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &Reg,
 1407 unsigned &RegNum, unsigned &RegWidth,
 1408 SmallVectorImpl<AsmToken> &Tokens);
 1409 MCRegister ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
 1410 unsigned &RegWidth,
 1411 SmallVectorImpl<AsmToken> &Tokens);
 1412 MCRegister ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
 1413 unsigned &RegWidth,
 1414 SmallVectorImpl<AsmToken> &Tokens);
 1415 MCRegister ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
 1416 unsigned &RegWidth,
 1417 SmallVectorImpl<AsmToken> &Tokens);
1418 bool ParseRegRange(unsigned &Num, unsigned &Width, unsigned &SubReg);
1419 MCRegister getRegularReg(RegisterKind RegKind, unsigned RegNum,
1420 unsigned SubReg, unsigned RegWidth, SMLoc Loc);
1421
1422 bool isRegister();
1423 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1424 std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1425 void initializeGprCountSymbol(RegisterKind RegKind);
1426 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1427 unsigned RegWidth);
1428 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1429 bool IsAtomic);
1430
1431public:
1432 enum OperandMode {
1433 OperandMode_Default,
1434 OperandMode_NSA,
1435 };
1436
1437 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1438
1439 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1440 const MCInstrInfo &MII,
1441 const MCTargetOptions &Options)
1442 : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1444
1445 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1446
1447 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1448 if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
1449 createConstantSymbol(".amdgcn.gfx_generation_number", ISA.Major);
1450 createConstantSymbol(".amdgcn.gfx_generation_minor", ISA.Minor);
1451 createConstantSymbol(".amdgcn.gfx_generation_stepping", ISA.Stepping);
1452 } else {
1453 createConstantSymbol(".option.machine_version_major", ISA.Major);
1454 createConstantSymbol(".option.machine_version_minor", ISA.Minor);
1455 createConstantSymbol(".option.machine_version_stepping", ISA.Stepping);
1456 }
1457 if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
1458 initializeGprCountSymbol(IS_VGPR);
1459 initializeGprCountSymbol(IS_SGPR);
1460 } else
1461 KernelScope.initialize(getContext());
1462
1463 for (auto [Symbol, Code] : AMDGPU::UCVersion::getGFXVersions())
1464 createConstantSymbol(Symbol, Code);
1465
1466 createConstantSymbol("UC_VERSION_W64_BIT", 0x2000);
1467 createConstantSymbol("UC_VERSION_W32_BIT", 0x4000);
1468 createConstantSymbol("UC_VERSION_MDP_BIT", 0x8000);
1469 }
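  // The constant symbols defined above are ordinary absolute assembler
  // symbols, so assembly sources can use them in expressions. An illustrative
  // snippet (any GFX10+ instruction would do):
  //
  //   .if .amdgcn.gfx_generation_number >= 10
  //     s_waitcnt_vscnt null, 0x0
  //   .endif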
1470
1471 bool hasMIMG_R128() const {
1472 return AMDGPU::hasMIMG_R128(getSTI());
1473 }
1474
1475 bool hasPackedD16() const {
1476 return AMDGPU::hasPackedD16(getSTI());
1477 }
1478
1479 bool hasA16() const { return AMDGPU::hasA16(getSTI()); }
1480
1481 bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1482
1483 bool hasGDS() const { return AMDGPU::hasGDS(getSTI()); }
1484
1485 bool isSI() const {
1486 return AMDGPU::isSI(getSTI());
1487 }
1488
1489 bool isCI() const {
1490 return AMDGPU::isCI(getSTI());
1491 }
1492
1493 bool isVI() const {
1494 return AMDGPU::isVI(getSTI());
1495 }
1496
1497 bool isGFX9() const {
1498 return AMDGPU::isGFX9(getSTI());
1499 }
1500
 1501 // TODO: isGFX90A is also true for GFX940. This needs to be cleaned up.
1502 bool isGFX90A() const {
1503 return AMDGPU::isGFX90A(getSTI());
1504 }
1505
1506 bool isGFX940() const {
1507 return AMDGPU::isGFX940(getSTI());
1508 }
1509
1510 bool isGFX9Plus() const {
1511 return AMDGPU::isGFX9Plus(getSTI());
1512 }
1513
1514 bool isGFX10() const {
1515 return AMDGPU::isGFX10(getSTI());
1516 }
1517
1518 bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1519
1520 bool isGFX11() const {
1521 return AMDGPU::isGFX11(getSTI());
1522 }
1523
1524 bool isGFX11Plus() const {
1525 return AMDGPU::isGFX11Plus(getSTI());
1526 }
1527
1528 bool isGFX12() const { return AMDGPU::isGFX12(getSTI()); }
1529
1530 bool isGFX12Plus() const { return AMDGPU::isGFX12Plus(getSTI()); }
1531
1532 bool isGFX1250() const { return AMDGPU::isGFX1250(getSTI()); }
1533
1534 bool isGFX10_AEncoding() const { return AMDGPU::isGFX10_AEncoding(getSTI()); }
1535
1536 bool isGFX10_BEncoding() const {
1537 return AMDGPU::isGFX10_BEncoding(getSTI());
1538 }
1539
1540 bool hasInv2PiInlineImm() const {
1541 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1542 }
1543
1544 bool has64BitLiterals() const {
1545 return getFeatureBits()[AMDGPU::Feature64BitLiterals];
1546 }
1547
1548 bool hasFlatOffsets() const {
1549 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1550 }
1551
1552 bool hasTrue16Insts() const {
1553 return getFeatureBits()[AMDGPU::FeatureTrue16BitInsts];
1554 }
1555
1556 bool hasArchitectedFlatScratch() const {
1557 return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1558 }
1559
1560 bool hasSGPR102_SGPR103() const {
1561 return !isVI() && !isGFX9();
1562 }
1563
1564 bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1565
1566 bool hasIntClamp() const {
1567 return getFeatureBits()[AMDGPU::FeatureIntClamp];
1568 }
1569
1570 bool hasPartialNSAEncoding() const {
1571 return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
1572 }
1573
1574 bool hasGloballyAddressableScratch() const {
1575 return getFeatureBits()[AMDGPU::FeatureGloballyAddressableScratch];
1576 }
1577
1578 unsigned getNSAMaxSize(bool HasSampler = false) const {
1579 return AMDGPU::getNSAMaxSize(getSTI(), HasSampler);
1580 }
1581
1582 unsigned getMaxNumUserSGPRs() const {
1583 return AMDGPU::getMaxNumUserSGPRs(getSTI());
1584 }
1585
1586 bool hasKernargPreload() const { return AMDGPU::hasKernargPreload(getSTI()); }
1587
1588 AMDGPUTargetStreamer &getTargetStreamer() {
1589 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1590 return static_cast<AMDGPUTargetStreamer &>(TS);
1591 }
1592
1593 const MCRegisterInfo *getMRI() const {
1594 // We need this const_cast because for some reason getContext() is not const
1595 // in MCAsmParser.
1596 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1597 }
1598
1599 const MCInstrInfo *getMII() const {
1600 return &MII;
1601 }
1602
1603 const FeatureBitset &getFeatureBits() const {
1604 return getSTI().getFeatureBits();
1605 }
1606
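  // A forced encoding comes from a mnemonic suffix stripped by
  // parseMnemonicSuffix(): "_e32" forces ForcedEncodingSize = 32, "_e64"
  // forces 64 (the VOP3 form, see isForcedVOP3()), and "_dpp" / "_sdwa" set
  // ForcedDPP / ForcedSDWA. For example, "v_add_f32_e64 v0, v1, v2" is
  // matched only against the 64-bit encoding.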
1607 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1608 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1609 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1610
1611 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1612 bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1613 bool isForcedDPP() const { return ForcedDPP; }
1614 bool isForcedSDWA() const { return ForcedSDWA; }
1615 ArrayRef<unsigned> getMatchedVariants() const;
1616 StringRef getMatchedVariantName() const;
1617
1618 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1619 bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1620 bool RestoreOnFailure);
1621 bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
1622 ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
1623 SMLoc &EndLoc) override;
1624 unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1625 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1626 unsigned Kind) override;
1627 bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1628 OperandVector &Operands, MCStreamer &Out,
1629 uint64_t &ErrorInfo,
1630 bool MatchingInlineAsm) override;
1631 bool ParseDirective(AsmToken DirectiveID) override;
1632 ParseStatus parseOperand(OperandVector &Operands, StringRef Mnemonic,
1633 OperandMode Mode = OperandMode_Default);
1634 StringRef parseMnemonicSuffix(StringRef Name);
1635 bool parseInstruction(ParseInstructionInfo &Info, StringRef Name,
1636 SMLoc NameLoc, OperandVector &Operands) override;
1637 //bool ProcessInstruction(MCInst &Inst);
1638
1639 ParseStatus parseTokenOp(StringRef Name, OperandVector &Operands);
1640
1641 ParseStatus parseIntWithPrefix(const char *Prefix, int64_t &Int);
1642
1643 ParseStatus
1644 parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1645 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1646 std::function<bool(int64_t &)> ConvertResult = nullptr);
1647
1648 ParseStatus parseOperandArrayWithPrefix(
1649 const char *Prefix, OperandVector &Operands,
1650 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1651 bool (*ConvertResult)(int64_t &) = nullptr);
1652
1653 ParseStatus
1654 parseNamedBit(StringRef Name, OperandVector &Operands,
1655 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1656 unsigned getCPolKind(StringRef Id, StringRef Mnemo, bool &Disabling) const;
1657 ParseStatus parseCPol(OperandVector &Operands);
1658 ParseStatus parseScope(OperandVector &Operands, int64_t &Scope);
1659 ParseStatus parseTH(OperandVector &Operands, int64_t &TH);
1660 ParseStatus parseStringWithPrefix(StringRef Prefix, StringRef &Value,
1661 SMLoc &StringLoc);
1662 ParseStatus parseStringOrIntWithPrefix(OperandVector &Operands,
1663 StringRef Name,
1664 ArrayRef<const char *> Ids,
1665 int64_t &IntVal);
1666 ParseStatus parseStringOrIntWithPrefix(OperandVector &Operands,
1667 StringRef Name,
1668 ArrayRef<const char *> Ids,
1669 AMDGPUOperand::ImmTy Type);
1670
1671 bool isModifier();
1672 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1673 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1674 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1675 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1676 bool parseSP3NegModifier();
1677 ParseStatus parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false,
1678 bool HasLit = false, bool HasLit64 = false);
1679 ParseStatus parseReg(OperandVector &Operands);
1680 ParseStatus parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false,
1681 bool HasLit = false, bool HasLit64 = false);
1682 ParseStatus parseRegOrImmWithFPInputMods(OperandVector &Operands,
1683 bool AllowImm = true);
1684 ParseStatus parseRegOrImmWithIntInputMods(OperandVector &Operands,
1685 bool AllowImm = true);
1686 ParseStatus parseRegWithFPInputMods(OperandVector &Operands);
1687 ParseStatus parseRegWithIntInputMods(OperandVector &Operands);
1688 ParseStatus parseVReg32OrOff(OperandVector &Operands);
1689 ParseStatus tryParseIndexKey(OperandVector &Operands,
1690 AMDGPUOperand::ImmTy ImmTy);
1691 ParseStatus parseIndexKey8bit(OperandVector &Operands);
1692 ParseStatus parseIndexKey16bit(OperandVector &Operands);
1693 ParseStatus parseIndexKey32bit(OperandVector &Operands);
1694 ParseStatus tryParseMatrixFMT(OperandVector &Operands, StringRef Name,
1695 AMDGPUOperand::ImmTy Type);
1696 ParseStatus parseMatrixAFMT(OperandVector &Operands);
1697 ParseStatus parseMatrixBFMT(OperandVector &Operands);
1698 ParseStatus tryParseMatrixScale(OperandVector &Operands, StringRef Name,
1699 AMDGPUOperand::ImmTy Type);
1700 ParseStatus parseMatrixAScale(OperandVector &Operands);
1701 ParseStatus parseMatrixBScale(OperandVector &Operands);
1702 ParseStatus tryParseMatrixScaleFmt(OperandVector &Operands, StringRef Name,
1703 AMDGPUOperand::ImmTy Type);
1704 ParseStatus parseMatrixAScaleFmt(OperandVector &Operands);
1705 ParseStatus parseMatrixBScaleFmt(OperandVector &Operands);
1706
1707 ParseStatus parseDfmtNfmt(int64_t &Format);
1708 ParseStatus parseUfmt(int64_t &Format);
1709 ParseStatus parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc,
1710 int64_t &Format);
1711 ParseStatus parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc,
1712 int64_t &Format);
1713 ParseStatus parseFORMAT(OperandVector &Operands);
1714 ParseStatus parseSymbolicOrNumericFormat(int64_t &Format);
1715 ParseStatus parseNumericFormat(int64_t &Format);
1716 ParseStatus parseFlatOffset(OperandVector &Operands);
1717 ParseStatus parseR128A16(OperandVector &Operands);
1718 ParseStatus parseBLGP(OperandVector &Operands);
1719 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1720 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1721
1722 void cvtExp(MCInst &Inst, const OperandVector &Operands);
1723
1724 bool parseCnt(int64_t &IntVal);
1725 ParseStatus parseSWaitCnt(OperandVector &Operands);
1726
1727 bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
1728 void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
1729 ParseStatus parseDepCtr(OperandVector &Operands);
1730
1731 bool parseDelay(int64_t &Delay);
1732 ParseStatus parseSDelayALU(OperandVector &Operands);
1733
1734 ParseStatus parseHwreg(OperandVector &Operands);
1735
1736private:
1737 struct OperandInfoTy {
1738 SMLoc Loc;
1739 int64_t Val;
1740 bool IsSymbolic = false;
1741 bool IsDefined = false;
1742
1743 OperandInfoTy(int64_t Val) : Val(Val) {}
1744 };
1745
1746 struct StructuredOpField : OperandInfoTy {
1747 StringLiteral Id;
1748 StringLiteral Desc;
1749 unsigned Width;
1750 bool IsDefined = false;
1751
1752 StructuredOpField(StringLiteral Id, StringLiteral Desc, unsigned Width,
1753 int64_t Default)
1754 : OperandInfoTy(Default), Id(Id), Desc(Desc), Width(Width) {}
1755 virtual ~StructuredOpField() = default;
1756
1757 bool Error(AMDGPUAsmParser &Parser, const Twine &Err) const {
1758 Parser.Error(Loc, "invalid " + Desc + ": " + Err);
1759 return false;
1760 }
1761
1762 virtual bool validate(AMDGPUAsmParser &Parser) const {
1763 if (IsSymbolic && Val == OPR_ID_UNSUPPORTED)
1764 return Error(Parser, "not supported on this GPU");
1765 if (!isUIntN(Width, Val))
1766 return Error(Parser, "only " + Twine(Width) + "-bit values are legal");
1767 return true;
1768 }
1769 };
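  // Fields derived from StructuredOpField are filled in by
  // parseStructuredOpFields() below and then checked with validate(), which
  // rejects symbolic values that are unsupported on the current GPU and
  // values that do not fit in Width bits.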
1770
1771 ParseStatus parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields);
1772 bool validateStructuredOpFields(ArrayRef<const StructuredOpField *> Fields);
1773
1774 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1775 bool validateSendMsg(const OperandInfoTy &Msg,
1776 const OperandInfoTy &Op,
1777 const OperandInfoTy &Stream);
1778
1779 ParseStatus parseHwregFunc(OperandInfoTy &HwReg, OperandInfoTy &Offset,
1780 OperandInfoTy &Width);
1781
1782 static SMLoc getLaterLoc(SMLoc a, SMLoc b);
1783
1784 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1785 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1786 SMLoc getBLGPLoc(const OperandVector &Operands) const;
1787
1788 SMLoc getOperandLoc(const OperandVector &Operands, int MCOpIdx) const;
1789 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1790 const OperandVector &Operands) const;
1791 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type,
1792 const OperandVector &Operands) const;
1793 SMLoc getInstLoc(const OperandVector &Operands) const;
1794
1795 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1796 bool validateOffset(const MCInst &Inst, const OperandVector &Operands);
1797 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1798 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1799 bool validateSOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1800 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1801 std::optional<unsigned> checkVOPDRegBankConstraints(const MCInst &Inst,
1802 bool AsVOPD3);
1803 bool validateVOPD(const MCInst &Inst, const OperandVector &Operands);
1804 bool tryVOPD(const MCInst &Inst);
1805 bool tryVOPD3(const MCInst &Inst);
1806 bool tryAnotherVOPDEncoding(const MCInst &Inst);
1807
1808 bool validateIntClampSupported(const MCInst &Inst);
1809 bool validateMIMGAtomicDMask(const MCInst &Inst);
1810 bool validateMIMGGatherDMask(const MCInst &Inst);
1811 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1812 bool validateMIMGDataSize(const MCInst &Inst, const SMLoc &IDLoc);
1813 bool validateMIMGAddrSize(const MCInst &Inst, const SMLoc &IDLoc);
1814 bool validateMIMGD16(const MCInst &Inst);
1815 bool validateMIMGDim(const MCInst &Inst, const OperandVector &Operands);
1816 bool validateTensorR128(const MCInst &Inst);
1817 bool validateMIMGMSAA(const MCInst &Inst);
1818 bool validateOpSel(const MCInst &Inst);
1819 bool validateTrue16OpSel(const MCInst &Inst);
1820 bool validateNeg(const MCInst &Inst, AMDGPU::OpName OpName);
1821 bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1822 bool validateVccOperand(MCRegister Reg) const;
1823 bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1824 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1825 bool validateMAISrc2(const MCInst &Inst, const OperandVector &Operands);
1826 bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
1827 bool validateAGPRLdSt(const MCInst &Inst) const;
1828 bool validateVGPRAlign(const MCInst &Inst) const;
1829 bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
1830 bool validateDS(const MCInst &Inst, const OperandVector &Operands);
1831 bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1832 bool validateDivScale(const MCInst &Inst);
1833 bool validateWaitCnt(const MCInst &Inst, const OperandVector &Operands);
1834 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1835 const SMLoc &IDLoc);
1836 bool validateTHAndScopeBits(const MCInst &Inst, const OperandVector &Operands,
1837 const unsigned CPol);
1838 bool validateTFE(const MCInst &Inst, const OperandVector &Operands);
1839 bool validateSetVgprMSB(const MCInst &Inst, const OperandVector &Operands);
1840 bool validateLdsDirect(const MCInst &Inst, const OperandVector &Operands);
1841 bool validateWMMA(const MCInst &Inst, const OperandVector &Operands);
1842 unsigned getConstantBusLimit(unsigned Opcode) const;
1843 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1844 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1845 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1846
1847 bool isSupportedMnemo(StringRef Mnemo,
1848 const FeatureBitset &FBS);
1849 bool isSupportedMnemo(StringRef Mnemo,
1850 const FeatureBitset &FBS,
1851 ArrayRef<unsigned> Variants);
1852 bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1853
1854 bool isId(const StringRef Id) const;
1855 bool isId(const AsmToken &Token, const StringRef Id) const;
1856 bool isToken(const AsmToken::TokenKind Kind) const;
1857 StringRef getId() const;
1858 bool trySkipId(const StringRef Id);
1859 bool trySkipId(const StringRef Pref, const StringRef Id);
1860 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1861 bool trySkipToken(const AsmToken::TokenKind Kind);
1862 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1863 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1864 bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1865
1866 void peekTokens(MutableArrayRef<AsmToken> Tokens);
1867 AsmToken::TokenKind getTokenKind() const;
1868 bool parseExpr(int64_t &Imm, StringRef Expected = "");
1870 StringRef getTokenStr() const;
1871 AsmToken peekToken(bool ShouldSkipSpace = true);
1872 AsmToken getToken() const;
1873 SMLoc getLoc() const;
1874 void lex();
1875
1876public:
1877 void onBeginOfFile() override;
1878 bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;
1879
1880 ParseStatus parseCustomOperand(OperandVector &Operands, unsigned MCK);
1881
1882 ParseStatus parseExpTgt(OperandVector &Operands);
1883 ParseStatus parseSendMsg(OperandVector &Operands);
1884 ParseStatus parseInterpSlot(OperandVector &Operands);
1885 ParseStatus parseInterpAttr(OperandVector &Operands);
1886 ParseStatus parseSOPPBrTarget(OperandVector &Operands);
1887 ParseStatus parseBoolReg(OperandVector &Operands);
1888
1889 bool parseSwizzleOperand(int64_t &Op, const unsigned MinVal,
1890 const unsigned MaxVal, const Twine &ErrMsg,
1891 SMLoc &Loc);
1892 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1893 const unsigned MinVal,
1894 const unsigned MaxVal,
1895 const StringRef ErrMsg);
1896 ParseStatus parseSwizzle(OperandVector &Operands);
1897 bool parseSwizzleOffset(int64_t &Imm);
1898 bool parseSwizzleMacro(int64_t &Imm);
1899 bool parseSwizzleQuadPerm(int64_t &Imm);
1900 bool parseSwizzleBitmaskPerm(int64_t &Imm);
1901 bool parseSwizzleBroadcast(int64_t &Imm);
1902 bool parseSwizzleSwap(int64_t &Imm);
1903 bool parseSwizzleReverse(int64_t &Imm);
1904 bool parseSwizzleFFT(int64_t &Imm);
1905 bool parseSwizzleRotate(int64_t &Imm);
1906
1907 ParseStatus parseGPRIdxMode(OperandVector &Operands);
1908 int64_t parseGPRIdxMacro();
1909
1910 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1911 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1912
1913 ParseStatus parseOModSI(OperandVector &Operands);
1914
1915 void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1916 OptionalImmIndexMap &OptionalIdx);
1917 void cvtScaledMFMA(MCInst &Inst, const OperandVector &Operands);
1918 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1919 void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1920 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1921 void cvtSWMMAC(MCInst &Inst, const OperandVector &Operands);
1922
1923 void cvtVOPD(MCInst &Inst, const OperandVector &Operands);
1924 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
1925 OptionalImmIndexMap &OptionalIdx);
1926 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1927 OptionalImmIndexMap &OptionalIdx);
1928
1929 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1930 void cvtVINTERP(MCInst &Inst, const OperandVector &Operands);
1931
1932 bool parseDimId(unsigned &Encoding);
1933 ParseStatus parseDim(OperandVector &Operands);
1934 bool convertDppBoundCtrl(int64_t &BoundCtrl);
1935 ParseStatus parseDPP8(OperandVector &Operands);
1936 ParseStatus parseDPPCtrl(OperandVector &Operands);
1937 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1938 int64_t parseDPPCtrlSel(StringRef Ctrl);
1939 int64_t parseDPPCtrlPerm();
1940 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1941 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) {
1942 cvtDPP(Inst, Operands, true);
1943 }
1944 void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
1945 bool IsDPP8 = false);
1946 void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) {
1947 cvtVOP3DPP(Inst, Operands, true);
1948 }
1949
1950 ParseStatus parseSDWASel(OperandVector &Operands, StringRef Prefix,
1951 AMDGPUOperand::ImmTy Type);
1952 ParseStatus parseSDWADstUnused(OperandVector &Operands);
1953 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1954 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1955 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1956 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1957 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1958 void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1959 uint64_t BasicInstType,
1960 bool SkipDstVcc = false,
1961 bool SkipSrcVcc = false);
1962
1963 ParseStatus parseEndpgm(OperandVector &Operands);
1964
1965 ParseStatus parseVOPD(OperandVector &Operands);
1966};
1967
1968} // end anonymous namespace
1969
1970 // May be called with an integer type of equivalent bitwidth.
1971static const fltSemantics *getFltSemantics(unsigned Size) {
1972 switch (Size) {
1973 case 4:
1974 return &APFloat::IEEEsingle();
1975 case 8:
1976 return &APFloat::IEEEdouble();
1977 case 2:
1978 return &APFloat::IEEEhalf();
1979 default:
1980 llvm_unreachable("unsupported fp type");
1981 }
1982}
1983
1984 static const fltSemantics *getFltSemantics(MVT VT) {
1985 return getFltSemantics(VT.getSizeInBits() / 8);
1986}
1987
1988 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1989 switch (OperandType) {
1990 // When a floating-point immediate is used as an operand of type i16, the 32-bit
1991 // representation of the constant, truncated to the 16 LSBs, should be used.
2006 return &APFloat::IEEEsingle();
2013 return &APFloat::IEEEdouble();
2020 return &APFloat::IEEEhalf();
2025 return &APFloat::BFloat();
2026 default:
2027 llvm_unreachable("unsupported fp type");
2028 }
2029}
2030
2031//===----------------------------------------------------------------------===//
2032// Operand
2033//===----------------------------------------------------------------------===//
2034
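// For example, converting the f64 literal 0.1 to f16 merely loses precision
// and is still accepted, whereas 1.0e10 overflows f16 and is rejected by the
// check below.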
2035static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
2036 bool Lost;
2037
2038 // Convert the literal to the floating-point type of VT.
2039 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
2040 APFloat::rmNearestTiesToEven,
2041 &Lost);
2042 // We allow precision loss but not overflow or underflow.
2043 if (Status != APFloat::opOK &&
2044 Lost &&
2045 ((Status & APFloat::opOverflow) != 0 ||
2046 (Status & APFloat::opUnderflow) != 0)) {
2047 return false;
2048 }
2049
2050 return true;
2051}
2052
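// For example, both -1 and 0xFFFF truncate safely to 16 bits (they fit as a
// signed or as an unsigned 16-bit value, respectively), while 0x1FFFF does not.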
2053static bool isSafeTruncation(int64_t Val, unsigned Size) {
2054 return isUIntN(Size, Val) || isIntN(Size, Val);
2055}
2056
2057static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
2058 if (VT.getScalarType() == MVT::i16)
2059 return isInlinableLiteral32(Val, HasInv2Pi);
2060
2061 if (VT.getScalarType() == MVT::f16)
2062 return AMDGPU::isInlinableLiteralFP16(Val, HasInv2Pi);
2063
2064 assert(VT.getScalarType() == MVT::bf16);
2065
2066 return AMDGPU::isInlinableLiteralBF16(Val, HasInv2Pi);
2067}
2068
2069bool AMDGPUOperand::isInlinableImm(MVT type) const {
2070
2071 // This is a hack to enable named inline values like
2072 // shared_base with both 32-bit and 64-bit operands.
2073 // Note that these values are defined as
2074 // 32-bit operands only.
2075 if (isInlineValue()) {
2076 return true;
2077 }
2078
2079 if (!isImmTy(ImmTyNone)) {
2080 // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
2081 return false;
2082 }
2083 // TODO: We should avoid using host float here. It would be better to
2084 // check the float bit values which is what a few other places do.
2085 // We've had bot failures before due to weird NaN support on mips hosts.
2086
2087 APInt Literal(64, Imm.Val);
2088
2089 if (Imm.IsFPImm) { // We got fp literal token
2090 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
2091 return AMDGPU::isInlinableLiteral64(Imm.Val,
2092 AsmParser->hasInv2PiInlineImm());
2093 }
2094
2095 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2096 if (!canLosslesslyConvertToFPType(FPLiteral, type))
2097 return false;
2098
2099 if (type.getScalarSizeInBits() == 16) {
2100 bool Lost = false;
2101 switch (type.getScalarType().SimpleTy) {
2102 default:
2103 llvm_unreachable("unknown 16-bit type");
2104 case MVT::bf16:
2105 FPLiteral.convert(APFloatBase::BFloat(), APFloat::rmNearestTiesToEven,
2106 &Lost);
2107 break;
2108 case MVT::f16:
2109 FPLiteral.convert(APFloatBase::IEEEhalf(), APFloat::rmNearestTiesToEven,
2110 &Lost);
2111 break;
2112 case MVT::i16:
2113 FPLiteral.convert(APFloatBase::IEEEsingle(),
2114 APFloat::rmNearestTiesToEven, &Lost);
2115 break;
2116 }
2117 // We need to use the 32-bit representation here because when a floating-point
2118 // inline constant is used as an i16 operand, its 32-bit representation
2119 // will be used. We will need the 32-bit value to check whether
2120 // it is an FP inline constant.
2121 uint32_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2122 return isInlineableLiteralOp16(ImmVal, type,
2123 AsmParser->hasInv2PiInlineImm());
2124 }
2125
2126 // Check if single precision literal is inlinable
2127 return AMDGPU::isInlinableLiteral32(
2128 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
2129 AsmParser->hasInv2PiInlineImm());
2130 }
2131
2132 // We got int literal token.
2133 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
2134 return AMDGPU::isInlinableLiteral64(Imm.Val,
2135 AsmParser->hasInv2PiInlineImm());
2136 }
2137
2138 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
2139 return false;
2140 }
2141
2142 if (type.getScalarSizeInBits() == 16) {
2143 return isInlineableLiteralOp16(
2144 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
2145 type, AsmParser->hasInv2PiInlineImm());
2146 }
2147
2148 return AMDGPU::isInlinableLiteral32(
2149 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
2150 AsmParser->hasInv2PiInlineImm());
2151}
2152
2153bool AMDGPUOperand::isLiteralImm(MVT type) const {
2154 // Check that this immediate can be added as literal
2155 if (!isImmTy(ImmTyNone)) {
2156 return false;
2157 }
2158
2159 bool Allow64Bit =
2160 (type == MVT::i64 || type == MVT::f64) && AsmParser->has64BitLiterals();
2161
2162 if (!Imm.IsFPImm) {
2163 // We got int literal token.
2164
2165 if (type == MVT::f64 && hasFPModifiers()) {
2166 // FP modifiers cannot be applied to int literals while preserving the same
2167 // semantics for VOP1/2/C and VOP3 because of integer truncation. To avoid
2168 // ambiguity, these cases are disabled.
2169 return false;
2170 }
2171
2172 unsigned Size = type.getSizeInBits();
2173 if (Size == 64) {
2174 if (Allow64Bit && !AMDGPU::isValid32BitLiteral(Imm.Val, false))
2175 return true;
2176 Size = 32;
2177 }
2178
2179 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
2180 // types.
2181 return isSafeTruncation(Imm.Val, Size);
2182 }
2183
2184 // We got fp literal token
2185 if (type == MVT::f64) { // Expected 64-bit fp operand
2186 // We may have to zero the low 32 bits of such a literal, but we still accept it.
2187 return true;
2188 }
2189
2190 if (type == MVT::i64) { // Expected 64-bit int operand
2191 // We don't allow fp literals in 64-bit integer instructions. It is
2192 // unclear how we should encode them.
2193 return false;
2194 }
2195
2196 // We allow fp literals with f16x2 operands assuming that the specified
2197 // literal goes into the lower half and the upper half is zero. We also
2198 // require that the literal may be losslessly converted to f16.
2199 //
2200 // For i16x2 operands, we assume that the specified literal is encoded as a
2201 // single-precision float. This is pretty odd, but it matches SP3 and what
2202 // happens in hardware.
2203 MVT ExpectedType = (type == MVT::v2f16) ? MVT::f16
2204 : (type == MVT::v2i16) ? MVT::f32
2205 : (type == MVT::v2f32) ? MVT::f32
2206 : type;
2207
2208 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2209 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
2210}
2211
2212bool AMDGPUOperand::isRegClass(unsigned RCID) const {
2213 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
2214}
2215
2216bool AMDGPUOperand::isVRegWithInputMods() const {
2217 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
2218 // GFX90A allows DPP on 64-bit operands.
2219 (isRegClass(AMDGPU::VReg_64RegClassID) &&
2220 AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]);
2221}
2222
2223template <bool IsFake16>
2224bool AMDGPUOperand::isT16_Lo128VRegWithInputMods() const {
2225 return isRegClass(IsFake16 ? AMDGPU::VGPR_32_Lo128RegClassID
2226 : AMDGPU::VGPR_16_Lo128RegClassID);
2227}
2228
2229template <bool IsFake16> bool AMDGPUOperand::isT16VRegWithInputMods() const {
2230 return isRegClass(IsFake16 ? AMDGPU::VGPR_32RegClassID
2231 : AMDGPU::VGPR_16RegClassID);
2232}
2233
2234bool AMDGPUOperand::isSDWAOperand(MVT type) const {
2235 if (AsmParser->isVI())
2236 return isVReg32();
2237 if (AsmParser->isGFX9Plus())
2238 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
2239 return false;
2240}
2241
2242bool AMDGPUOperand::isSDWAFP16Operand() const {
2243 return isSDWAOperand(MVT::f16);
2244}
2245
2246bool AMDGPUOperand::isSDWAFP32Operand() const {
2247 return isSDWAOperand(MVT::f32);
2248}
2249
2250bool AMDGPUOperand::isSDWAInt16Operand() const {
2251 return isSDWAOperand(MVT::i16);
2252}
2253
2254bool AMDGPUOperand::isSDWAInt32Operand() const {
2255 return isSDWAOperand(MVT::i32);
2256}
2257
2258bool AMDGPUOperand::isBoolReg() const {
2259 auto FB = AsmParser->getFeatureBits();
2260 return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrc_b64()) ||
2261 (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrc_b32()));
2262}
2263
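// The helper below touches only the floating-point sign bit of the raw value:
// for a 4-byte operand FpSignMask is 0x80000000, so 'abs' clears that bit and
// 'neg' flips it.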
2264uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
2265{
2266 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2267 assert(Size == 2 || Size == 4 || Size == 8);
2268
2269 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
2270
2271 if (Imm.Mods.Abs) {
2272 Val &= ~FpSignMask;
2273 }
2274 if (Imm.Mods.Neg) {
2275 Val ^= FpSignMask;
2276 }
2277
2278 return Val;
2279}
2280
2281void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
2282 MCOpIdx = Inst.getNumOperands();
2283
2284 if (isExpr()) {
2285 Inst.addOperand(MCOperand::createExpr(Expr));
2286 return;
2287 }
2288
2289 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
2290 Inst.getNumOperands())) {
2291 addLiteralImmOperand(Inst, Imm.Val,
2292 ApplyModifiers &
2293 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2294 } else {
2295 assert(!isImmTy(ImmTyNone) || !hasModifiers());
2296 Inst.addOperand(MCOperand::createImm(Imm.Val));
2297 }
2298}
2299
2300void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
2301 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
2302 auto OpNum = Inst.getNumOperands();
2303 // Check that this operand accepts literals
2304 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
2305
2306 if (ApplyModifiers) {
2307 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
2308 const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
2309 Val = applyInputFPModifiers(Val, Size);
2310 }
2311
2312 APInt Literal(64, Val);
2313 uint8_t OpTy = InstDesc.operands()[OpNum].OperandType;
2314
2315 if (Imm.IsFPImm) { // We got fp literal token
2316 switch (OpTy) {
2322 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
2323 AsmParser->hasInv2PiInlineImm())) {
2324 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
2325 return;
2326 }
2327
2328 // Non-inlineable
2329 if (AMDGPU::isSISrcFPOperand(InstDesc,
2330 OpNum)) { // Expected 64-bit fp operand
2331 bool HasMandatoryLiteral =
2332 AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::imm);
2333 // For fp operands we check if low 32 bits are zeros
2334 if (Literal.getLoBits(32) != 0 &&
2335 (InstDesc.getSize() != 4 || !AsmParser->has64BitLiterals()) &&
2336 !HasMandatoryLiteral) {
2337 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(
2338 Inst.getLoc(),
2339 "Can't encode literal as exact 64-bit floating-point operand. "
2340 "Low 32-bits will be set to zero");
2341 Val &= 0xffffffff00000000u;
2342 }
2343
2344 Inst.addOperand(MCOperand::createImm(Val));
2345 return;
2346 }
2347
2348 // We don't allow fp literals in 64-bit integer instructions. It is
2349 // unclear how we should encode them. This case should be checked earlier
2350 // in predicate methods (isLiteralImm())
2351 llvm_unreachable("fp literal in 64-bit integer instruction.");
2352
2355 return;
2356
2361 if (AsmParser->hasInv2PiInlineImm() && Literal == 0x3fc45f306725feed) {
2362 // This is 1/(2*pi), which is going to be truncated to bf16 with a
2363 // loss of precision. The constant represents the idiomatic fp32 value of
2364 // 1/(2*pi) = 0.15915494, since bf16 is in fact fp32 with the low 16
2365 // bits cleared. Prevent the rounding below.
2366 Inst.addOperand(MCOperand::createImm(0x3e22));
2367 return;
2368 }
2369 [[fallthrough]];
2370
2391 bool lost;
2392 APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2393 // Convert the literal to the floating-point type of the operand.
2394 FPLiteral.convert(*getOpFltSemantics(OpTy),
2395 APFloat::rmNearestTiesToEven, &lost);
2396 // We allow precision loss but not overflow or underflow. This should be
2397 // checked earlier in isLiteralImm()
2398
2399 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2400 Inst.addOperand(MCOperand::createImm(ImmVal));
2401 return;
2402 }
2403 default:
2404 llvm_unreachable("invalid operand size");
2405 }
2406
2407 return;
2408 }
2409
2410 // We got int literal token.
2411 // Only sign extend inline immediates.
2412 switch (OpTy) {
2427 return;
2428
2431 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2432 Inst.addOperand(MCOperand::createImm(Val));
2433 return;
2434 }
2435
2436 // When the 32 MSBs are not zero (which effectively means the value can't be
2437 // safely truncated to uint32_t), if the target doesn't support 64-bit
2438 // literals, or the lit modifier is explicitly used, we need to truncate the
2439 // value to the 32 LSBs.
2440 if (!AsmParser->has64BitLiterals() || getModifiers().Lit)
2441 Val = Lo_32(Val);
2442
2443 Inst.addOperand(MCOperand::createImm(Val));
2444 return;
2445
2449 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2450 Inst.addOperand(MCOperand::createImm(Val));
2451 return;
2452 }
2453
2454 // If the target doesn't support 64-bit literals, we need to place the
2455 // constant in the 32 MSBs of a double-precision floating-point value.
2456 if (!AsmParser->has64BitLiterals()) {
2457 Val = static_cast<uint64_t>(Val) << 32;
2458 } else {
2459 // Now that the target supports 64-bit literals, there are two cases
2460 // where we still want to use the src_literal encoding:
2461 // 1) it is explicitly forced by the lit modifier;
2462 // 2) the value has a valid 32-bit representation (signed or unsigned)
2463 // and is not forced to 64 bits by the lit64 modifier.
2464 if (getModifiers().Lit ||
2465 (!getModifiers().Lit64 && (isInt<32>(Val) || isUInt<32>(Val))))
2466 Val = static_cast<uint64_t>(Val) << 32;
2467 }
2468
2469 Inst.addOperand(MCOperand::createImm(Val));
2470 return;
2471
2484 return;
2485
2487 if ((isInt<32>(Val) || isUInt<32>(Val)) && !getModifiers().Lit64)
2488 Val <<= 32;
2489
2490 Inst.addOperand(MCOperand::createImm(Val));
2491 return;
2492
2493 default:
2494 llvm_unreachable("invalid operand type");
2495 }
2496}
2497
2498void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2499 MCOpIdx = Inst.getNumOperands();
2500 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2501}
2502
2503bool AMDGPUOperand::isInlineValue() const {
2504 return isRegKind() && ::isInlineValue(getReg());
2505}
2506
2507//===----------------------------------------------------------------------===//
2508// AsmParser
2509//===----------------------------------------------------------------------===//
2510
2511void AMDGPUAsmParser::createConstantSymbol(StringRef Id, int64_t Val) {
2512 // TODO: make those pre-defined variables read-only.
2513 // Currently there is no suitable machinery in core llvm-mc for this.
2514 // MCSymbol::isRedefinable is intended for another purpose, and
2515 // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
2516 MCContext &Ctx = getContext();
2517 MCSymbol *Sym = Ctx.getOrCreateSymbol(Id);
2519}
2520
2521static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2522 if (Is == IS_VGPR) {
2523 switch (RegWidth) {
2524 default: return -1;
2525 case 32:
2526 return AMDGPU::VGPR_32RegClassID;
2527 case 64:
2528 return AMDGPU::VReg_64RegClassID;
2529 case 96:
2530 return AMDGPU::VReg_96RegClassID;
2531 case 128:
2532 return AMDGPU::VReg_128RegClassID;
2533 case 160:
2534 return AMDGPU::VReg_160RegClassID;
2535 case 192:
2536 return AMDGPU::VReg_192RegClassID;
2537 case 224:
2538 return AMDGPU::VReg_224RegClassID;
2539 case 256:
2540 return AMDGPU::VReg_256RegClassID;
2541 case 288:
2542 return AMDGPU::VReg_288RegClassID;
2543 case 320:
2544 return AMDGPU::VReg_320RegClassID;
2545 case 352:
2546 return AMDGPU::VReg_352RegClassID;
2547 case 384:
2548 return AMDGPU::VReg_384RegClassID;
2549 case 512:
2550 return AMDGPU::VReg_512RegClassID;
2551 case 1024:
2552 return AMDGPU::VReg_1024RegClassID;
2553 }
2554 } else if (Is == IS_TTMP) {
2555 switch (RegWidth) {
2556 default: return -1;
2557 case 32:
2558 return AMDGPU::TTMP_32RegClassID;
2559 case 64:
2560 return AMDGPU::TTMP_64RegClassID;
2561 case 128:
2562 return AMDGPU::TTMP_128RegClassID;
2563 case 256:
2564 return AMDGPU::TTMP_256RegClassID;
2565 case 512:
2566 return AMDGPU::TTMP_512RegClassID;
2567 }
2568 } else if (Is == IS_SGPR) {
2569 switch (RegWidth) {
2570 default: return -1;
2571 case 32:
2572 return AMDGPU::SGPR_32RegClassID;
2573 case 64:
2574 return AMDGPU::SGPR_64RegClassID;
2575 case 96:
2576 return AMDGPU::SGPR_96RegClassID;
2577 case 128:
2578 return AMDGPU::SGPR_128RegClassID;
2579 case 160:
2580 return AMDGPU::SGPR_160RegClassID;
2581 case 192:
2582 return AMDGPU::SGPR_192RegClassID;
2583 case 224:
2584 return AMDGPU::SGPR_224RegClassID;
2585 case 256:
2586 return AMDGPU::SGPR_256RegClassID;
2587 case 288:
2588 return AMDGPU::SGPR_288RegClassID;
2589 case 320:
2590 return AMDGPU::SGPR_320RegClassID;
2591 case 352:
2592 return AMDGPU::SGPR_352RegClassID;
2593 case 384:
2594 return AMDGPU::SGPR_384RegClassID;
2595 case 512:
2596 return AMDGPU::SGPR_512RegClassID;
2597 }
2598 } else if (Is == IS_AGPR) {
2599 switch (RegWidth) {
2600 default: return -1;
2601 case 32:
2602 return AMDGPU::AGPR_32RegClassID;
2603 case 64:
2604 return AMDGPU::AReg_64RegClassID;
2605 case 96:
2606 return AMDGPU::AReg_96RegClassID;
2607 case 128:
2608 return AMDGPU::AReg_128RegClassID;
2609 case 160:
2610 return AMDGPU::AReg_160RegClassID;
2611 case 192:
2612 return AMDGPU::AReg_192RegClassID;
2613 case 224:
2614 return AMDGPU::AReg_224RegClassID;
2615 case 256:
2616 return AMDGPU::AReg_256RegClassID;
2617 case 288:
2618 return AMDGPU::AReg_288RegClassID;
2619 case 320:
2620 return AMDGPU::AReg_320RegClassID;
2621 case 352:
2622 return AMDGPU::AReg_352RegClassID;
2623 case 384:
2624 return AMDGPU::AReg_384RegClassID;
2625 case 512:
2626 return AMDGPU::AReg_512RegClassID;
2627 case 1024:
2628 return AMDGPU::AReg_1024RegClassID;
2629 }
2630 }
2631 return -1;
2632}
2633
2634 static MCRegister getSpecialRegForName(StringRef RegName) {
2635 return StringSwitch<MCRegister>(RegName)
2636 .Case("exec", AMDGPU::EXEC)
2637 .Case("vcc", AMDGPU::VCC)
2638 .Case("flat_scratch", AMDGPU::FLAT_SCR)
2639 .Case("xnack_mask", AMDGPU::XNACK_MASK)
2640 .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2641 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2642 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2643 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2644 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2645 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2646 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2647 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2648 .Case("src_flat_scratch_base_lo", AMDGPU::SRC_FLAT_SCRATCH_BASE_LO)
2649 .Case("src_flat_scratch_base_hi", AMDGPU::SRC_FLAT_SCRATCH_BASE_HI)
2650 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2651 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2652 .Case("lds_direct", AMDGPU::LDS_DIRECT)
2653 .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2654 .Case("m0", AMDGPU::M0)
2655 .Case("vccz", AMDGPU::SRC_VCCZ)
2656 .Case("src_vccz", AMDGPU::SRC_VCCZ)
2657 .Case("execz", AMDGPU::SRC_EXECZ)
2658 .Case("src_execz", AMDGPU::SRC_EXECZ)
2659 .Case("scc", AMDGPU::SRC_SCC)
2660 .Case("src_scc", AMDGPU::SRC_SCC)
2661 .Case("tba", AMDGPU::TBA)
2662 .Case("tma", AMDGPU::TMA)
2663 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2664 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2665 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2666 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2667 .Case("vcc_lo", AMDGPU::VCC_LO)
2668 .Case("vcc_hi", AMDGPU::VCC_HI)
2669 .Case("exec_lo", AMDGPU::EXEC_LO)
2670 .Case("exec_hi", AMDGPU::EXEC_HI)
2671 .Case("tma_lo", AMDGPU::TMA_LO)
2672 .Case("tma_hi", AMDGPU::TMA_HI)
2673 .Case("tba_lo", AMDGPU::TBA_LO)
2674 .Case("tba_hi", AMDGPU::TBA_HI)
2675 .Case("pc", AMDGPU::PC_REG)
2676 .Case("null", AMDGPU::SGPR_NULL)
2677 .Default(AMDGPU::NoRegister);
2678}
2679
2680bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
2681 SMLoc &EndLoc, bool RestoreOnFailure) {
2682 auto R = parseRegister();
2683 if (!R) return true;
2684 assert(R->isReg());
2685 RegNo = R->getReg();
2686 StartLoc = R->getStartLoc();
2687 EndLoc = R->getEndLoc();
2688 return false;
2689}
2690
2691bool AMDGPUAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
2692 SMLoc &EndLoc) {
2693 return ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2694}
2695
2696ParseStatus AMDGPUAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
2697 SMLoc &EndLoc) {
2698 bool Result = ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2699 bool PendingErrors = getParser().hasPendingError();
2700 getParser().clearPendingErrors();
2701 if (PendingErrors)
2702 return ParseStatus::Failure;
2703 if (Result)
2704 return ParseStatus::NoMatch;
2705 return ParseStatus::Success;
2706}
2707
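// Appends one more 32-bit register to a register list being parsed. For
// example, while parsing "[s0,s1,s2,s3]" RegWidth grows from 32 to 128 as
// consecutive SGPRs are added, and special pairs such as exec_lo/exec_hi are
// merged into their 64-bit aliases.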
2708bool AMDGPUAsmParser::AddNextRegisterToList(MCRegister &Reg, unsigned &RegWidth,
2709 RegisterKind RegKind,
2710 MCRegister Reg1, SMLoc Loc) {
2711 switch (RegKind) {
2712 case IS_SPECIAL:
2713 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2714 Reg = AMDGPU::EXEC;
2715 RegWidth = 64;
2716 return true;
2717 }
2718 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2719 Reg = AMDGPU::FLAT_SCR;
2720 RegWidth = 64;
2721 return true;
2722 }
2723 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2724 Reg = AMDGPU::XNACK_MASK;
2725 RegWidth = 64;
2726 return true;
2727 }
2728 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2729 Reg = AMDGPU::VCC;
2730 RegWidth = 64;
2731 return true;
2732 }
2733 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2734 Reg = AMDGPU::TBA;
2735 RegWidth = 64;
2736 return true;
2737 }
2738 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2739 Reg = AMDGPU::TMA;
2740 RegWidth = 64;
2741 return true;
2742 }
2743 Error(Loc, "register does not fit in the list");
2744 return false;
2745 case IS_VGPR:
2746 case IS_SGPR:
2747 case IS_AGPR:
2748 case IS_TTMP:
2749 if (Reg1 != Reg + RegWidth / 32) {
2750 Error(Loc, "registers in a list must have consecutive indices");
2751 return false;
2752 }
2753 RegWidth += 32;
2754 return true;
2755 default:
2756 llvm_unreachable("unexpected register kind");
2757 }
2758}
2759
2760struct RegInfo {
2761 StringLiteral Name;
2762 RegisterKind Kind;
2763};
2764
2765static constexpr RegInfo RegularRegisters[] = {
2766 {{"v"}, IS_VGPR},
2767 {{"s"}, IS_SGPR},
2768 {{"ttmp"}, IS_TTMP},
2769 {{"acc"}, IS_AGPR},
2770 {{"a"}, IS_AGPR},
2771};
2772
2773static bool isRegularReg(RegisterKind Kind) {
2774 return Kind == IS_VGPR ||
2775 Kind == IS_SGPR ||
2776 Kind == IS_TTMP ||
2777 Kind == IS_AGPR;
2778}
2779
2780 static const RegInfo* getRegularRegInfo(StringRef Str) {
2781 for (const RegInfo &Reg : RegularRegisters)
2782 if (Str.starts_with(Reg.Name))
2783 return &Reg;
2784 return nullptr;
2785}
2786
2787static bool getRegNum(StringRef Str, unsigned& Num) {
2788 return !Str.getAsInteger(10, Num);
2789}
2790
2791bool
2792AMDGPUAsmParser::isRegister(const AsmToken &Token,
2793 const AsmToken &NextToken) const {
2794
2795 // A list of consecutive registers: [s0,s1,s2,s3]
2796 if (Token.is(AsmToken::LBrac))
2797 return true;
2798
2799 if (!Token.is(AsmToken::Identifier))
2800 return false;
2801
2802 // A single register like s0 or a range of registers like s[0:1]
2803
2804 StringRef Str = Token.getString();
2805 const RegInfo *Reg = getRegularRegInfo(Str);
2806 if (Reg) {
2807 StringRef RegName = Reg->Name;
2808 StringRef RegSuffix = Str.substr(RegName.size());
2809 if (!RegSuffix.empty()) {
2810 RegSuffix.consume_back(".l");
2811 RegSuffix.consume_back(".h");
2812 unsigned Num;
2813 // A single register with an index: rXX
2814 if (getRegNum(RegSuffix, Num))
2815 return true;
2816 } else {
2817 // A range of registers: r[XX:YY].
2818 if (NextToken.is(AsmToken::LBrac))
2819 return true;
2820 }
2821 }
2822
2823 return getSpecialRegForName(Str).isValid();
2824}
2825
2826bool
2827AMDGPUAsmParser::isRegister()
2828{
2829 return isRegister(getToken(), peekToken());
2830}
2831
2832MCRegister AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, unsigned RegNum,
2833 unsigned SubReg, unsigned RegWidth,
2834 SMLoc Loc) {
2835 assert(isRegularReg(RegKind));
2836
2837 unsigned AlignSize = 1;
2838 if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2839 // SGPR and TTMP registers must be aligned.
2840 // Max required alignment is 4 dwords.
2841 AlignSize = std::min(llvm::bit_ceil(RegWidth / 32), 4u);
2842 }
2843
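 // For example, a 128-bit SGPR tuple must start at a multiple of 4, so
 // s[8:11] is accepted while s[2:5] fails the alignment check below.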
2844 if (RegNum % AlignSize != 0) {
2845 Error(Loc, "invalid register alignment");
2846 return MCRegister();
2847 }
2848
2849 unsigned RegIdx = RegNum / AlignSize;
2850 int RCID = getRegClass(RegKind, RegWidth);
2851 if (RCID == -1) {
2852 Error(Loc, "invalid or unsupported register size");
2853 return MCRegister();
2854 }
2855
2856 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2857 const MCRegisterClass RC = TRI->getRegClass(RCID);
2858 if (RegIdx >= RC.getNumRegs() || (RegKind == IS_VGPR && RegIdx > 255)) {
2859 Error(Loc, "register index is out of range");
2860 return AMDGPU::NoRegister;
2861 }
2862
2863 if (RegKind == IS_VGPR && !isGFX1250() && RegIdx + RegWidth / 32 > 256) {
2864 Error(Loc, "register index is out of range");
2865 return MCRegister();
2866 }
2867
2868 MCRegister Reg = RC.getRegister(RegIdx);
2869
2870 if (SubReg) {
2871 Reg = TRI->getSubReg(Reg, SubReg);
2872
2873 // Currently all regular registers have their .l and .h subregisters, so
2874 // we should never need to generate an error here.
2875 assert(Reg && "Invalid subregister!");
2876 }
2877
2878 return Reg;
2879}
2880
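// Parses the bracketed index part of a register range. For example, "[4:7]"
// yields Num = 4 and RegWidth = 128, while a single-index form such as "[5]"
// gives a 32-bit width that an optional ".l"/".h" suffix narrows to a 16-bit
// half via SubReg.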
2881bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth,
2882 unsigned &SubReg) {
2883 int64_t RegLo, RegHi;
2884 if (!skipToken(AsmToken::LBrac, "missing register index"))
2885 return false;
2886
2887 SMLoc FirstIdxLoc = getLoc();
2888 SMLoc SecondIdxLoc;
2889
2890 if (!parseExpr(RegLo))
2891 return false;
2892
2893 if (trySkipToken(AsmToken::Colon)) {
2894 SecondIdxLoc = getLoc();
2895 if (!parseExpr(RegHi))
2896 return false;
2897 } else {
2898 RegHi = RegLo;
2899 }
2900
2901 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2902 return false;
2903
2904 if (!isUInt<32>(RegLo)) {
2905 Error(FirstIdxLoc, "invalid register index");
2906 return false;
2907 }
2908
2909 if (!isUInt<32>(RegHi)) {
2910 Error(SecondIdxLoc, "invalid register index");
2911 return false;
2912 }
2913
2914 if (RegLo > RegHi) {
2915 Error(FirstIdxLoc, "first register index should not exceed second index");
2916 return false;
2917 }
2918
2919 if (RegHi == RegLo) {
2920 StringRef RegSuffix = getTokenStr();
2921 if (RegSuffix == ".l") {
2922 SubReg = AMDGPU::lo16;
2923 lex();
2924 } else if (RegSuffix == ".h") {
2925 SubReg = AMDGPU::hi16;
2926 lex();
2927 }
2928 }
2929
2930 Num = static_cast<unsigned>(RegLo);
2931 RegWidth = 32 * ((RegHi - RegLo) + 1);
2932
2933 return true;
2934}
2935
2936MCRegister AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2937 unsigned &RegNum,
2938 unsigned &RegWidth,
2939 SmallVectorImpl<AsmToken> &Tokens) {
2940 assert(isToken(AsmToken::Identifier));
2941 MCRegister Reg = getSpecialRegForName(getTokenStr());
2942 if (Reg) {
2943 RegNum = 0;
2944 RegWidth = 32;
2945 RegKind = IS_SPECIAL;
2946 Tokens.push_back(getToken());
2947 lex(); // skip register name
2948 }
2949 return Reg;
2950}
2951
2952MCRegister AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2953 unsigned &RegNum,
2954 unsigned &RegWidth,
2955 SmallVectorImpl<AsmToken> &Tokens) {
2956 assert(isToken(AsmToken::Identifier));
2957 StringRef RegName = getTokenStr();
2958 auto Loc = getLoc();
2959
2960 const RegInfo *RI = getRegularRegInfo(RegName);
2961 if (!RI) {
2962 Error(Loc, "invalid register name");
2963 return MCRegister();
2964 }
2965
2966 Tokens.push_back(getToken());
2967 lex(); // skip register name
2968
2969 RegKind = RI->Kind;
2970 StringRef RegSuffix = RegName.substr(RI->Name.size());
2971 unsigned SubReg = NoSubRegister;
2972 if (!RegSuffix.empty()) {
2973 if (RegSuffix.consume_back(".l"))
2974 SubReg = AMDGPU::lo16;
2975 else if (RegSuffix.consume_back(".h"))
2976 SubReg = AMDGPU::hi16;
2977
2978 // Single 32-bit register: vXX.
2979 if (!getRegNum(RegSuffix, RegNum)) {
2980 Error(Loc, "invalid register index");
2981 return MCRegister();
2982 }
2983 RegWidth = 32;
2984 } else {
2985 // Range of registers: v[XX:YY]. ":YY" is optional.
2986 if (!ParseRegRange(RegNum, RegWidth, SubReg))
2987 return MCRegister();
2988 }
2989
2990 return getRegularReg(RegKind, RegNum, SubReg, RegWidth, Loc);
2991}
2992
2993MCRegister AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind,
2994 unsigned &RegNum, unsigned &RegWidth,
2995 SmallVectorImpl<AsmToken> &Tokens) {
2996 MCRegister Reg;
2997 auto ListLoc = getLoc();
2998
2999 if (!skipToken(AsmToken::LBrac,
3000 "expected a register or a list of registers")) {
3001 return MCRegister();
3002 }
3003
3004 // List of consecutive registers, e.g.: [s0,s1,s2,s3]
3005
3006 auto Loc = getLoc();
3007 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
3008 return MCRegister();
3009 if (RegWidth != 32) {
3010 Error(Loc, "expected a single 32-bit register");
3011 return MCRegister();
3012 }
3013
3014 for (; trySkipToken(AsmToken::Comma); ) {
3015 RegisterKind NextRegKind;
3016 MCRegister NextReg;
3017 unsigned NextRegNum, NextRegWidth;
3018 Loc = getLoc();
3019
3020 if (!ParseAMDGPURegister(NextRegKind, NextReg,
3021 NextRegNum, NextRegWidth,
3022 Tokens)) {
3023 return MCRegister();
3024 }
3025 if (NextRegWidth != 32) {
3026 Error(Loc, "expected a single 32-bit register");
3027 return MCRegister();
3028 }
3029 if (NextRegKind != RegKind) {
3030 Error(Loc, "registers in a list must be of the same kind");
3031 return MCRegister();
3032 }
3033 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
3034 return MCRegister();
3035 }
3036
3037 if (!skipToken(AsmToken::RBrac,
3038 "expected a comma or a closing square bracket")) {
3039 return MCRegister();
3040 }
3041
3042 if (isRegularReg(RegKind))
3043 Reg = getRegularReg(RegKind, RegNum, NoSubRegister, RegWidth, ListLoc);
3044
3045 return Reg;
3046}
3047
3048bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
3049 MCRegister &Reg, unsigned &RegNum,
3050 unsigned &RegWidth,
3051 SmallVectorImpl<AsmToken> &Tokens) {
3052 auto Loc = getLoc();
3053 Reg = MCRegister();
3054
3055 if (isToken(AsmToken::Identifier)) {
3056 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
3057 if (!Reg)
3058 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
3059 } else {
3060 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
3061 }
3062
3063 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3064 if (!Reg) {
3065 assert(Parser.hasPendingError());
3066 return false;
3067 }
3068
3069 if (!subtargetHasRegister(*TRI, Reg)) {
3070 if (Reg == AMDGPU::SGPR_NULL) {
3071 Error(Loc, "'null' operand is not supported on this GPU");
3072 } else {
3074 " register not available on this GPU");
3075 }
3076 return false;
3077 }
3078
3079 return true;
3080}
3081
3082bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
3083 MCRegister &Reg, unsigned &RegNum,
3084 unsigned &RegWidth,
3085 bool RestoreOnFailure /*=false*/) {
3086 Reg = MCRegister();
3087
3088 SmallVector<AsmToken, 1> Tokens;
3089 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
3090 if (RestoreOnFailure) {
3091 while (!Tokens.empty()) {
3092 getLexer().UnLex(Tokens.pop_back_val());
3093 }
3094 }
3095 return true;
3096 }
3097 return false;
3098}
3099
3100std::optional<StringRef>
3101AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
3102 switch (RegKind) {
3103 case IS_VGPR:
3104 return StringRef(".amdgcn.next_free_vgpr");
3105 case IS_SGPR:
3106 return StringRef(".amdgcn.next_free_sgpr");
3107 default:
3108 return std::nullopt;
3109 }
3110}
3111
3112void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
3113 auto SymbolName = getGprCountSymbolName(RegKind);
3114 assert(SymbolName && "initializing invalid register kind");
3115 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
3116 Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
3117 Sym->setRedefinable(true);
3118}
3119
3120bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
3121 unsigned DwordRegIndex,
3122 unsigned RegWidth) {
3123 // Symbols are only defined for GCN targets
3124 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
3125 return true;
3126
3127 auto SymbolName = getGprCountSymbolName(RegKind);
3128 if (!SymbolName)
3129 return true;
3130 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
3131
3132 int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
3133 int64_t OldCount;
3134
3135 if (!Sym->isVariable())
3136 return !Error(getLoc(),
3137 ".amdgcn.next_free_{v,s}gpr symbols must be variable");
3138 if (!Sym->getVariableValue()->evaluateAsAbsolute(OldCount))
3139 return !Error(
3140 getLoc(),
3141 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
3142
3143 if (OldCount <= NewMax)
3144 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
3145
3146 return true;
3147}
3148
3149std::unique_ptr<AMDGPUOperand>
3150AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
3151 const auto &Tok = getToken();
3152 SMLoc StartLoc = Tok.getLoc();
3153 SMLoc EndLoc = Tok.getEndLoc();
3154 RegisterKind RegKind;
3155 MCRegister Reg;
3156 unsigned RegNum, RegWidth;
3157
3158 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
3159 return nullptr;
3160 }
3161 if (isHsaAbi(getSTI())) {
3162 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
3163 return nullptr;
3164 } else
3165 KernelScope.usesRegister(RegKind, RegNum, RegWidth);
3166 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
3167}
3168
3169ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands,
3170 bool HasSP3AbsModifier, bool HasLit,
3171 bool HasLit64) {
3172 // TODO: add syntactic sugar for 1/(2*PI)
3173
3174 if (isRegister() || isModifier())
3175 return ParseStatus::NoMatch;
3176
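 // An optional lit()/lit64() wrapper, e.g. "lit(1.0)", parses the inner
 // immediate recursively with the Lit/Lit64 modifier set; the modifier is
 // later used to force a literal encoding instead of an inline constant.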
3177 if (!HasLit && !HasLit64) {
3178 HasLit64 = trySkipId("lit64");
3179 HasLit = !HasLit64 && trySkipId("lit");
3180 if (HasLit || HasLit64) {
3181 if (!skipToken(AsmToken::LParen, "expected left paren after lit"))
3182 return ParseStatus::Failure;
3183 ParseStatus S = parseImm(Operands, HasSP3AbsModifier, HasLit, HasLit64);
3184 if (S.isSuccess() &&
3185 !skipToken(AsmToken::RParen, "expected closing parentheses"))
3186 return ParseStatus::Failure;
3187 return S;
3188 }
3189 }
3190
3191 const auto& Tok = getToken();
3192 const auto& NextTok = peekToken();
3193 bool IsReal = Tok.is(AsmToken::Real);
3194 SMLoc S = getLoc();
3195 bool Negate = false;
3196
3197 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
3198 lex();
3199 IsReal = true;
3200 Negate = true;
3201 }
3202
3203 AMDGPUOperand::Modifiers Mods;
3204 Mods.Lit = HasLit;
3205 Mods.Lit64 = HasLit64;
3206
3207 if (IsReal) {
3208 // Floating-point expressions are not supported.
3209 // Only floating-point literals with an
3210 // optional sign are allowed here.
3211
3212 StringRef Num = getTokenStr();
3213 lex();
3214
3215 APFloat RealVal(APFloat::IEEEdouble());
3216 auto roundMode = APFloat::rmNearestTiesToEven;
3217 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError()))
3218 return ParseStatus::Failure;
3219 if (Negate)
3220 RealVal.changeSign();
3221
3222 Operands.push_back(
3223 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
3224 AMDGPUOperand::ImmTyNone, true));
3225 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3226 Op.setModifiers(Mods);
3227
3228 return ParseStatus::Success;
3229
3230 } else {
3231 int64_t IntVal;
3232 const MCExpr *Expr;
3233 SMLoc S = getLoc();
3234
3235 if (HasSP3AbsModifier) {
3236 // This is a workaround for handling expressions
3237 // as arguments of SP3 'abs' modifier, for example:
3238 // |1.0|
3239 // |-1|
3240 // |1+x|
3241 // This syntax is not compatible with the syntax of standard
3242 // MC expressions (due to the trailing '|').
3243 SMLoc EndLoc;
3244 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
3245 return ParseStatus::Failure;
3246 } else {
3247 if (Parser.parseExpression(Expr))
3248 return ParseStatus::Failure;
3249 }
3250
3251 if (Expr->evaluateAsAbsolute(IntVal)) {
3252 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
3253 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3254 Op.setModifiers(Mods);
3255 } else {
3256 if (HasLit || HasLit64)
3257 return ParseStatus::NoMatch;
3258 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
3259 }
3260
3261 return ParseStatus::Success;
3262 }
3263
3264 return ParseStatus::NoMatch;
3265}
3266
3267ParseStatus AMDGPUAsmParser::parseReg(OperandVector &Operands) {
3268 if (!isRegister())
3269 return ParseStatus::NoMatch;
3270
3271 if (auto R = parseRegister()) {
3272 assert(R->isReg());
3273 Operands.push_back(std::move(R));
3274 return ParseStatus::Success;
3275 }
3276 return ParseStatus::Failure;
3277}
3278
3279ParseStatus AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands,
3280 bool HasSP3AbsMod, bool HasLit,
3281 bool HasLit64) {
3282 ParseStatus Res = parseReg(Operands);
3283 if (!Res.isNoMatch())
3284 return Res;
3285 if (isModifier())
3286 return ParseStatus::NoMatch;
3287 return parseImm(Operands, HasSP3AbsMod, HasLit, HasLit64);
3288}
3289
3290bool
3291AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3292 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
3293 const auto &str = Token.getString();
3294 return str == "abs" || str == "neg" || str == "sext";
3295 }
3296 return false;
3297}
3298
3299bool
3300AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
3301 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
3302}
3303
3304bool
3305AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3306 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
3307}
3308
3309bool
3310AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3311 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
3312}
3313
3314// Check if this is an operand modifier or an opcode modifier
3315 // which may look like an expression but is not. We should
3316// avoid parsing these modifiers as expressions. Currently
3317// recognized sequences are:
3318// |...|
3319// abs(...)
3320// neg(...)
3321// sext(...)
3322// -reg
3323// -|...|
3324// -abs(...)
3325// name:...
3326//
3327bool
3328AMDGPUAsmParser::isModifier() {
3329
3330 AsmToken Tok = getToken();
3331 AsmToken NextToken[2];
3332 peekTokens(NextToken);
3333
3334 return isOperandModifier(Tok, NextToken[0]) ||
3335 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
3336 isOpcodeModifierWithVal(Tok, NextToken[0]);
3337}
3338
3339// Check if the current token is an SP3 'neg' modifier.
3340// Currently this modifier is allowed in the following context:
3341//
3342// 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
3343// 2. Before an 'abs' modifier: -abs(...)
3344// 3. Before an SP3 'abs' modifier: -|...|
3345//
3346// In all other cases "-" is handled as a part
3347// of an expression that follows the sign.
3348//
3349 // Note: When "-" is followed by an integer literal N,
3350 // it is interpreted as integer negation rather
3351 // than a floating-point NEG modifier applied to N.
3352 // Besides being counter-intuitive, such use of a floating-point
3353 // NEG modifier would have resulted in different meanings
3354 // of integer literals used with VOP1/2/C and VOP3,
3355// for example:
3356// v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3357// v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
3358 // Negative fp literals with a preceding "-" are
3359 // handled likewise for uniformity.
3360//
3361bool
3362AMDGPUAsmParser::parseSP3NegModifier() {
3363
3364 AsmToken NextToken[2];
3365 peekTokens(NextToken);
3366
3367 if (isToken(AsmToken::Minus) &&
3368 (isRegister(NextToken[0], NextToken[1]) ||
3369 NextToken[0].is(AsmToken::Pipe) ||
3370 isId(NextToken[0], "abs"))) {
3371 lex();
3372 return true;
3373 }
3374
3375 return false;
3376}
3377
3378ParseStatus
3379AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
3380 bool AllowImm) {
3381 bool Neg, SP3Neg;
3382 bool Abs, SP3Abs;
3383 bool Lit64, Lit;
3384 SMLoc Loc;
3385
3386 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
3387 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus))
3388 return Error(getLoc(), "invalid syntax, expected 'neg' modifier");
3389
3390 SP3Neg = parseSP3NegModifier();
3391
3392 Loc = getLoc();
3393 Neg = trySkipId("neg");
3394 if (Neg && SP3Neg)
3395 return Error(Loc, "expected register or immediate");
3396 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3397 return ParseStatus::Failure;
3398
3399 Abs = trySkipId("abs");
3400 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3401 return ParseStatus::Failure;
3402
3403 Lit64 = trySkipId("lit64");
3404 if (Lit64) {
3405 if (!skipToken(AsmToken::LParen, "expected left paren after lit64"))
3406 return ParseStatus::Failure;
3407 if (!has64BitLiterals())
3408 return Error(Loc, "lit64 is not supported on this GPU");
3409 }
3410
3411 Lit = !Lit64 && trySkipId("lit");
3412 if (Lit && !skipToken(AsmToken::LParen, "expected left paren after lit"))
3413 return ParseStatus::Failure;
3414
3415 Loc = getLoc();
3416 SP3Abs = trySkipToken(AsmToken::Pipe);
3417 if (Abs && SP3Abs)
3418 return Error(Loc, "expected register or immediate");
3419
3420 ParseStatus Res;
3421 if (AllowImm) {
3422 Res = parseRegOrImm(Operands, SP3Abs, Lit, Lit64);
3423 } else {
3424 Res = parseReg(Operands);
3425 }
3426 if (!Res.isSuccess())
3427 return (SP3Neg || Neg || SP3Abs || Abs || Lit || Lit64)
3428 ? ParseStatus::Failure
3429 : Res;
3430
3431 if ((Lit || Lit64) && !Operands.back()->isImm())
3432 Error(Loc, "expected immediate with lit modifier");
3433
3434 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
3435 return ParseStatus::Failure;
3436 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3437 return ParseStatus::Failure;
3438 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3439 return ParseStatus::Failure;
3440 if ((Lit || Lit64) &&
3441 !skipToken(AsmToken::RParen, "expected closing parentheses"))
3442 return ParseStatus::Failure;
3443
3444 AMDGPUOperand::Modifiers Mods;
3445 Mods.Abs = Abs || SP3Abs;
3446 Mods.Neg = Neg || SP3Neg;
3447 Mods.Lit = Lit;
3448 Mods.Lit64 = Lit64;
3449
3450 if (Mods.hasFPModifiers() || Lit || Lit64) {
3451 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3452 if (Op.isExpr())
3453 return Error(Op.getStartLoc(), "expected an absolute expression");
3454 Op.setModifiers(Mods);
3455 }
3456 return ParseStatus::Success;
3457}
3458
3459ParseStatus
3460AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3461 bool AllowImm) {
3462 bool Sext = trySkipId("sext");
3463 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3464 return ParseStatus::Failure;
3465
3466 ParseStatus Res;
3467 if (AllowImm) {
3468 Res = parseRegOrImm(Operands);
3469 } else {
3470 Res = parseReg(Operands);
3471 }
3472 if (!Res.isSuccess())
3473 return Sext ? ParseStatus::Failure : Res;
3474
3475 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3476 return ParseStatus::Failure;
3477
3478 AMDGPUOperand::Modifiers Mods;
3479 Mods.Sext = Sext;
3480
3481 if (Mods.hasIntModifiers()) {
3482 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3483 if (Op.isExpr())
3484 return Error(Op.getStartLoc(), "expected an absolute expression");
3485 Op.setModifiers(Mods);
3486 }
3487
3488 return ParseStatus::Success;
3489}
3490
3491ParseStatus AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3492 return parseRegOrImmWithFPInputMods(Operands, false);
3493}
3494
3495ParseStatus AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3496 return parseRegOrImmWithIntInputMods(Operands, false);
3497}
3498
3499ParseStatus AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3500 auto Loc = getLoc();
3501 if (trySkipId("off")) {
3502 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3503 AMDGPUOperand::ImmTyOff, false));
3504 return ParseStatus::Success;
3505 }
3506
3507 if (!isRegister())
3508 return ParseStatus::NoMatch;
3509
3510 std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3511 if (Reg) {
3512 Operands.push_back(std::move(Reg));
3513 return ParseStatus::Success;
3514 }
3515
3516 return ParseStatus::Failure;
3517}
3518
3519unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3520 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3521
3522 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3523 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3524 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3525 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3526 return Match_InvalidOperand;
3527
3528 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3529 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3530 // v_mac_f32/16 allow only dst_sel == DWORD;
3531 auto OpNum =
3532 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3533 const auto &Op = Inst.getOperand(OpNum);
3534 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3535 return Match_InvalidOperand;
3536 }
3537 }
3538
3539 // Asm can first try to match VOPD or VOPD3. By failing early here with
3540 // Match_InvalidOperand, the parser will retry parsing as VOPD3 or VOPD.
3541 // Checking later during validateInstruction does not give a chance to retry
3542 // parsing as a different encoding.
3543 if (tryAnotherVOPDEncoding(Inst))
3544 return Match_InvalidOperand;
3545
3546 return Match_Success;
3547}
3548
3558
3559// What asm variants we should check
3560ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3561 if (isForcedDPP() && isForcedVOP3()) {
3562 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP};
3563 return ArrayRef(Variants);
3564 }
3565 if (getForcedEncodingSize() == 32) {
3566 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3567 return ArrayRef(Variants);
3568 }
3569
3570 if (isForcedVOP3()) {
3571 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3572 return ArrayRef(Variants);
3573 }
3574
3575 if (isForcedSDWA()) {
3576 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3577 AMDGPUAsmVariants::SDWA9};
3578 return ArrayRef(Variants);
3579 }
3580
3581 if (isForcedDPP()) {
3582 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3583 return ArrayRef(Variants);
3584 }
3585
3586 return getAllVariants();
3587}
3588
3589StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3590 if (isForcedDPP() && isForcedVOP3())
3591 return "e64_dpp";
3592
3593 if (getForcedEncodingSize() == 32)
3594 return "e32";
3595
3596 if (isForcedVOP3())
3597 return "e64";
3598
3599 if (isForcedSDWA())
3600 return "sdwa";
3601
3602 if (isForcedDPP())
3603 return "dpp";
3604
3605 return "";
3606}
3607
3608unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3609 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3610 for (MCPhysReg Reg : Desc.implicit_uses()) {
3611 switch (Reg) {
3612 case AMDGPU::FLAT_SCR:
3613 case AMDGPU::VCC:
3614 case AMDGPU::VCC_LO:
3615 case AMDGPU::VCC_HI:
3616 case AMDGPU::M0:
3617 return Reg;
3618 default:
3619 break;
3620 }
3621 }
3622 return AMDGPU::NoRegister;
3623}
3624
3625// NB: This code is correct only when used to check constant
3626 // bus limitations because GFX7 supports no f16 inline constants.
3627// Note that there are no cases when a GFX7 opcode violates
3628// constant bus limitations due to the use of an f16 constant.
3629bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3630 unsigned OpIdx) const {
3631 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3632
3635 return false;
3636 }
3637
3638 const MCOperand &MO = Inst.getOperand(OpIdx);
3639
3640 int64_t Val = MO.getImm();
3641 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3642
3643 switch (OpSize) { // expected operand size
3644 case 8:
3645 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3646 case 4:
3647 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3648 case 2: {
3649 const unsigned OperandType = Desc.operands()[OpIdx].OperandType;
3652 return AMDGPU::isInlinableLiteralI16(Val, hasInv2PiInlineImm());
3653
3657
3661
3665
3668 return AMDGPU::isInlinableLiteralFP16(Val, hasInv2PiInlineImm());
3669
3672 return AMDGPU::isInlinableLiteralBF16(Val, hasInv2PiInlineImm());
3673
3675 return false;
3676
3677 llvm_unreachable("invalid operand type");
3678 }
3679 default:
3680 llvm_unreachable("invalid operand size");
3681 }
3682}
3683
3684unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3685 if (!isGFX10Plus())
3686 return 1;
3687
3688 switch (Opcode) {
3689 // 64-bit shift instructions can use only one scalar value input
3690 case AMDGPU::V_LSHLREV_B64_e64:
3691 case AMDGPU::V_LSHLREV_B64_gfx10:
3692 case AMDGPU::V_LSHLREV_B64_e64_gfx11:
3693 case AMDGPU::V_LSHLREV_B64_e32_gfx12:
3694 case AMDGPU::V_LSHLREV_B64_e64_gfx12:
3695 case AMDGPU::V_LSHRREV_B64_e64:
3696 case AMDGPU::V_LSHRREV_B64_gfx10:
3697 case AMDGPU::V_LSHRREV_B64_e64_gfx11:
3698 case AMDGPU::V_LSHRREV_B64_e64_gfx12:
3699 case AMDGPU::V_ASHRREV_I64_e64:
3700 case AMDGPU::V_ASHRREV_I64_gfx10:
3701 case AMDGPU::V_ASHRREV_I64_e64_gfx11:
3702 case AMDGPU::V_ASHRREV_I64_e64_gfx12:
3703 case AMDGPU::V_LSHL_B64_e64:
3704 case AMDGPU::V_LSHR_B64_e64:
3705 case AMDGPU::V_ASHR_I64_e64:
3706 return 1;
3707 default:
3708 return 2;
3709 }
3710}
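// Illustrative note (not part of the original source): on GFX10+ most VOP
// encodings may read two scalar operands, but the 64-bit shifts listed above
// are limited to one, e.g.
//   v_lshlrev_b64 v[0:1], s0, v[2:3]   ; one SGPR  -> accepted
//   v_lshlrev_b64 v[0:1], s0, s[2:3]   ; two SGPRs -> constant bus error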
3711
3712constexpr unsigned MAX_SRC_OPERANDS_NUM = 6;
3713using OperandIndices = SmallVector<int16_t, MAX_SRC_OPERANDS_NUM>;
3714
3715// Get regular operand indices in the same order as specified
3716// in the instruction (but append mandatory literals to the end).
3717static OperandIndices getSrcOperandIndices(unsigned Opcode,
3718 bool AddMandatoryLiterals = false) {
3719
3720 int16_t ImmIdx =
3721 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::imm) : -1;
3722
3723 if (isVOPD(Opcode)) {
3724 int16_t ImmXIdx =
3725 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::immX) : -1;
3726
3727 return {getNamedOperandIdx(Opcode, OpName::src0X),
3728 getNamedOperandIdx(Opcode, OpName::vsrc1X),
3729 getNamedOperandIdx(Opcode, OpName::vsrc2X),
3730 getNamedOperandIdx(Opcode, OpName::src0Y),
3731 getNamedOperandIdx(Opcode, OpName::vsrc1Y),
3732 getNamedOperandIdx(Opcode, OpName::vsrc2Y),
3733 ImmXIdx,
3734 ImmIdx};
3735 }
3736
3737 return {getNamedOperandIdx(Opcode, OpName::src0),
3738 getNamedOperandIdx(Opcode, OpName::src1),
3739 getNamedOperandIdx(Opcode, OpName::src2), ImmIdx};
3740}
3741
3742bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3743 const MCOperand &MO = Inst.getOperand(OpIdx);
3744 if (MO.isImm())
3745 return !isInlineConstant(Inst, OpIdx);
3746 if (MO.isReg()) {
3747 auto Reg = MO.getReg();
3748 if (!Reg)
3749 return false;
3750 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3751 auto PReg = mc2PseudoReg(Reg);
3752 return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3753 }
3754 return true;
3755}
3756
3757// Based on the comment for `AMDGPUInstructionSelector::selectWritelane`:
3758// Writelane is special in that it can use SGPR and M0 (which would normally
3759// count as using the constant bus twice - but in this case it is allowed since
3760// the lane selector doesn't count as a use of the constant bus). However, it is
3761// still required to abide by the 1 SGPR rule.
3762static bool checkWriteLane(const MCInst &Inst) {
3763 const unsigned Opcode = Inst.getOpcode();
3764 if (Opcode != V_WRITELANE_B32_gfx6_gfx7 && Opcode != V_WRITELANE_B32_vi)
3765 return false;
3766 const MCOperand &LaneSelOp = Inst.getOperand(2);
3767 if (!LaneSelOp.isReg())
3768 return false;
3769 auto LaneSelReg = mc2PseudoReg(LaneSelOp.getReg());
3770 return LaneSelReg == M0 || LaneSelReg == M0_gfxpre11;
3771}
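// Illustrative example (not part of the original source): under this rule
//   v_writelane_b32 v1, s2, m0
// is accepted on the targets listed above even though s2 and m0 would
// otherwise count as two constant bus reads; the 1-SGPR rule still applies.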
3772
3773bool AMDGPUAsmParser::validateConstantBusLimitations(
3774 const MCInst &Inst, const OperandVector &Operands) {
3775 const unsigned Opcode = Inst.getOpcode();
3776 const MCInstrDesc &Desc = MII.get(Opcode);
3777 MCRegister LastSGPR;
3778 unsigned ConstantBusUseCount = 0;
3779 unsigned NumLiterals = 0;
3780 unsigned LiteralSize;
3781
3782 if (!(Desc.TSFlags &
3783 (SIInstrFlags::VOPC | SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3784 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | SIInstrFlags::SDWA)) &&
3785 !isVOPD(Opcode))
3786 return true;
3787
3788 if (checkWriteLane(Inst))
3789 return true;
3790
3791 // Check special imm operands (used by madmk, etc)
3792 if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::imm)) {
3793 ++NumLiterals;
3794 LiteralSize = 4;
3795 }
3796
3797 SmallDenseSet<unsigned> SGPRsUsed;
3798 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3799 if (SGPRUsed != AMDGPU::NoRegister) {
3800 SGPRsUsed.insert(SGPRUsed);
3801 ++ConstantBusUseCount;
3802 }
3803
3804 OperandIndices OpIndices = getSrcOperandIndices(Opcode);
3805
3806 unsigned ConstantBusLimit = getConstantBusLimit(Opcode);
3807
3808 for (int OpIdx : OpIndices) {
3809 if (OpIdx == -1)
3810 continue;
3811
3812 const MCOperand &MO = Inst.getOperand(OpIdx);
3813 if (usesConstantBus(Inst, OpIdx)) {
3814 if (MO.isReg()) {
3815 LastSGPR = mc2PseudoReg(MO.getReg());
3816 // Pairs of registers with a partial intersection like these
3817 // s0, s[0:1]
3818 // flat_scratch_lo, flat_scratch
3819 // flat_scratch_lo, flat_scratch_hi
3820 // are theoretically valid but they are disabled anyway.
3821 // Note that this code mimics SIInstrInfo::verifyInstruction
3822 if (SGPRsUsed.insert(LastSGPR).second) {
3823 ++ConstantBusUseCount;
3824 }
3825 } else { // Expression or a literal
3826
3827 if (Desc.operands()[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3828 continue; // special operand like VINTERP attr_chan
3829
3830 // An instruction may use only one literal.
3831 // This has been validated on the previous step.
3832 // See validateVOPLiteral.
3833 // This literal may be used as more than one operand.
3834 // If all these operands are of the same size,
3835 // this literal counts as one scalar value.
3836 // Otherwise it counts as 2 scalar values.
3837 // See "GFX10 Shader Programming", section 3.6.2.3.
3838
3839 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3840 if (Size < 4)
3841 Size = 4;
3842
3843 if (NumLiterals == 0) {
3844 NumLiterals = 1;
3845 LiteralSize = Size;
3846 } else if (LiteralSize != Size) {
3847 NumLiterals = 2;
3848 }
3849 }
3850 }
3851
3852 if (ConstantBusUseCount + NumLiterals > ConstantBusLimit) {
3853 Error(getOperandLoc(Operands, OpIdx),
3854 "invalid operand (violates constant bus restrictions)");
3855 return false;
3856 }
3857 }
3858 return true;
3859}
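// Illustrative example (not part of the original source): with a limit of 1
// (pre-GFX10),
//   v_add_f32 v0, s0, s1   ; two distinct SGPRs   -> rejected above
//   v_add_f32 v0, s0, s0   ; one SGPR, used twice -> accepted
// A literal constant counts against the same limit as an SGPR read.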
3860
3861std::optional<unsigned>
3862AMDGPUAsmParser::checkVOPDRegBankConstraints(const MCInst &Inst, bool AsVOPD3) {
3863
3864 const unsigned Opcode = Inst.getOpcode();
3865 if (!isVOPD(Opcode))
3866 return {};
3867
3868 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3869
3870 auto getVRegIdx = [&](unsigned, unsigned OperandIdx) {
3871 const MCOperand &Opr = Inst.getOperand(OperandIdx);
3872 return (Opr.isReg() && !isSGPR(mc2PseudoReg(Opr.getReg()), TRI))
3873 ? Opr.getReg()
3874 : MCRegister();
3875 };
3876
3877 // On GFX12+ if both OpX and OpY are V_MOV_B32 then OpY uses SRC2
3878 // source-cache.
3879 bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12 ||
3880 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx1250 ||
3881 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_e96_gfx1250;
3882 bool AllowSameVGPR = isGFX1250();
3883
3884 if (AsVOPD3) { // Literal constants are not allowed with VOPD3.
3885 for (auto OpName : {OpName::src0X, OpName::src0Y}) {
3886 int I = getNamedOperandIdx(Opcode, OpName);
3887 const MCOperand &Op = Inst.getOperand(I);
3888 if (!Op.isImm())
3889 continue;
3890 int64_t Imm = Op.getImm();
3891 if (!AMDGPU::isInlinableLiteral32(Imm, hasInv2PiInlineImm()) &&
3892 !AMDGPU::isInlinableLiteral64(Imm, hasInv2PiInlineImm()))
3893 return (unsigned)I;
3894 }
3895
3896 for (auto OpName : {OpName::vsrc1X, OpName::vsrc1Y, OpName::vsrc2X,
3897 OpName::vsrc2Y, OpName::imm}) {
3898 int I = getNamedOperandIdx(Opcode, OpName);
3899 if (I == -1)
3900 continue;
3901 const MCOperand &Op = Inst.getOperand(I);
3902 if (Op.isImm())
3903 return (unsigned)I;
3904 }
3905 }
3906
3907 const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
3908 auto InvalidCompOprIdx = InstInfo.getInvalidCompOperandIndex(
3909 getVRegIdx, *TRI, SkipSrc, AllowSameVGPR, AsVOPD3);
3910
3911 return InvalidCompOprIdx;
3912}
3913
3914bool AMDGPUAsmParser::validateVOPD(const MCInst &Inst,
3915 const OperandVector &Operands) {
3916
3917 unsigned Opcode = Inst.getOpcode();
3918 bool AsVOPD3 = MII.get(Opcode).TSFlags & SIInstrFlags::VOPD3;
3919
3920 if (AsVOPD3) {
3921 for (const std::unique_ptr<MCParsedAsmOperand> &Operand : Operands) {
3922 AMDGPUOperand &Op = (AMDGPUOperand &)*Operand;
3923 if ((Op.isRegKind() || Op.isImmTy(AMDGPUOperand::ImmTyNone)) &&
3924 (Op.getModifiers().getFPModifiersOperand() & SISrcMods::ABS))
3925 Error(Op.getStartLoc(), "ABS not allowed in VOPD3 instructions");
3926 }
3927 }
3928
3929 auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, AsVOPD3);
3930 if (!InvalidCompOprIdx.has_value())
3931 return true;
3932
3933 auto CompOprIdx = *InvalidCompOprIdx;
3934 const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
3935 auto ParsedIdx =
3936 std::max(InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx),
3937 InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx));
3938 assert(ParsedIdx > 0 && ParsedIdx < Operands.size());
3939
3940 auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc();
3941 if (CompOprIdx == VOPD::Component::DST) {
3942 if (AsVOPD3)
3943 Error(Loc, "dst registers must be distinct");
3944 else
3945 Error(Loc, "one dst register must be even and the other odd");
3946 } else {
3947 auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
3948 Error(Loc, Twine("src") + Twine(CompSrcIdx) +
3949 " operands must use different VGPR banks");
3950 }
3951
3952 return false;
3953}
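// Illustrative example (not part of the original source): in the plain VOPD
// form the two destinations must have different parity, e.g.
//   v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v5   ; even/odd  -> accepted
//   v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v2, v5   ; both even -> error
// whereas VOPD3 only requires the destinations to be distinct.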
3954
3955// \returns true if \p Inst does not satisfy VOPD constraints, but can
3956// potentially be used as VOPD3 with the same operands.
3957bool AMDGPUAsmParser::tryVOPD3(const MCInst &Inst) {
3958 // First check if it fits VOPD
3959 auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, false);
3960 if (!InvalidCompOprIdx.has_value())
3961 return false;
3962
3963 // Then if it fits VOPD3
3964 InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, true);
3965 if (InvalidCompOprIdx.has_value()) {
3966 // If failed operand is dst it is better to show error about VOPD3
3967 // instruction as it has more capabilities and error message will be
3968 // more informative. If the dst is not legal for VOPD3, then it is not
3969 // legal for VOPD either.
3970 if (*InvalidCompOprIdx == VOPD::Component::DST)
3971 return true;
3972
3973 // Otherwise prefer VOPD as we may find ourselves in an awkward situation
3974 // with a conflict in tied implicit src2 of fmac and no asm operand to
3975 // point to.
3976 return false;
3977 }
3978 return true;
3979}
3980
3981// \returns true if a VOPD3 instruction can also be represented as a shorter
3982// VOPD encoding.
3983bool AMDGPUAsmParser::tryVOPD(const MCInst &Inst) {
3984 const unsigned Opcode = Inst.getOpcode();
3985 const auto &II = getVOPDInstInfo(Opcode, &MII);
3986 unsigned EncodingFamily = AMDGPU::getVOPDEncodingFamily(getSTI());
3987 if (!getCanBeVOPD(II[VOPD::X].getOpcode(), EncodingFamily, false).X ||
3988 !getCanBeVOPD(II[VOPD::Y].getOpcode(), EncodingFamily, false).Y)
3989 return false;
3990
3991 // This is an awkward exception, VOPD3 variant of V_DUAL_CNDMASK_B32 has
3992 // explicit src2 even if it is vcc_lo. If it was parsed as VOPD3 it cannot
3993 // be parsed as VOPD which does not accept src2.
3994 if (II[VOPD::X].getOpcode() == AMDGPU::V_CNDMASK_B32_e32 ||
3995 II[VOPD::Y].getOpcode() == AMDGPU::V_CNDMASK_B32_e32)
3996 return false;
3997
3998 // If any modifiers are set this cannot be VOPD.
3999 for (auto OpName : {OpName::src0X_modifiers, OpName::src0Y_modifiers,
4000 OpName::vsrc1X_modifiers, OpName::vsrc1Y_modifiers,
4001 OpName::vsrc2X_modifiers, OpName::vsrc2Y_modifiers}) {
4002 int I = getNamedOperandIdx(Opcode, OpName);
4003 if (I == -1)
4004 continue;
4005 if (Inst.getOperand(I).getImm())
4006 return false;
4007 }
4008
4009 return !tryVOPD3(Inst);
4010}
4011
4012// VOPD3 has more relaxed register constraints than VOPD. We prefer the shorter
4013// VOPD form but switch to VOPD3 otherwise.
4014bool AMDGPUAsmParser::tryAnotherVOPDEncoding(const MCInst &Inst) {
4015 const unsigned Opcode = Inst.getOpcode();
4016 if (!isGFX1250() || !isVOPD(Opcode))
4017 return false;
4018
4019 if (MII.get(Opcode).TSFlags & SIInstrFlags::VOPD3)
4020 return tryVOPD(Inst);
4021 return tryVOPD3(Inst);
4022}
4023
4024bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
4025
4026 const unsigned Opc = Inst.getOpcode();
4027 const MCInstrDesc &Desc = MII.get(Opc);
4028
4029 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
4030 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
4031 assert(ClampIdx != -1);
4032 return Inst.getOperand(ClampIdx).getImm() == 0;
4033 }
4034
4035 return true;
4036}
4037
4038constexpr uint64_t MIMGFlags =
4039 SIInstrFlags::MIMG | SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE;
4040
4041bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst,
4042 const SMLoc &IDLoc) {
4043
4044 const unsigned Opc = Inst.getOpcode();
4045 const MCInstrDesc &Desc = MII.get(Opc);
4046
4047 if ((Desc.TSFlags & MIMGFlags) == 0)
4048 return true;
4049
4050 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
4051 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
4052 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
4053
4054 if (VDataIdx == -1 && isGFX10Plus()) // no return image_sample
4055 return true;
4056
4057 if ((DMaskIdx == -1 || TFEIdx == -1) && isGFX10_AEncoding()) // intersect_ray
4058 return true;
4059
4060 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
4061 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
4062 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
4063 if (DMask == 0)
4064 DMask = 1;
4065
4066 bool IsPackedD16 = false;
4067 unsigned DataSize =
4068 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : llvm::popcount(DMask);
4069 if (hasPackedD16()) {
4070 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
4071 IsPackedD16 = D16Idx >= 0;
4072 if (IsPackedD16 && Inst.getOperand(D16Idx).getImm())
4073 DataSize = (DataSize + 1) / 2;
4074 }
4075
4076 if ((VDataSize / 4) == DataSize + TFESize)
4077 return true;
4078
4079 StringRef Modifiers;
4080 if (isGFX90A())
4081 Modifiers = IsPackedD16 ? "dmask and d16" : "dmask";
4082 else
4083 Modifiers = IsPackedD16 ? "dmask, d16 and tfe" : "dmask and tfe";
4084
4085 Error(IDLoc, Twine("image data size does not match ") + Modifiers);
4086 return false;
4087}
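// Illustrative example (not part of the original source): the number of data
// VGPRs must equal popcount(dmask), plus one when tfe is set, e.g.
//   image_load v[0:2], v[4:5], s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_2D
// needs three data registers (four with tfe); packed d16 halves the count,
// rounded up.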
4088
4089bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst,
4090 const SMLoc &IDLoc) {
4091 const unsigned Opc = Inst.getOpcode();
4092 const MCInstrDesc &Desc = MII.get(Opc);
4093
4094 if ((Desc.TSFlags & MIMGFlags) == 0 || !isGFX10Plus())
4095 return true;
4096
4097 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
4098
4099 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
4100 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
4101 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
4102 AMDGPU::OpName RSrcOpName = (Desc.TSFlags & SIInstrFlags::MIMG)
4103 ? AMDGPU::OpName::srsrc
4104 : AMDGPU::OpName::rsrc;
4105 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, RSrcOpName);
4106 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
4107 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
4108
4109 assert(VAddr0Idx != -1);
4110 assert(SrsrcIdx != -1);
4111 assert(SrsrcIdx > VAddr0Idx);
4112
4113 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
4114 if (BaseOpcode->BVH) {
4115 if (IsA16 == BaseOpcode->A16)
4116 return true;
4117 Error(IDLoc, "image address size does not match a16");
4118 return false;
4119 }
4120
4121 unsigned Dim = Inst.getOperand(DimIdx).getImm();
4122 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
4123 bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
4124 unsigned ActualAddrSize =
4125 IsNSA ? SrsrcIdx - VAddr0Idx
4126 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
4127
4128 unsigned ExpectedAddrSize =
4129 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
4130
4131 if (IsNSA) {
4132 if (hasPartialNSAEncoding() &&
4133 ExpectedAddrSize >
4134 getNSAMaxSize(Desc.TSFlags & SIInstrFlags::VSAMPLE)) {
4135 int VAddrLastIdx = SrsrcIdx - 1;
4136 unsigned VAddrLastSize =
4137 AMDGPU::getRegOperandSize(getMRI(), Desc, VAddrLastIdx) / 4;
4138
4139 ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize;
4140 }
4141 } else {
4142 if (ExpectedAddrSize > 12)
4143 ExpectedAddrSize = 16;
4144
4145 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
4146 // This provides backward compatibility for assembly created
4147 // before 160b/192b/224b types were directly supported.
4148 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
4149 return true;
4150 }
4151
4152 if (ActualAddrSize == ExpectedAddrSize)
4153 return true;
4154
4155 Error(IDLoc, "image address size does not match dim and a16");
4156 return false;
4157}
4158
4159bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
4160
4161 const unsigned Opc = Inst.getOpcode();
4162 const MCInstrDesc &Desc = MII.get(Opc);
4163
4164 if ((Desc.TSFlags & MIMGFlags) == 0)
4165 return true;
4166 if (!Desc.mayLoad() || !Desc.mayStore())
4167 return true; // Not atomic
4168
4169 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
4170 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
4171
4172 // This is an incomplete check because image_atomic_cmpswap
4173 // may only use 0x3 and 0xf while other atomic operations
4174 // may use 0x1 and 0x3. However these limitations are
4175 // verified when we check that dmask matches dst size.
4176 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
4177}
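// Illustrative example (not part of the original source): a 32-bit image
// atomic uses dmask:0x1 and a 64-bit one (or image_atomic_cmpswap) uses
// dmask:0x3, e.g. "image_atomic_add v4, v[8:9], s[0:7] dmask:0x1 unorm";
// only the values 0x1, 0x3 and 0xf pass the check above.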
4178
4179bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
4180
4181 const unsigned Opc = Inst.getOpcode();
4182 const MCInstrDesc &Desc = MII.get(Opc);
4183
4184 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
4185 return true;
4186
4187 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
4188 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
4189
4190 // GATHER4 instructions use dmask in a different fashion compared to
4191 // other MIMG instructions. The only useful DMASK values are
4192 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
4193 // (red,red,red,red) etc.) The ISA document doesn't mention
4194 // this.
4195 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
4196}
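// Illustrative example (not part of the original source): gather4 returns one
// component from each of four texels, so exactly one channel bit may be set,
// e.g. dmask:0x1 (red) or dmask:0x8 (alpha); dmask:0x3 is rejected above.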
4197
4198bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst,
4199 const OperandVector &Operands) {
4200 if (!isGFX10Plus())
4201 return true;
4202
4203 const unsigned Opc = Inst.getOpcode();
4204 const MCInstrDesc &Desc = MII.get(Opc);
4205
4206 if ((Desc.TSFlags & MIMGFlags) == 0)
4207 return true;
4208
4209 // image_bvh_intersect_ray instructions do not have dim
4210 if (AMDGPU::getMIMGBaseOpcode(Opc)->BVH)
4211 return true;
4212
4213 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4214 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4215 if (Op.isDim())
4216 return true;
4217 }
4218 return false;
4219}
4220
4221bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
4222 const unsigned Opc = Inst.getOpcode();
4223 const MCInstrDesc &Desc = MII.get(Opc);
4224
4225 if ((Desc.TSFlags & MIMGFlags) == 0)
4226 return true;
4227
4228 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
4229 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
4230 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
4231
4232 if (!BaseOpcode->MSAA)
4233 return true;
4234
4235 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
4236 assert(DimIdx != -1);
4237
4238 unsigned Dim = Inst.getOperand(DimIdx).getImm();
4239 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
4240
4241 return DimInfo->MSAA;
4242}
4243
4244static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
4245{
4246 switch (Opcode) {
4247 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
4248 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
4249 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
4250 return true;
4251 default:
4252 return false;
4253 }
4254}
4255
4256// movrels* opcodes should only allow VGPRS as src0.
4257// This is specified in .td description for vop1/vop3,
4258// but sdwa is handled differently. See isSDWAOperand.
4259bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
4260 const OperandVector &Operands) {
4261
4262 const unsigned Opc = Inst.getOpcode();
4263 const MCInstrDesc &Desc = MII.get(Opc);
4264
4265 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
4266 return true;
4267
4268 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4269 assert(Src0Idx != -1);
4270
4271 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4272 if (Src0.isReg()) {
4273 auto Reg = mc2PseudoReg(Src0.getReg());
4274 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4275 if (!isSGPR(Reg, TRI))
4276 return true;
4277 }
4278
4279 Error(getOperandLoc(Operands, Src0Idx), "source operand must be a VGPR");
4280 return false;
4281}
4282
4283bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
4284 const OperandVector &Operands) {
4285
4286 const unsigned Opc = Inst.getOpcode();
4287
4288 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
4289 return true;
4290
4291 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4292 assert(Src0Idx != -1);
4293
4294 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4295 if (!Src0.isReg())
4296 return true;
4297
4298 auto Reg = mc2PseudoReg(Src0.getReg());
4299 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4300 if (!isGFX90A() && isSGPR(Reg, TRI)) {
4301 Error(getOperandLoc(Operands, Src0Idx),
4302 "source operand must be either a VGPR or an inline constant");
4303 return false;
4304 }
4305
4306 return true;
4307}
4308
4309bool AMDGPUAsmParser::validateMAISrc2(const MCInst &Inst,
4310 const OperandVector &Operands) {
4311 unsigned Opcode = Inst.getOpcode();
4312 const MCInstrDesc &Desc = MII.get(Opcode);
4313
4314 if (!(Desc.TSFlags & SIInstrFlags::IsMAI) ||
4315 !getFeatureBits()[FeatureMFMAInlineLiteralBug])
4316 return true;
4317
4318 const int Src2Idx = getNamedOperandIdx(Opcode, OpName::src2);
4319 if (Src2Idx == -1)
4320 return true;
4321
4322 if (Inst.getOperand(Src2Idx).isImm() && isInlineConstant(Inst, Src2Idx)) {
4323 Error(getOperandLoc(Operands, Src2Idx),
4324 "inline constants are not allowed for this operand");
4325 return false;
4326 }
4327
4328 return true;
4329}
4330
4331bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
4332 const OperandVector &Operands) {
4333 const unsigned Opc = Inst.getOpcode();
4334 const MCInstrDesc &Desc = MII.get(Opc);
4335
4336 if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
4337 return true;
4338
4339 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
4340 if (BlgpIdx != -1) {
4341 if (const MFMA_F8F6F4_Info *Info = AMDGPU::isMFMA_F8F6F4(Opc)) {
4342 int CbszIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::cbsz);
4343
4344 unsigned CBSZ = Inst.getOperand(CbszIdx).getImm();
4345 unsigned BLGP = Inst.getOperand(BlgpIdx).getImm();
4346
4347 // Validate the correct register size was used for the floating point
4348 // format operands
4349
4350 bool Success = true;
4351 if (Info->NumRegsSrcA != mfmaScaleF8F6F4FormatToNumRegs(CBSZ)) {
4352 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4353 Error(getOperandLoc(Operands, Src0Idx),
4354 "wrong register tuple size for cbsz value " + Twine(CBSZ));
4355 Success = false;
4356 }
4357
4358 if (Info->NumRegsSrcB != mfmaScaleF8F6F4FormatToNumRegs(BLGP)) {
4359 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
4360 Error(getOperandLoc(Operands, Src1Idx),
4361 "wrong register tuple size for blgp value " + Twine(BLGP));
4362 Success = false;
4363 }
4364
4365 return Success;
4366 }
4367 }
4368
4369 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
4370 if (Src2Idx == -1)
4371 return true;
4372
4373 const MCOperand &Src2 = Inst.getOperand(Src2Idx);
4374 if (!Src2.isReg())
4375 return true;
4376
4377 MCRegister Src2Reg = Src2.getReg();
4378 MCRegister DstReg = Inst.getOperand(0).getReg();
4379 if (Src2Reg == DstReg)
4380 return true;
4381
4382 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4383 if (TRI->getRegClass(Desc.operands()[0].RegClass).getSizeInBits() <= 128)
4384 return true;
4385
4386 if (TRI->regsOverlap(Src2Reg, DstReg)) {
4387 Error(getOperandLoc(Operands, Src2Idx),
4388 "source 2 operand must not partially overlap with dst");
4389 return false;
4390 }
4391
4392 return true;
4393}
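// Illustrative example (not part of the original source): for a wide MFMA
// result, src2 may equal the whole dst but must not partially overlap it,
// e.g. dst a[0:15] with src2 a[0:15] is accepted, while dst a[0:15] with
// src2 a[2:17] triggers the "must not partially overlap" error above.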
4394
4395bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
4396 switch (Inst.getOpcode()) {
4397 default:
4398 return true;
4399 case V_DIV_SCALE_F32_gfx6_gfx7:
4400 case V_DIV_SCALE_F32_vi:
4401 case V_DIV_SCALE_F32_gfx10:
4402 case V_DIV_SCALE_F64_gfx6_gfx7:
4403 case V_DIV_SCALE_F64_vi:
4404 case V_DIV_SCALE_F64_gfx10:
4405 break;
4406 }
4407
4408 // TODO: Check that src0 = src1 or src2.
4409
4410 for (auto Name : {AMDGPU::OpName::src0_modifiers,
4411 AMDGPU::OpName::src2_modifiers,
4412 AMDGPU::OpName::src2_modifiers}) {
4413 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
4414 .getImm() &
4416 return false;
4417 }
4418 }
4419
4420 return true;
4421}
4422
4423bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
4424
4425 const unsigned Opc = Inst.getOpcode();
4426 const MCInstrDesc &Desc = MII.get(Opc);
4427
4428 if ((Desc.TSFlags & MIMGFlags) == 0)
4429 return true;
4430
4431 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
4432 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
4433 if (isCI() || isSI())
4434 return false;
4435 }
4436
4437 return true;
4438}
4439
4440bool AMDGPUAsmParser::validateTensorR128(const MCInst &Inst) {
4441 const unsigned Opc = Inst.getOpcode();
4442 const MCInstrDesc &Desc = MII.get(Opc);
4443
4444 if ((Desc.TSFlags & SIInstrFlags::TENSOR_CNT) == 0)
4445 return true;
4446
4447 int R128Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::r128);
4448
4449 return R128Idx < 0 || !Inst.getOperand(R128Idx).getImm();
4450}
4451
4452static bool IsRevOpcode(const unsigned Opcode)
4453{
4454 switch (Opcode) {
4455 case AMDGPU::V_SUBREV_F32_e32:
4456 case AMDGPU::V_SUBREV_F32_e64:
4457 case AMDGPU::V_SUBREV_F32_e32_gfx10:
4458 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
4459 case AMDGPU::V_SUBREV_F32_e32_vi:
4460 case AMDGPU::V_SUBREV_F32_e64_gfx10:
4461 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
4462 case AMDGPU::V_SUBREV_F32_e64_vi:
4463
4464 case AMDGPU::V_SUBREV_CO_U32_e32:
4465 case AMDGPU::V_SUBREV_CO_U32_e64:
4466 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
4467 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
4468
4469 case AMDGPU::V_SUBBREV_U32_e32:
4470 case AMDGPU::V_SUBBREV_U32_e64:
4471 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
4472 case AMDGPU::V_SUBBREV_U32_e32_vi:
4473 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
4474 case AMDGPU::V_SUBBREV_U32_e64_vi:
4475
4476 case AMDGPU::V_SUBREV_U32_e32:
4477 case AMDGPU::V_SUBREV_U32_e64:
4478 case AMDGPU::V_SUBREV_U32_e32_gfx9:
4479 case AMDGPU::V_SUBREV_U32_e32_vi:
4480 case AMDGPU::V_SUBREV_U32_e64_gfx9:
4481 case AMDGPU::V_SUBREV_U32_e64_vi:
4482
4483 case AMDGPU::V_SUBREV_F16_e32:
4484 case AMDGPU::V_SUBREV_F16_e64:
4485 case AMDGPU::V_SUBREV_F16_e32_gfx10:
4486 case AMDGPU::V_SUBREV_F16_e32_vi:
4487 case AMDGPU::V_SUBREV_F16_e64_gfx10:
4488 case AMDGPU::V_SUBREV_F16_e64_vi:
4489
4490 case AMDGPU::V_SUBREV_U16_e32:
4491 case AMDGPU::V_SUBREV_U16_e64:
4492 case AMDGPU::V_SUBREV_U16_e32_vi:
4493 case AMDGPU::V_SUBREV_U16_e64_vi:
4494
4495 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
4496 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
4497 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
4498
4499 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
4500 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
4501
4502 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
4503 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
4504
4505 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
4506 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
4507
4508 case AMDGPU::V_LSHRREV_B32_e32:
4509 case AMDGPU::V_LSHRREV_B32_e64:
4510 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
4511 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
4512 case AMDGPU::V_LSHRREV_B32_e32_vi:
4513 case AMDGPU::V_LSHRREV_B32_e64_vi:
4514 case AMDGPU::V_LSHRREV_B32_e32_gfx10:
4515 case AMDGPU::V_LSHRREV_B32_e64_gfx10:
4516
4517 case AMDGPU::V_ASHRREV_I32_e32:
4518 case AMDGPU::V_ASHRREV_I32_e64:
4519 case AMDGPU::V_ASHRREV_I32_e32_gfx10:
4520 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
4521 case AMDGPU::V_ASHRREV_I32_e32_vi:
4522 case AMDGPU::V_ASHRREV_I32_e64_gfx10:
4523 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
4524 case AMDGPU::V_ASHRREV_I32_e64_vi:
4525
4526 case AMDGPU::V_LSHLREV_B32_e32:
4527 case AMDGPU::V_LSHLREV_B32_e64:
4528 case AMDGPU::V_LSHLREV_B32_e32_gfx10:
4529 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
4530 case AMDGPU::V_LSHLREV_B32_e32_vi:
4531 case AMDGPU::V_LSHLREV_B32_e64_gfx10:
4532 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
4533 case AMDGPU::V_LSHLREV_B32_e64_vi:
4534
4535 case AMDGPU::V_LSHLREV_B16_e32:
4536 case AMDGPU::V_LSHLREV_B16_e64:
4537 case AMDGPU::V_LSHLREV_B16_e32_vi:
4538 case AMDGPU::V_LSHLREV_B16_e64_vi:
4539 case AMDGPU::V_LSHLREV_B16_gfx10:
4540
4541 case AMDGPU::V_LSHRREV_B16_e32:
4542 case AMDGPU::V_LSHRREV_B16_e64:
4543 case AMDGPU::V_LSHRREV_B16_e32_vi:
4544 case AMDGPU::V_LSHRREV_B16_e64_vi:
4545 case AMDGPU::V_LSHRREV_B16_gfx10:
4546
4547 case AMDGPU::V_ASHRREV_I16_e32:
4548 case AMDGPU::V_ASHRREV_I16_e64:
4549 case AMDGPU::V_ASHRREV_I16_e32_vi:
4550 case AMDGPU::V_ASHRREV_I16_e64_vi:
4551 case AMDGPU::V_ASHRREV_I16_gfx10:
4552
4553 case AMDGPU::V_LSHLREV_B64_e64:
4554 case AMDGPU::V_LSHLREV_B64_gfx10:
4555 case AMDGPU::V_LSHLREV_B64_vi:
4556
4557 case AMDGPU::V_LSHRREV_B64_e64:
4558 case AMDGPU::V_LSHRREV_B64_gfx10:
4559 case AMDGPU::V_LSHRREV_B64_vi:
4560
4561 case AMDGPU::V_ASHRREV_I64_e64:
4562 case AMDGPU::V_ASHRREV_I64_gfx10:
4563 case AMDGPU::V_ASHRREV_I64_vi:
4564
4565 case AMDGPU::V_PK_LSHLREV_B16:
4566 case AMDGPU::V_PK_LSHLREV_B16_gfx10:
4567 case AMDGPU::V_PK_LSHLREV_B16_vi:
4568
4569 case AMDGPU::V_PK_LSHRREV_B16:
4570 case AMDGPU::V_PK_LSHRREV_B16_gfx10:
4571 case AMDGPU::V_PK_LSHRREV_B16_vi:
4572 case AMDGPU::V_PK_ASHRREV_I16:
4573 case AMDGPU::V_PK_ASHRREV_I16_gfx10:
4574 case AMDGPU::V_PK_ASHRREV_I16_vi:
4575 return true;
4576 default:
4577 return false;
4578 }
4579}
4580
4581bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst,
4582 const OperandVector &Operands) {
4583 using namespace SIInstrFlags;
4584 const unsigned Opcode = Inst.getOpcode();
4585 const MCInstrDesc &Desc = MII.get(Opcode);
4586
4587 // lds_direct register is defined so that it can be used
4588 // with 9-bit operands only. Ignore encodings which do not accept these.
4589 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
4590 if ((Desc.TSFlags & Enc) == 0)
4591 return true;
4592
4593 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
4594 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
4595 if (SrcIdx == -1)
4596 break;
4597 const auto &Src = Inst.getOperand(SrcIdx);
4598 if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
4599
4600 if (isGFX90A() || isGFX11Plus()) {
4601 Error(getOperandLoc(Operands, SrcIdx),
4602 "lds_direct is not supported on this GPU");
4603 return false;
4604 }
4605
4606 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA)) {
4607 Error(getOperandLoc(Operands, SrcIdx),
4608 "lds_direct cannot be used with this instruction");
4609 return false;
4610 }
4611
4612 if (SrcName != OpName::src0) {
4613 Error(getOperandLoc(Operands, SrcIdx),
4614 "lds_direct may be used as src0 only");
4615 return false;
4616 }
4617 }
4618 }
4619
4620 return true;
4621}
4622
4623SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
4624 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4625 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4626 if (Op.isFlatOffset())
4627 return Op.getStartLoc();
4628 }
4629 return getLoc();
4630}
4631
4632bool AMDGPUAsmParser::validateOffset(const MCInst &Inst,
4633 const OperandVector &Operands) {
4634 auto Opcode = Inst.getOpcode();
4635 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4636 if (OpNum == -1)
4637 return true;
4638
4639 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4640 if ((TSFlags & SIInstrFlags::FLAT))
4641 return validateFlatOffset(Inst, Operands);
4642
4643 if ((TSFlags & SIInstrFlags::SMRD))
4644 return validateSMEMOffset(Inst, Operands);
4645
4646 const auto &Op = Inst.getOperand(OpNum);
4647 // GFX12+ buffer ops: the 24-bit InstOffset is signed but must not be negative.
4648 if (isGFX12Plus() &&
4649 (TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
4650 const unsigned OffsetSize = 24;
4651 if (!isUIntN(OffsetSize - 1, Op.getImm())) {
4652 Error(getFlatOffsetLoc(Operands),
4653 Twine("expected a ") + Twine(OffsetSize - 1) +
4654 "-bit unsigned offset for buffer ops");
4655 return false;
4656 }
4657 } else {
4658 const unsigned OffsetSize = 16;
4659 if (!isUIntN(OffsetSize, Op.getImm())) {
4660 Error(getFlatOffsetLoc(Operands),
4661 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
4662 return false;
4663 }
4664 }
4665 return true;
4666}
4667
4668bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
4669 const OperandVector &Operands) {
4670 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4671 if ((TSFlags & SIInstrFlags::FLAT) == 0)
4672 return true;
4673
4674 auto Opcode = Inst.getOpcode();
4675 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4676 assert(OpNum != -1);
4677
4678 const auto &Op = Inst.getOperand(OpNum);
4679 if (!hasFlatOffsets() && Op.getImm() != 0) {
4680 Error(getFlatOffsetLoc(Operands),
4681 "flat offset modifier is not supported on this GPU");
4682 return false;
4683 }
4684
4685 // For pre-GFX12 FLAT instructions the offset must be positive;
4686 // MSB is ignored and forced to zero.
4687 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI());
4688 bool AllowNegative =
4689 (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) ||
4690 isGFX12Plus();
4691 if (!isIntN(OffsetSize, Op.getImm()) || (!AllowNegative && Op.getImm() < 0)) {
4692 Error(getFlatOffsetLoc(Operands),
4693 Twine("expected a ") +
4694 (AllowNegative ? Twine(OffsetSize) + "-bit signed offset"
4695 : Twine(OffsetSize - 1) + "-bit unsigned offset"));
4696 return false;
4697 }
4698
4699 return true;
4700}
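// Illustrative example (not part of the original source): on a pre-GFX12
// target, global/scratch forms accept a negative offset while plain FLAT does
// not, e.g. "global_load_dword v0, v[2:3], off offset:-64" is accepted but
// "flat_load_dword v0, v[2:3] offset:-64" is diagnosed by the check above.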
4701
4702SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
4703 // Start with second operand because SMEM Offset cannot be dst or src0.
4704 for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
4705 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4706 if (Op.isSMEMOffset() || Op.isSMEMOffsetMod())
4707 return Op.getStartLoc();
4708 }
4709 return getLoc();
4710}
4711
4712bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
4713 const OperandVector &Operands) {
4714 if (isCI() || isSI())
4715 return true;
4716
4717 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4718 if ((TSFlags & SIInstrFlags::SMRD) == 0)
4719 return true;
4720
4721 auto Opcode = Inst.getOpcode();
4722 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4723 if (OpNum == -1)
4724 return true;
4725
4726 const auto &Op = Inst.getOperand(OpNum);
4727 if (!Op.isImm())
4728 return true;
4729
4730 uint64_t Offset = Op.getImm();
4731 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
4732 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
4733 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
4734 return true;
4735
4736 Error(getSMEMOffsetLoc(Operands),
4737 isGFX12Plus() && IsBuffer
4738 ? "expected a 23-bit unsigned offset for buffer ops"
4739 : isGFX12Plus() ? "expected a 24-bit signed offset"
4740 : (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset"
4741 : "expected a 21-bit signed offset");
4742
4743 return false;
4744}
4745
4746bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst,
4747 const OperandVector &Operands) {
4748 unsigned Opcode = Inst.getOpcode();
4749 const MCInstrDesc &Desc = MII.get(Opcode);
4750 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
4751 return true;
4752
4753 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4754 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4755
4756 const int OpIndices[] = { Src0Idx, Src1Idx };
4757
4758 unsigned NumExprs = 0;
4759 unsigned NumLiterals = 0;
4760 int64_t LiteralValue;
4761
4762 for (int OpIdx : OpIndices) {
4763 if (OpIdx == -1) break;
4764
4765 const MCOperand &MO = Inst.getOperand(OpIdx);
4766 // Exclude special imm operands (like that used by s_set_gpr_idx_on)
4767 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
4768 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4769 auto OpType = static_cast<AMDGPU::OperandType>(
4770 Desc.operands()[OpIdx].OperandType);
4771 int64_t Value = encode32BitLiteral(MO.getImm(), OpType);
4772 if (NumLiterals == 0 || LiteralValue != Value) {
4773 LiteralValue = Value;
4774 ++NumLiterals;
4775 }
4776 } else if (MO.isExpr()) {
4777 ++NumExprs;
4778 }
4779 }
4780 }
4781
4782 if (NumLiterals + NumExprs <= 1)
4783 return true;
4784
4785 Error(getOperandLoc(Operands, Src1Idx),
4786 "only one unique literal operand is allowed");
4787 return false;
4788}
4789
4790bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4791 const unsigned Opc = Inst.getOpcode();
4792 if (isPermlane16(Opc)) {
4793 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4794 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4795
4796 if (OpSel & ~3)
4797 return false;
4798 }
4799
4800 uint64_t TSFlags = MII.get(Opc).TSFlags;
4801
4802 if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) {
4803 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4804 if (OpSelIdx != -1) {
4805 if (Inst.getOperand(OpSelIdx).getImm() != 0)
4806 return false;
4807 }
4808 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4809 if (OpSelHiIdx != -1) {
4810 if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
4811 return false;
4812 }
4813 }
4814
4815 // op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot).
4816 if (isGFX11Plus() && (TSFlags & SIInstrFlags::IsDOT) &&
4817 (TSFlags & SIInstrFlags::VOP3) && !(TSFlags & SIInstrFlags::VOP3P)) {
4818 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4819 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4820 if (OpSel & 3)
4821 return false;
4822 }
4823
4824 // Packed math FP32 instructions typically accept SGPRs or VGPRs as source
4825 // operands. On gfx12+, if a source operand uses SGPRs, the HW can only read
4826 // the first SGPR and use it for both the low and high operations.
4827 if (isPackedFP32Inst(Opc) && isGFX12Plus()) {
4828 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4829 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
4830 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4831 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4832
4833 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4834 const MCOperand &Src1 = Inst.getOperand(Src1Idx);
4835 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4836 unsigned OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
4837
4838 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4839
4840 auto VerifyOneSGPR = [OpSel, OpSelHi](unsigned Index) -> bool {
4841 unsigned Mask = 1U << Index;
4842 return ((OpSel & Mask) == 0) && ((OpSelHi & Mask) == 0);
4843 };
4844
4845 if (Src0.isReg() && isSGPR(Src0.getReg(), TRI) &&
4846 !VerifyOneSGPR(/*Index=*/0))
4847 return false;
4848 if (Src1.isReg() && isSGPR(Src1.getReg(), TRI) &&
4849 !VerifyOneSGPR(/*Index=*/1))
4850 return false;
4851
4852 int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
4853 if (Src2Idx != -1) {
4854 const MCOperand &Src2 = Inst.getOperand(Src2Idx);
4855 if (Src2.isReg() && isSGPR(Src2.getReg(), TRI) &&
4856 !VerifyOneSGPR(/*Index=*/2))
4857 return false;
4858 }
4859 }
4860
4861 return true;
4862}
4863
4864bool AMDGPUAsmParser::validateTrue16OpSel(const MCInst &Inst) {
4865 if (!hasTrue16Insts())
4866 return true;
4867 const MCRegisterInfo *MRI = getMRI();
4868 const unsigned Opc = Inst.getOpcode();
4869 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4870 if (OpSelIdx == -1)
4871 return true;
4872 unsigned OpSelOpValue = Inst.getOperand(OpSelIdx).getImm();
4873 // If the value is 0 we could have a default OpSel Operand, so conservatively
4874 // allow it.
4875 if (OpSelOpValue == 0)
4876 return true;
4877 unsigned OpCount = 0;
4878 for (AMDGPU::OpName OpName : {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
4879 AMDGPU::OpName::src2, AMDGPU::OpName::vdst}) {
4880 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), OpName);
4881 if (OpIdx == -1)
4882 continue;
4883 const MCOperand &Op = Inst.getOperand(OpIdx);
4884 if (Op.isReg() &&
4885 MRI->getRegClass(AMDGPU::VGPR_16RegClassID).contains(Op.getReg())) {
4886 bool VGPRSuffixIsHi = AMDGPU::isHi16Reg(Op.getReg(), *MRI);
4887 bool OpSelOpIsHi = ((OpSelOpValue & (1 << OpCount)) != 0);
4888 if (OpSelOpIsHi != VGPRSuffixIsHi)
4889 return false;
4890 }
4891 ++OpCount;
4892 }
4893
4894 return true;
4895}
4896
4897bool AMDGPUAsmParser::validateNeg(const MCInst &Inst, AMDGPU::OpName OpName) {
4898 assert(OpName == AMDGPU::OpName::neg_lo || OpName == AMDGPU::OpName::neg_hi);
4899
4900 const unsigned Opc = Inst.getOpcode();
4901 uint64_t TSFlags = MII.get(Opc).TSFlags;
4902
4903 // v_dot4 fp8/bf8 neg_lo/neg_hi not allowed on src0 and src1 (allowed on src2)
4904 // v_wmma iu4/iu8 neg_lo not allowed on src2 (allowed on src0, src1)
4905 // v_swmmac f16/bf16 neg_lo/neg_hi not allowed on src2 (allowed on src0, src1)
4906 // other wmma/swmmac instructions don't have neg_lo/neg_hi operand.
4907 if (!(TSFlags & SIInstrFlags::IsDOT) && !(TSFlags & SIInstrFlags::IsWMMA) &&
4908 !(TSFlags & SIInstrFlags::IsSWMMAC))
4909 return true;
4910
4911 int NegIdx = AMDGPU::getNamedOperandIdx(Opc, OpName);
4912 if (NegIdx == -1)
4913 return true;
4914
4915 unsigned Neg = Inst.getOperand(NegIdx).getImm();
4916
4917 // Some instructions have a neg_lo or neg_hi operand, but the neg modifier is
4918 // allowed only on certain src operands and not on others.
4919 // Conveniently, such instructions do not have a src_modifiers operand for the
4920 // src operands that disallow neg, because those operands also disallow opsel.
4921
4922 const AMDGPU::OpName SrcMods[3] = {AMDGPU::OpName::src0_modifiers,
4923 AMDGPU::OpName::src1_modifiers,
4924 AMDGPU::OpName::src2_modifiers};
4925
4926 for (unsigned i = 0; i < 3; ++i) {
4927 if (!AMDGPU::hasNamedOperand(Opc, SrcMods[i])) {
4928 if (Neg & (1 << i))
4929 return false;
4930 }
4931 }
4932
4933 return true;
4934}
4935
4936bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
4937 const OperandVector &Operands) {
4938 const unsigned Opc = Inst.getOpcode();
4939 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
4940 if (DppCtrlIdx >= 0) {
4941 unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
4942
4943 if (!AMDGPU::isLegalDPALU_DPPControl(getSTI(), DppCtrl) &&
4944 AMDGPU::isDPALU_DPP(MII.get(Opc), getSTI())) {
4945 // DP ALU DPP is supported for row_newbcast only on GFX9* and row_share
4946 // only on GFX12.
4947 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
4948 Error(S, isGFX12() ? "DP ALU dpp only supports row_share"
4949 : "DP ALU dpp only supports row_newbcast");
4950 return false;
4951 }
4952 }
4953
4954 int Dpp8Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp8);
4955 bool IsDPP = DppCtrlIdx >= 0 || Dpp8Idx >= 0;
4956
4957 if (IsDPP && !hasDPPSrc1SGPR(getSTI())) {
4958 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
4959 if (Src1Idx >= 0) {
4960 const MCOperand &Src1 = Inst.getOperand(Src1Idx);
4961 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4962 if (Src1.isReg() && isSGPR(mc2PseudoReg(Src1.getReg()), TRI)) {
4963 Error(getOperandLoc(Operands, Src1Idx),
4964 "invalid operand for instruction");
4965 return false;
4966 }
4967 if (Src1.isImm()) {
4968 Error(getInstLoc(Operands),
4969 "src1 immediate operand invalid for instruction");
4970 return false;
4971 }
4972 }
4973 }
4974
4975 return true;
4976}
4977
4978// Check if VCC register matches wavefront size
4979bool AMDGPUAsmParser::validateVccOperand(MCRegister Reg) const {
4980 auto FB = getFeatureBits();
4981 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
4982 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
4983}
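// Illustrative note (not part of the original source): wave64 code names the
// 64-bit pair vcc, while wave32 code must use vcc_lo, e.g.
//   v_add_co_u32 v0, vcc_lo, v1, v2   ; wave32
//   v_add_co_u32 v0, vcc,    v1, v2   ; wave64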
4984
4985// One unique literal can be used. VOP3 literal is only allowed in GFX10+
4986bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
4987 const OperandVector &Operands) {
4988 unsigned Opcode = Inst.getOpcode();
4989 const MCInstrDesc &Desc = MII.get(Opcode);
4990 bool HasMandatoryLiteral = getNamedOperandIdx(Opcode, OpName::imm) != -1;
4991 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
4992 !HasMandatoryLiteral && !isVOPD(Opcode))
4993 return true;
4994
4995 OperandIndices OpIndices = getSrcOperandIndices(Opcode, HasMandatoryLiteral);
4996
4997 std::optional<unsigned> LiteralOpIdx;
4998 std::optional<uint64_t> LiteralValue;
4999
5000 for (int OpIdx : OpIndices) {
5001 if (OpIdx == -1)
5002 continue;
5003
5004 const MCOperand &MO = Inst.getOperand(OpIdx);
5005 if (!MO.isImm() && !MO.isExpr())
5006 continue;
5007 if (!isSISrcOperand(Desc, OpIdx))
5008 continue;
5009
5010 bool IsAnotherLiteral = false;
5011 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
5012 uint64_t Value = static_cast<uint64_t>(MO.getImm());
5013 bool IsForcedFP64 =
5014 Desc.operands()[OpIdx].OperandType == AMDGPU::OPERAND_KIMM64 ||
5016 HasMandatoryLiteral);
5017 bool IsFP64 = (IsForcedFP64 || AMDGPU::isSISrcFPOperand(Desc, OpIdx)) &&
5018 AMDGPU::getOperandSize(Desc.operands()[OpIdx]) == 8;
5019 bool IsValid32Op = AMDGPU::isValid32BitLiteral(Value, IsFP64);
5020
5021 if (!IsValid32Op && !isInt<32>(Value) && !isUInt<32>(Value) &&
5022 !IsForcedFP64 && (!has64BitLiterals() || Desc.getSize() != 4)) {
5023 Error(getOperandLoc(Operands, OpIdx),
5024 "invalid operand for instruction");
5025 return false;
5026 }
5027
5028 if (IsFP64 && IsValid32Op && !IsForcedFP64)
5029 Value = Hi_32(Value);
5030
5031 IsAnotherLiteral = !LiteralValue || *LiteralValue != Value;
5032 LiteralValue = Value;
5033 } else if (MO.isExpr()) {
5034 // Literal value not known, so we conservatively assume it's different.
5035 IsAnotherLiteral = true;
5036 }
5037
5038 if (IsAnotherLiteral && !HasMandatoryLiteral &&
5039 !getFeatureBits()[FeatureVOP3Literal]) {
5040 Error(getOperandLoc(Operands, OpIdx),
5041 "literal operands are not supported");
5042 return false;
5043 }
5044
5045 if (LiteralOpIdx && IsAnotherLiteral) {
5046 Error(getLaterLoc(getOperandLoc(Operands, OpIdx),
5047 getOperandLoc(Operands, *LiteralOpIdx)),
5048 "only one unique literal operand is allowed");
5049 return false;
5050 }
5051
5052 if (IsAnotherLiteral)
5053 LiteralOpIdx = OpIdx;
5054 }
5055
5056 return true;
5057}
5058
5059// Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
5060static int IsAGPROperand(const MCInst &Inst, AMDGPU::OpName Name,
5061 const MCRegisterInfo *MRI) {
5062 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name);
5063 if (OpIdx < 0)
5064 return -1;
5065
5066 const MCOperand &Op = Inst.getOperand(OpIdx);
5067 if (!Op.isReg())
5068 return -1;
5069
5070 MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
5071 auto Reg = Sub ? Sub : Op.getReg();
5072 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
5073 return AGPR32.contains(Reg) ? 1 : 0;
5074}
5075
5076bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
5077 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
5078 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
5079 SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
5080 SIInstrFlags::DS)) == 0)
5081 return true;
5082
5083 AMDGPU::OpName DataName = (TSFlags & SIInstrFlags::DS)
5084 ? AMDGPU::OpName::data0
5085 : AMDGPU::OpName::vdata;
5086
5087 const MCRegisterInfo *MRI = getMRI();
5088 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
5089 int DataAreg = IsAGPROperand(Inst, DataName, MRI);
5090
5091 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
5092 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
5093 if (Data2Areg >= 0 && Data2Areg != DataAreg)
5094 return false;
5095 }
5096
5097 auto FB = getFeatureBits();
5098 if (FB[AMDGPU::FeatureGFX90AInsts]) {
5099 if (DataAreg < 0 || DstAreg < 0)
5100 return true;
5101 return DstAreg == DataAreg;
5102 }
5103
5104 return DstAreg < 1 && DataAreg < 1;
5105}
5106
5107bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
5108 auto FB = getFeatureBits();
5109 if (!FB[AMDGPU::FeatureRequiresAlignedVGPRs])
5110 return true;
5111
5112 unsigned Opc = Inst.getOpcode();
5113 const MCRegisterInfo *MRI = getMRI();
5114 // DS_READ_B96_TR_B6 is the only DS instruction in GFX950 that allows an
5115 // unaligned VGPR. All others only allow even-aligned VGPRs.
5116 if (FB[AMDGPU::FeatureGFX90AInsts] && Opc == AMDGPU::DS_READ_B96_TR_B6_vi)
5117 return true;
5118
5119 if (FB[AMDGPU::FeatureGFX1250Insts]) {
5120 switch (Opc) {
5121 default:
5122 break;
5123 case AMDGPU::DS_LOAD_TR6_B96:
5124 case AMDGPU::DS_LOAD_TR6_B96_gfx12:
5125 // DS_LOAD_TR6_B96 is the only DS instruction in GFX1250 that
5126 // allows an unaligned VGPR. All others only allow even-aligned VGPRs.
5127 return true;
5128 case AMDGPU::GLOBAL_LOAD_TR6_B96:
5129 case AMDGPU::GLOBAL_LOAD_TR6_B96_gfx1250: {
5130 // GLOBAL_LOAD_TR6_B96 is the only GLOBAL instruction in GFX1250 that
5131 // allows an unaligned VGPR for vdst, but other operands still only allow
5132 // even-aligned VGPRs.
5133 int VAddrIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr);
5134 if (VAddrIdx != -1) {
5135 const MCOperand &Op = Inst.getOperand(VAddrIdx);
5136 MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
5137 if ((Sub - AMDGPU::VGPR0) & 1)
5138 return false;
5139 }
5140 return true;
5141 }
5142 case AMDGPU::GLOBAL_LOAD_TR6_B96_SADDR:
5143 case AMDGPU::GLOBAL_LOAD_TR6_B96_SADDR_gfx1250:
5144 return true;
5145 }
5146 }
5147
5148 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
5149 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
5150 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
5151 const MCOperand &Op = Inst.getOperand(I);
5152 if (!Op.isReg())
5153 continue;
5154
5155 MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
5156 if (!Sub)
5157 continue;
5158
5159 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
5160 return false;
5161 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
5162 return false;
5163 }
5164
5165 return true;
5166}
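// Illustrative example (not part of the original source): on a target with
// FeatureRequiresAlignedVGPRs (e.g. gfx90a), a 64-bit VGPR tuple must start at
// an even register, so v[0:1] is accepted while v[1:2] is rejected, subject to
// the TR6/TR_B6 load exceptions handled above.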
5167
5168SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
5169 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5170 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5171 if (Op.isBLGP())
5172 return Op.getStartLoc();
5173 }
5174 return SMLoc();
5175}
5176
5177bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
5178 const OperandVector &Operands) {
5179 unsigned Opc = Inst.getOpcode();
5180 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
5181 if (BlgpIdx == -1)
5182 return true;
5183 SMLoc BLGPLoc = getBLGPLoc(Operands);
5184 if (!BLGPLoc.isValid())
5185 return true;
5186 bool IsNeg = StringRef(BLGPLoc.getPointer()).starts_with("neg:");
5187 auto FB = getFeatureBits();
5188 bool UsesNeg = false;
5189 if (FB[AMDGPU::FeatureGFX940Insts]) {
5190 switch (Opc) {
5191 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
5192 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
5193 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
5194 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
5195 UsesNeg = true;
5196 }
5197 }
5198
5199 if (IsNeg == UsesNeg)
5200 return true;
5201
5202 Error(BLGPLoc,
5203 UsesNeg ? "invalid modifier: blgp is not supported"
5204 : "invalid modifier: neg is not supported");
5205
5206 return false;
5207}
5208
5209bool AMDGPUAsmParser::validateWaitCnt(const MCInst &Inst,
5210 const OperandVector &Operands) {
5211 if (!isGFX11Plus())
5212 return true;
5213
5214 unsigned Opc = Inst.getOpcode();
5215 if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 &&
5216 Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 &&
5217 Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 &&
5218 Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11)
5219 return true;
5220
5221 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst);
5222 assert(Src0Idx >= 0 && Inst.getOperand(Src0Idx).isReg());
5223 auto Reg = mc2PseudoReg(Inst.getOperand(Src0Idx).getReg());
5224 if (Reg == AMDGPU::SGPR_NULL)
5225 return true;
5226
5227 Error(getOperandLoc(Operands, Src0Idx), "src0 must be null");
5228 return false;
5229}
5230
5231bool AMDGPUAsmParser::validateDS(const MCInst &Inst,
5232 const OperandVector &Operands) {
5233 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
5234 if ((TSFlags & SIInstrFlags::DS) == 0)
5235 return true;
5236 if (TSFlags & SIInstrFlags::GWS)
5237 return validateGWS(Inst, Operands);
5238 // Only validate GDS for non-GWS instructions.
5239 if (hasGDS())
5240 return true;
5241 int GDSIdx =
5242 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::gds);
5243 if (GDSIdx < 0)
5244 return true;
5245 unsigned GDS = Inst.getOperand(GDSIdx).getImm();
5246 if (GDS) {
5247 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyGDS, Operands);
5248 Error(S, "gds modifier is not supported on this GPU");
5249 return false;
5250 }
5251 return true;
5252}
5253
5254// gfx90a has an undocumented limitation:
5255// DS_GWS opcodes must use even aligned registers.
5256bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
5257 const OperandVector &Operands) {
5258 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
5259 return true;
5260
5261 int Opc = Inst.getOpcode();
5262 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
5263 Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
5264 return true;
5265
5266 const MCRegisterInfo *MRI = getMRI();
5267 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
5268 int Data0Pos =
5269 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
5270 assert(Data0Pos != -1);
5271 auto Reg = Inst.getOperand(Data0Pos).getReg();
5272 auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
5273 if (RegIdx & 1) {
5274 Error(getOperandLoc(Operands, Data0Pos), "vgpr must be even aligned");
5275 return false;
5276 }
5277
5278 return true;
5279}
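// Illustrative example (not part of the original source): on gfx90a
//   ds_gws_init v2 gds   ; even data0 VGPR -> accepted
//   ds_gws_init v3 gds   ; odd data0 VGPR  -> "vgpr must be even aligned"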
5280
5281bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
5282 const OperandVector &Operands,
5283 const SMLoc &IDLoc) {
5284 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
5285 AMDGPU::OpName::cpol);
5286 if (CPolPos == -1)
5287 return true;
5288
5289 unsigned CPol = Inst.getOperand(CPolPos).getImm();
5290
5291 if (!isGFX1250()) {
5292 if (CPol & CPol::SCAL) {
5293 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5294 StringRef CStr(S.getPointer());
5295 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scale_offset")]);
5296 Error(S, "scale_offset is not supported on this GPU");
5297 }
5298 if (CPol & CPol::NV) {
5299 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5300 StringRef CStr(S.getPointer());
5301 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("nv")]);
5302 Error(S, "nv is not supported on this GPU");
5303 }
5304 }
5305
5306 if ((CPol & CPol::SCAL) && !supportsScaleOffset(MII, Inst.getOpcode())) {
5307 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5308 StringRef CStr(S.getPointer());
5309 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scale_offset")]);
5310 Error(S, "scale_offset is not supported for this instruction");
5311 }
5312
5313 if (isGFX12Plus())
5314 return validateTHAndScopeBits(Inst, Operands, CPol);
5315
5316 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
5317 if (TSFlags & SIInstrFlags::SMRD) {
5318 if (CPol && (isSI() || isCI())) {
5319 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5320 Error(S, "cache policy is not supported for SMRD instructions");
5321 return false;
5322 }
5323 if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) {
5324 Error(IDLoc, "invalid cache policy for SMEM instruction");
5325 return false;
5326 }
5327 }
5328
5329 if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
5330 const uint64_t AllowSCCModifier = SIInstrFlags::MUBUF |
5331 SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
5332 SIInstrFlags::FLAT;
5333 if (!(TSFlags & AllowSCCModifier)) {
5334 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5335 StringRef CStr(S.getPointer());
5336 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
5337 Error(S,
5338 "scc modifier is not supported for this instruction on this GPU");
5339 return false;
5340 }
5341 }
5342
5343 if (!(TSFlags & (SIInstrFlags::IsAtomicRet | SIInstrFlags::IsAtomicNoRet)))
5344 return true;
5345
5346 if (TSFlags & SIInstrFlags::IsAtomicRet) {
5347 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
5348 Error(IDLoc, isGFX940() ? "instruction must use sc0"
5349 : "instruction must use glc");
5350 return false;
5351 }
5352 } else {
5353 if (CPol & CPol::GLC) {
5354 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5355 StringRef CStr(S.getPointer());
5356 S = SMLoc::getFromPointer(
5357 &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
5358 Error(S, isGFX940() ? "instruction must not use sc0"
5359 : "instruction must not use glc");
5360 return false;
5361 }
5362 }
5363
5364 return true;
5365}
5366
5367bool AMDGPUAsmParser::validateTHAndScopeBits(const MCInst &Inst,
5368 const OperandVector &Operands,
5369 const unsigned CPol) {
5370 const unsigned TH = CPol & AMDGPU::CPol::TH;
5371 const unsigned Scope = CPol & AMDGPU::CPol::SCOPE;
5372
5373 const unsigned Opcode = Inst.getOpcode();
5374 const MCInstrDesc &TID = MII.get(Opcode);
5375
5376 auto PrintError = [&](StringRef Msg) {
5377 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5378 Error(S, Msg);
5379 return false;
5380 };
5381
5382 if ((TID.TSFlags & SIInstrFlags::IsAtomicRet) &&
5383 (TID.TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF)) &&
5384 !(TH & AMDGPU::CPol::TH_ATOMIC_RETURN))
5385 return PrintError("instruction must use th:TH_ATOMIC_RETURN");
5386
5387 if (TH == 0)
5388 return true;
5389
5390 if ((TID.TSFlags & SIInstrFlags::SMRD) &&
5391 ((TH == AMDGPU::CPol::TH_NT_RT) || (TH == AMDGPU::CPol::TH_RT_NT) ||
5392 (TH == AMDGPU::CPol::TH_NT_HT)))
5393 return PrintError("invalid th value for SMEM instruction");
5394
5395 if (TH == AMDGPU::CPol::TH_BYPASS) {
5396 if ((Scope != AMDGPU::CPol::SCOPE_SYS &&
5397 (CPol & AMDGPU::CPol::TH_REAL_BYPASS)) ||
5398 (Scope == AMDGPU::CPol::SCOPE_SYS &&
5399 !(CPol & AMDGPU::CPol::TH_REAL_BYPASS)))
5400 return PrintError("scope and th combination is not valid");
5401 }
5402
5403 unsigned THType = AMDGPU::getTemporalHintType(TID);
5404 if (THType == AMDGPU::CPol::TH_TYPE_ATOMIC) {
5405 if (!(CPol & AMDGPU::CPol::TH_TYPE_ATOMIC))
5406 return PrintError("invalid th value for atomic instructions");
5407 } else if (THType == AMDGPU::CPol::TH_TYPE_STORE) {
5408 if (!(CPol & AMDGPU::CPol::TH_TYPE_STORE))
5409 return PrintError("invalid th value for store instructions");
5410 } else {
5411 if (!(CPol & AMDGPU::CPol::TH_TYPE_LOAD))
5412 return PrintError("invalid th value for load instructions");
5413 }
5414
5415 return true;
5416}
5417
5418bool AMDGPUAsmParser::validateTFE(const MCInst &Inst,
5419 const OperandVector &Operands) {
5420 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5421 if (Desc.mayStore() &&
5422 (Desc.TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
5423 SMLoc Loc = getImmLoc(AMDGPUOperand::ImmTyTFE, Operands);
5424 if (Loc != getInstLoc(Operands)) {
5425 Error(Loc, "TFE modifier has no meaning for store instructions");
5426 return false;
5427 }
5428 }
5429
5430 return true;
5431}
5432
5433bool AMDGPUAsmParser::validateSetVgprMSB(const MCInst &Inst,
5434 const OperandVector &Operands) {
5435 if (Inst.getOpcode() != AMDGPU::S_SET_VGPR_MSB_gfx12)
5436 return true;
5437
5438 int Simm16Pos =
5439 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::simm16);
5440 if ((unsigned)Inst.getOperand(Simm16Pos).getImm() > 255) {
5441 SMLoc Loc = Operands[1]->getStartLoc();
5442 Error(Loc, "s_set_vgpr_msb accepts values in range [0..255]");
5443 return false;
5444 }
5445
5446 return true;
5447}
5448
5449bool AMDGPUAsmParser::validateWMMA(const MCInst &Inst,
5450 const OperandVector &Operands) {
5451 unsigned Opc = Inst.getOpcode();
5452 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
5453 const MCInstrDesc &Desc = MII.get(Opc);
5454
5455 auto validateFmt = [&](AMDGPU::OpName FmtOp, AMDGPU::OpName SrcOp) -> bool {
5456 int FmtIdx = AMDGPU::getNamedOperandIdx(Opc, FmtOp);
5457 if (FmtIdx == -1)
5458 return true;
5459 unsigned Fmt = Inst.getOperand(FmtIdx).getImm();
5460 int SrcIdx = AMDGPU::getNamedOperandIdx(Opc, SrcOp);
5461 unsigned RegSize =
5462 TRI->getRegClass(Desc.operands()[SrcIdx].RegClass).getSizeInBits();
5463
5464 if (RegSize == AMDGPU::wmmaScaleF8F6F4FormatToNumRegs(Fmt) * 32)
5465 return true;
5466
5467 static const char *FmtNames[] = {"MATRIX_FMT_FP8", "MATRIX_FMT_BF8",
5468 "MATRIX_FMT_FP6", "MATRIX_FMT_BF6",
5469 "MATRIX_FMT_FP4"};
5470
5471 Error(getOperandLoc(Operands, SrcIdx),
5472 "wrong register tuple size for " + Twine(FmtNames[Fmt]));
5473 return false;
5474 };
5475
5476 return validateFmt(AMDGPU::OpName::matrix_a_fmt, AMDGPU::OpName::src0) &&
5477 validateFmt(AMDGPU::OpName::matrix_b_fmt, AMDGPU::OpName::src1);
5478}
5479
5480bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
5481 const SMLoc &IDLoc,
5482 const OperandVector &Operands) {
5483 if (!validateLdsDirect(Inst, Operands))
5484 return false;
5485 if (!validateTrue16OpSel(Inst)) {
5486 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
5487 "op_sel operand conflicts with 16-bit operand suffix");
5488 return false;
5489 }
5490 if (!validateSOPLiteral(Inst, Operands))
5491 return false;
5492 if (!validateVOPLiteral(Inst, Operands)) {
5493 return false;
5494 }
5495 if (!validateConstantBusLimitations(Inst, Operands)) {
5496 return false;
5497 }
5498 if (!validateVOPD(Inst, Operands)) {
5499 return false;
5500 }
5501 if (!validateIntClampSupported(Inst)) {
5502 Error(getImmLoc(AMDGPUOperand::ImmTyClamp, Operands),
5503 "integer clamping is not supported on this GPU");
5504 return false;
5505 }
5506 if (!validateOpSel(Inst)) {
5507 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
5508 "invalid op_sel operand");
5509 return false;
5510 }
5511 if (!validateNeg(Inst, AMDGPU::OpName::neg_lo)) {
5512 Error(getImmLoc(AMDGPUOperand::ImmTyNegLo, Operands),
5513 "invalid neg_lo operand");
5514 return false;
5515 }
5516 if (!validateNeg(Inst, AMDGPU::OpName::neg_hi)) {
5517 Error(getImmLoc(AMDGPUOperand::ImmTyNegHi, Operands),
5518 "invalid neg_hi operand");
5519 return false;
5520 }
5521 if (!validateDPP(Inst, Operands)) {
5522 return false;
5523 }
5524 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
5525 if (!validateMIMGD16(Inst)) {
5526 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
5527 "d16 modifier is not supported on this GPU");
5528 return false;
5529 }
5530 if (!validateMIMGDim(Inst, Operands)) {
5531 Error(IDLoc, "missing dim operand");
5532 return false;
5533 }
5534 if (!validateTensorR128(Inst)) {
5535 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
5536 "instruction must set modifier r128=0");
5537 return false;
5538 }
5539 if (!validateMIMGMSAA(Inst)) {
5540 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
5541 "invalid dim; must be MSAA type");
5542 return false;
5543 }
5544 if (!validateMIMGDataSize(Inst, IDLoc)) {
5545 return false;
5546 }
5547 if (!validateMIMGAddrSize(Inst, IDLoc))
5548 return false;
5549 if (!validateMIMGAtomicDMask(Inst)) {
5550 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
5551 "invalid atomic image dmask");
5552 return false;
5553 }
5554 if (!validateMIMGGatherDMask(Inst)) {
5555 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
5556 "invalid image_gather dmask: only one bit must be set");
5557 return false;
5558 }
5559 if (!validateMovrels(Inst, Operands)) {
5560 return false;
5561 }
5562 if (!validateOffset(Inst, Operands)) {
5563 return false;
5564 }
5565 if (!validateMAIAccWrite(Inst, Operands)) {
5566 return false;
5567 }
5568 if (!validateMAISrc2(Inst, Operands)) {
5569 return false;
5570 }
5571 if (!validateMFMA(Inst, Operands)) {
5572 return false;
5573 }
5574 if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
5575 return false;
5576 }
5577
5578 if (!validateAGPRLdSt(Inst)) {
5579 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
5580 ? "invalid register class: data and dst should be all VGPR or AGPR"
5581 : "invalid register class: agpr loads and stores not supported on this GPU"
5582 );
5583 return false;
5584 }
5585 if (!validateVGPRAlign(Inst)) {
5586 Error(IDLoc,
5587 "invalid register class: vgpr tuples must be 64 bit aligned");
5588 return false;
5589 }
5590 if (!validateDS(Inst, Operands)) {
5591 return false;
5592 }
5593
5594 if (!validateBLGP(Inst, Operands)) {
5595 return false;
5596 }
5597
5598 if (!validateDivScale(Inst)) {
5599 Error(IDLoc, "ABS not allowed in VOP3B instructions");
5600 return false;
5601 }
5602 if (!validateWaitCnt(Inst, Operands)) {
5603 return false;
5604 }
5605 if (!validateTFE(Inst, Operands)) {
5606 return false;
5607 }
5608 if (!validateSetVgprMSB(Inst, Operands)) {
5609 return false;
5610 }
5611 if (!validateWMMA(Inst, Operands)) {
5612 return false;
5613 }
5614
5615 return true;
5616}
5617
5618 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
5619 const FeatureBitset &FBS,
5620 unsigned VariantID = 0);
5621
5622static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
5623 const FeatureBitset &AvailableFeatures,
5624 unsigned VariantID);
5625
5626bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5627 const FeatureBitset &FBS) {
5628 return isSupportedMnemo(Mnemo, FBS, getAllVariants());
5629}
5630
5631bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5632 const FeatureBitset &FBS,
5633 ArrayRef<unsigned> Variants) {
5634 for (auto Variant : Variants) {
5635 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
5636 return true;
5637 }
5638
5639 return false;
5640}
5641
5642bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
5643 const SMLoc &IDLoc) {
5644 FeatureBitset FBS = ComputeAvailableFeatures(getFeatureBits());
5645
5646 // Check if requested instruction variant is supported.
5647 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
5648 return false;
5649
5650 // This instruction is not supported.
5651 // Clear any other pending errors because they are no longer relevant.
5652 getParser().clearPendingErrors();
5653
5654 // Requested instruction variant is not supported.
5655 // Check if any other variants are supported.
5656 StringRef VariantName = getMatchedVariantName();
5657 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
5658 return Error(IDLoc,
5659 Twine(VariantName,
5660 " variant of this instruction is not supported"));
5661 }
5662
5663 // Check if this instruction may be used with a different wavesize.
5664 if (isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] &&
5665 !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) {
5666
5667 FeatureBitset FeaturesWS32 = getFeatureBits();
5668 FeaturesWS32.flip(AMDGPU::FeatureWavefrontSize64)
5669 .flip(AMDGPU::FeatureWavefrontSize32);
5670 FeatureBitset AvailableFeaturesWS32 =
5671 ComputeAvailableFeatures(FeaturesWS32);
5672
5673 if (isSupportedMnemo(Mnemo, AvailableFeaturesWS32, getMatchedVariants()))
5674 return Error(IDLoc, "instruction requires wavesize=32");
5675 }
5676
5677 // Finally check if this instruction is supported on any other GPU.
5678 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
5679 return Error(IDLoc, "instruction not supported on this GPU");
5680 }
5681
5682 // Instruction not supported on any GPU. Probably a typo.
5683 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
5684 return Error(IDLoc, "invalid instruction" + Suggestion);
5685}
5686
5687 static bool isInvalidVOPDY(const OperandVector &Operands,
5688 uint64_t InvalidOprIdx) {
5689 assert(InvalidOprIdx < Operands.size());
5690 const auto &Op = ((AMDGPUOperand &)*Operands[InvalidOprIdx]);
5691 if (Op.isToken() && InvalidOprIdx > 1) {
5692 const auto &PrevOp = ((AMDGPUOperand &)*Operands[InvalidOprIdx - 1]);
5693 return PrevOp.isToken() && PrevOp.getToken() == "::";
5694 }
5695 return false;
5696}
5697
5698bool AMDGPUAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
5699 OperandVector &Operands,
5700 MCStreamer &Out,
5701 uint64_t &ErrorInfo,
5702 bool MatchingInlineAsm) {
5703 MCInst Inst;
5704 Inst.setLoc(IDLoc);
5705 unsigned Result = Match_Success;
5706 for (auto Variant : getMatchedVariants()) {
5707 uint64_t EI;
5708 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
5709 Variant);
5710 // We order match statuses from least to most specific. We use the most
5711 // specific status as the result:
5712 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature
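// For example, if the e32 variant of a mnemonic yields Match_MnemonicFail and
// the e64 variant yields Match_InvalidOperand, the more specific
// Match_InvalidOperand result is the one kept and reported.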
5713 if (R == Match_Success || R == Match_MissingFeature ||
5714 (R == Match_InvalidOperand && Result != Match_MissingFeature) ||
5715 (R == Match_MnemonicFail && Result != Match_InvalidOperand &&
5716 Result != Match_MissingFeature)) {
5717 Result = R;
5718 ErrorInfo = EI;
5719 }
5720 if (R == Match_Success)
5721 break;
5722 }
5723
5724 if (Result == Match_Success) {
5725 if (!validateInstruction(Inst, IDLoc, Operands)) {
5726 return true;
5727 }
5728 Out.emitInstruction(Inst, getSTI());
5729 return false;
5730 }
5731
5732 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
5733 if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
5734 return true;
5735 }
5736
5737 switch (Result) {
5738 default: break;
5739 case Match_MissingFeature:
5740 // It has been verified that the specified instruction
5741 // mnemonic is valid. A match was found but it requires
5742 // features which are not supported on this GPU.
5743 return Error(IDLoc, "operands are not valid for this GPU or mode");
5744
5745 case Match_InvalidOperand: {
5746 SMLoc ErrorLoc = IDLoc;
5747 if (ErrorInfo != ~0ULL) {
5748 if (ErrorInfo >= Operands.size()) {
5749 return Error(IDLoc, "too few operands for instruction");
5750 }
5751 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
5752 if (ErrorLoc == SMLoc())
5753 ErrorLoc = IDLoc;
5754
5755 if (isInvalidVOPDY(Operands, ErrorInfo))
5756 return Error(ErrorLoc, "invalid VOPDY instruction");
5757 }
5758 return Error(ErrorLoc, "invalid operand for instruction");
5759 }
5760
5761 case Match_MnemonicFail:
5762 llvm_unreachable("Invalid instructions should have been handled already");
5763 }
5764 llvm_unreachable("Implement any new match types added!");
5765}
5766
5767bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
5768 int64_t Tmp = -1;
5769 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
5770 return true;
5771 }
5772 if (getParser().parseAbsoluteExpression(Tmp)) {
5773 return true;
5774 }
5775 Ret = static_cast<uint32_t>(Tmp);
5776 return false;
5777}
5778
5779bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
5780 if (!getSTI().getTargetTriple().isAMDGCN())
5781 return TokError("directive only supported for amdgcn architecture");
5782
5783 std::string TargetIDDirective;
5784 SMLoc TargetStart = getTok().getLoc();
5785 if (getParser().parseEscapedString(TargetIDDirective))
5786 return true;
5787
5788 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
5789 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5790 return getParser().Error(TargetRange.Start,
5791 (Twine(".amdgcn_target directive's target id ") +
5792 Twine(TargetIDDirective) +
5793 Twine(" does not match the specified target id ") +
5794 Twine(getTargetStreamer().getTargetID()->toString())).str());
5795
5796 return false;
5797}
5798
5799bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
5800 return Error(Range.Start, "value out of range", Range);
5801}
5802
5803bool AMDGPUAsmParser::calculateGPRBlocks(
5804 const FeatureBitset &Features, const MCExpr *VCCUsed,
5805 const MCExpr *FlatScrUsed, bool XNACKUsed,
5806 std::optional<bool> EnableWavefrontSize32, const MCExpr *NextFreeVGPR,
5807 SMRange VGPRRange, const MCExpr *NextFreeSGPR, SMRange SGPRRange,
5808 const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks) {
5809 // TODO(scott.linder): These calculations are duplicated from
5810 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
5811 IsaVersion Version = getIsaVersion(getSTI().getCPU());
5812 MCContext &Ctx = getContext();
5813
5814 const MCExpr *NumSGPRs = NextFreeSGPR;
5815 int64_t EvaluatedSGPRs;
5816
5817 if (Version.Major >= 10)
5818 NumSGPRs = MCConstantExpr::create(0, Ctx);
5819 else {
5820 unsigned MaxAddressableNumSGPRs =
5821 IsaInfo::getAddressableNumSGPRs(&getSTI());
5822
5823 if (NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) && Version.Major >= 8 &&
5824 !Features.test(FeatureSGPRInitBug) &&
5825 static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
5826 return OutOfRangeError(SGPRRange);
5827
5828 const MCExpr *ExtraSGPRs =
5829 AMDGPUMCExpr::createExtraSGPRs(VCCUsed, FlatScrUsed, XNACKUsed, Ctx);
5830 NumSGPRs = MCBinaryExpr::createAdd(NumSGPRs, ExtraSGPRs, Ctx);
5831
5832 if (NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) &&
5833 (Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
5834 static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
5835 return OutOfRangeError(SGPRRange);
5836
5837 if (Features.test(FeatureSGPRInitBug))
5838 NumSGPRs =
5839 MCConstantExpr::create(IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG, Ctx);
5840 }
5841
5842 // The MCExpr equivalent of getNumSGPRBlocks/getNumVGPRBlocks:
5843 // (alignTo(max(1u, NumGPR), GPREncodingGranule) / GPREncodingGranule) - 1
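// Worked example (hypothetical values): NumGPR = 37 with an encoding granule
// of 8 gives alignTo(max(1, 37), 8) / 8 - 1 = 40 / 8 - 1 = 4 encoded blocks.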
5844 auto GetNumGPRBlocks = [&Ctx](const MCExpr *NumGPR,
5845 unsigned Granule) -> const MCExpr * {
5846 const MCExpr *OneConst = MCConstantExpr::create(1ul, Ctx);
5847 const MCExpr *GranuleConst = MCConstantExpr::create(Granule, Ctx);
5848 const MCExpr *MaxNumGPR = AMDGPUMCExpr::createMax({NumGPR, OneConst}, Ctx);
5849 const MCExpr *AlignToGPR =
5850 AMDGPUMCExpr::createAlignTo(MaxNumGPR, GranuleConst, Ctx);
5851 const MCExpr *DivGPR =
5852 MCBinaryExpr::createDiv(AlignToGPR, GranuleConst, Ctx);
5853 const MCExpr *SubGPR = MCBinaryExpr::createSub(DivGPR, OneConst, Ctx);
5854 return SubGPR;
5855 };
5856
5857 VGPRBlocks = GetNumGPRBlocks(
5858 NextFreeVGPR,
5859 IsaInfo::getVGPREncodingGranule(&getSTI(), EnableWavefrontSize32));
5860 SGPRBlocks =
5861 GetNumGPRBlocks(NumSGPRs, IsaInfo::getSGPREncodingGranule(&getSTI()));
5862
5863 return false;
5864}
5865
5866bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
5867 if (!getSTI().getTargetTriple().isAMDGCN())
5868 return TokError("directive only supported for amdgcn architecture");
5869
5870 if (!isHsaAbi(getSTI()))
5871 return TokError("directive only supported for amdhsa OS");
5872
5873 StringRef KernelName;
5874 if (getParser().parseIdentifier(KernelName))
5875 return true;
5876
5877 AMDGPU::MCKernelDescriptor KD =
5878 AMDGPU::MCKernelDescriptor::getDefaultAmdhsaKernelDescriptor(
5879 &getSTI(), getContext());
5880
5881 StringSet<> Seen;
5882
5883 IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
5884
5885 const MCExpr *ZeroExpr = MCConstantExpr::create(0, getContext());
5886 const MCExpr *OneExpr = MCConstantExpr::create(1, getContext());
5887
5888 SMRange VGPRRange;
5889 const MCExpr *NextFreeVGPR = ZeroExpr;
5890 const MCExpr *AccumOffset = MCConstantExpr::create(0, getContext());
5891 const MCExpr *NamedBarCnt = ZeroExpr;
5892 uint64_t SharedVGPRCount = 0;
5893 uint64_t PreloadLength = 0;
5894 uint64_t PreloadOffset = 0;
5895 SMRange SGPRRange;
5896 const MCExpr *NextFreeSGPR = ZeroExpr;
5897
5898 // Count the number of user SGPRs implied from the enabled feature bits.
5899 unsigned ImpliedUserSGPRCount = 0;
5900
5901 // Track if the asm explicitly contains the directive for the user SGPR
5902 // count.
5903 std::optional<unsigned> ExplicitUserSGPRCount;
5904 const MCExpr *ReserveVCC = OneExpr;
5905 const MCExpr *ReserveFlatScr = OneExpr;
5906 std::optional<bool> EnableWavefrontSize32;
5907
5908 while (true) {
5909 while (trySkipToken(AsmToken::EndOfStatement));
5910
5911 StringRef ID;
5912 SMRange IDRange = getTok().getLocRange();
5913 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
5914 return true;
5915
5916 if (ID == ".end_amdhsa_kernel")
5917 break;
5918
5919 if (!Seen.insert(ID).second)
5920 return TokError(".amdhsa_ directives cannot be repeated");
5921
5922 SMLoc ValStart = getLoc();
5923 const MCExpr *ExprVal;
5924 if (getParser().parseExpression(ExprVal))
5925 return true;
5926 SMLoc ValEnd = getLoc();
5927 SMRange ValRange = SMRange(ValStart, ValEnd);
5928
5929 int64_t IVal = 0;
5930 uint64_t Val = IVal;
5931 bool EvaluatableExpr;
5932 if ((EvaluatableExpr = ExprVal->evaluateAsAbsolute(IVal))) {
5933 if (IVal < 0)
5934 return OutOfRangeError(ValRange);
5935 Val = IVal;
5936 }
5937
5938#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
5939 if (!isUInt<ENTRY##_WIDTH>(Val)) \
5940 return OutOfRangeError(RANGE); \
5941 AMDGPU::MCKernelDescriptor::bits_set(FIELD, VALUE, ENTRY##_SHIFT, ENTRY, \
5942 getContext());
5943
5944// Some fields use the parsed value immediately, which requires the
5945// expression to be resolvable.
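// For example (hypothetical input): ".amdhsa_user_sgpr_count 2" resolves
// immediately, while ".amdhsa_user_sgpr_count undefined_sym" triggers the
// "directive should have resolvable expression" error below.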
5946#define EXPR_RESOLVE_OR_ERROR(RESOLVED) \
5947 if (!(RESOLVED)) \
5948 return Error(IDRange.Start, "directive should have resolvable expression", \
5949 IDRange);
5950
5951 if (ID == ".amdhsa_group_segment_fixed_size") {
5952 if (!isUInt<sizeof(kernel_descriptor_t::group_segment_fixed_size) *
5953 CHAR_BIT>(Val))
5954 return OutOfRangeError(ValRange);
5955 KD.group_segment_fixed_size = ExprVal;
5956 } else if (ID == ".amdhsa_private_segment_fixed_size") {
5957 if (!isUInt<sizeof(kernel_descriptor_t::private_segment_fixed_size) *
5958 CHAR_BIT>(Val))
5959 return OutOfRangeError(ValRange);
5960 KD.private_segment_fixed_size = ExprVal;
5961 } else if (ID == ".amdhsa_kernarg_size") {
5962 if (!isUInt<sizeof(kernel_descriptor_t::kernarg_size) * CHAR_BIT>(Val))
5963 return OutOfRangeError(ValRange);
5964 KD.kernarg_size = ExprVal;
5965 } else if (ID == ".amdhsa_user_sgpr_count") {
5966 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5967 ExplicitUserSGPRCount = Val;
5968 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
5969 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5970 if (hasArchitectedFlatScratch())
5971 return Error(IDRange.Start,
5972 "directive is not supported with architected flat scratch",
5973 IDRange);
5974 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5975 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
5976 ExprVal, ValRange);
5977 if (Val)
5978 ImpliedUserSGPRCount += 4;
5979 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_length") {
5980 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5981 if (!hasKernargPreload())
5982 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5983
5984 if (Val > getMaxNumUserSGPRs())
5985 return OutOfRangeError(ValRange);
5986 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_LENGTH, ExprVal,
5987 ValRange);
5988 if (Val) {
5989 ImpliedUserSGPRCount += Val;
5990 PreloadLength = Val;
5991 }
5992 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_offset") {
5993 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5994 if (!hasKernargPreload())
5995 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5996
5997 if (Val >= 1024)
5998 return OutOfRangeError(ValRange);
5999 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_OFFSET, ExprVal,
6000 ValRange);
6001 if (Val)
6002 PreloadOffset = Val;
6003 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
6004 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6005 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6006 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, ExprVal,
6007 ValRange);
6008 if (Val)
6009 ImpliedUserSGPRCount += 2;
6010 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
6011 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6012 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6013 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, ExprVal,
6014 ValRange);
6015 if (Val)
6016 ImpliedUserSGPRCount += 2;
6017 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
6018 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6019 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6020 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
6021 ExprVal, ValRange);
6022 if (Val)
6023 ImpliedUserSGPRCount += 2;
6024 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
6025 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6026 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6027 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, ExprVal,
6028 ValRange);
6029 if (Val)
6030 ImpliedUserSGPRCount += 2;
6031 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
6032 if (hasArchitectedFlatScratch())
6033 return Error(IDRange.Start,
6034 "directive is not supported with architected flat scratch",
6035 IDRange);
6036 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6037 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6038 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
6039 ExprVal, ValRange);
6040 if (Val)
6041 ImpliedUserSGPRCount += 2;
6042 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
6043 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6044 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6045 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
6046 ExprVal, ValRange);
6047 if (Val)
6048 ImpliedUserSGPRCount += 1;
6049 } else if (ID == ".amdhsa_wavefront_size32") {
6050 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6051 if (IVersion.Major < 10)
6052 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
6053 EnableWavefrontSize32 = Val;
6054 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6055 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, ExprVal,
6056 ValRange);
6057 } else if (ID == ".amdhsa_uses_dynamic_stack") {
6058 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6059 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, ExprVal,
6060 ValRange);
6061 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
6062 if (hasArchitectedFlatScratch())
6063 return Error(IDRange.Start,
6064 "directive is not supported with architected flat scratch",
6065 IDRange);
6066 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6067 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
6068 ValRange);
6069 } else if (ID == ".amdhsa_enable_private_segment") {
6070 if (!hasArchitectedFlatScratch())
6071 return Error(
6072 IDRange.Start,
6073 "directive is not supported without architected flat scratch",
6074 IDRange);
6075 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6076 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
6077 ValRange);
6078 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
6079 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6080 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, ExprVal,
6081 ValRange);
6082 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
6083 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6084 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, ExprVal,
6085 ValRange);
6086 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
6087 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6088 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, ExprVal,
6089 ValRange);
6090 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
6091 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6092 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, ExprVal,
6093 ValRange);
6094 } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
6095 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6096 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, ExprVal,
6097 ValRange);
6098 } else if (ID == ".amdhsa_next_free_vgpr") {
6099 VGPRRange = ValRange;
6100 NextFreeVGPR = ExprVal;
6101 } else if (ID == ".amdhsa_next_free_sgpr") {
6102 SGPRRange = ValRange;
6103 NextFreeSGPR = ExprVal;
6104 } else if (ID == ".amdhsa_accum_offset") {
6105 if (!isGFX90A())
6106 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
6107 AccumOffset = ExprVal;
6108 } else if (ID == ".amdhsa_named_barrier_count") {
6109 if (!isGFX1250())
6110 return Error(IDRange.Start, "directive requires gfx1250+", IDRange);
6111 NamedBarCnt = ExprVal;
6112 } else if (ID == ".amdhsa_reserve_vcc") {
6113 if (EvaluatableExpr && !isUInt<1>(Val))
6114 return OutOfRangeError(ValRange);
6115 ReserveVCC = ExprVal;
6116 } else if (ID == ".amdhsa_reserve_flat_scratch") {
6117 if (IVersion.Major < 7)
6118 return Error(IDRange.Start, "directive requires gfx7+", IDRange);
6119 if (hasArchitectedFlatScratch())
6120 return Error(IDRange.Start,
6121 "directive is not supported with architected flat scratch",
6122 IDRange);
6123 if (EvaluatableExpr && !isUInt<1>(Val))
6124 return OutOfRangeError(ValRange);
6125 ReserveFlatScr = ExprVal;
6126 } else if (ID == ".amdhsa_reserve_xnack_mask") {
6127 if (IVersion.Major < 8)
6128 return Error(IDRange.Start, "directive requires gfx8+", IDRange);
6129 if (!isUInt<1>(Val))
6130 return OutOfRangeError(ValRange);
6131 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
6132 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
6133 IDRange);
6134 } else if (ID == ".amdhsa_float_round_mode_32") {
6135 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6136 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, ExprVal,
6137 ValRange);
6138 } else if (ID == ".amdhsa_float_round_mode_16_64") {
6139 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6140 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, ExprVal,
6141 ValRange);
6142 } else if (ID == ".amdhsa_float_denorm_mode_32") {
6143 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6144 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, ExprVal,
6145 ValRange);
6146 } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
6147 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6148 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, ExprVal,
6149 ValRange);
6150 } else if (ID == ".amdhsa_dx10_clamp") {
6151 if (IVersion.Major >= 12)
6152 return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
6153 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6154 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, ExprVal,
6155 ValRange);
6156 } else if (ID == ".amdhsa_ieee_mode") {
6157 if (IVersion.Major >= 12)
6158 return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
6159 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6160 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, ExprVal,
6161 ValRange);
6162 } else if (ID == ".amdhsa_fp16_overflow") {
6163 if (IVersion.Major < 9)
6164 return Error(IDRange.Start, "directive requires gfx9+", IDRange);
6165 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6166 COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, ExprVal,
6167 ValRange);
6168 } else if (ID == ".amdhsa_tg_split") {
6169 if (!isGFX90A())
6170 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
6171 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
6172 ExprVal, ValRange);
6173 } else if (ID == ".amdhsa_workgroup_processor_mode") {
6174 if (!supportsWGP(getSTI()))
6175 return Error(IDRange.Start,
6176 "directive unsupported on " + getSTI().getCPU(), IDRange);
6177 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6178 COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, ExprVal,
6179 ValRange);
6180 } else if (ID == ".amdhsa_memory_ordered") {
6181 if (IVersion.Major < 10)
6182 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
6183 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6184 COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, ExprVal,
6185 ValRange);
6186 } else if (ID == ".amdhsa_forward_progress") {
6187 if (IVersion.Major < 10)
6188 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
6189 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6190 COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, ExprVal,
6191 ValRange);
6192 } else if (ID == ".amdhsa_shared_vgpr_count") {
6193 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6194 if (IVersion.Major < 10 || IVersion.Major >= 12)
6195 return Error(IDRange.Start, "directive requires gfx10 or gfx11",
6196 IDRange);
6197 SharedVGPRCount = Val;
6198 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
6199 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, ExprVal,
6200 ValRange);
6201 } else if (ID == ".amdhsa_inst_pref_size") {
6202 if (IVersion.Major < 11)
6203 return Error(IDRange.Start, "directive requires gfx11+", IDRange);
6204 if (IVersion.Major == 11) {
6205 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
6206 COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE, ExprVal,
6207 ValRange);
6208 } else {
6209 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
6210 COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE, ExprVal,
6211 ValRange);
6212 }
6213 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
6214 PARSE_BITS_ENTRY(
6215 KD.compute_pgm_rsrc2,
6216 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION,
6217 ExprVal, ValRange);
6218 } else if (ID == ".amdhsa_exception_fp_denorm_src") {
6219 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6220 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
6221 ExprVal, ValRange);
6222 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
6223 PARSE_BITS_ENTRY(
6224 KD.compute_pgm_rsrc2,
6225 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO,
6226 ExprVal, ValRange);
6227 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
6228 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6229 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
6230 ExprVal, ValRange);
6231 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
6232 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6233 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
6234 ExprVal, ValRange);
6235 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
6236 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6237 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
6238 ExprVal, ValRange);
6239 } else if (ID == ".amdhsa_exception_int_div_zero") {
6240 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6241 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
6242 ExprVal, ValRange);
6243 } else if (ID == ".amdhsa_round_robin_scheduling") {
6244 if (IVersion.Major < 12)
6245 return Error(IDRange.Start, "directive requires gfx12+", IDRange);
6246 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6247 COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, ExprVal,
6248 ValRange);
6249 } else {
6250 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
6251 }
6252
6253#undef PARSE_BITS_ENTRY
6254 }
6255
6256 if (!Seen.contains(".amdhsa_next_free_vgpr"))
6257 return TokError(".amdhsa_next_free_vgpr directive is required");
6258
6259 if (!Seen.contains(".amdhsa_next_free_sgpr"))
6260 return TokError(".amdhsa_next_free_sgpr directive is required");
6261
6262 unsigned UserSGPRCount = ExplicitUserSGPRCount.value_or(ImpliedUserSGPRCount);
6263
6264 // Consider the case where the total number of user SGPRs, including the
6265 // trailing allocated preload SGPRs, is greater than the number of
6266 // explicitly referenced SGPRs.
6267 if (PreloadLength) {
6268 MCContext &Ctx = getContext();
6269 NextFreeSGPR = AMDGPUMCExpr::createMax(
6270 {NextFreeSGPR, MCConstantExpr::create(UserSGPRCount, Ctx)}, Ctx);
6271 }
6272
6273 const MCExpr *VGPRBlocks;
6274 const MCExpr *SGPRBlocks;
6275 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
6276 getTargetStreamer().getTargetID()->isXnackOnOrAny(),
6277 EnableWavefrontSize32, NextFreeVGPR,
6278 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
6279 SGPRBlocks))
6280 return true;
6281
6282 int64_t EvaluatedVGPRBlocks;
6283 bool VGPRBlocksEvaluatable =
6284 VGPRBlocks->evaluateAsAbsolute(EvaluatedVGPRBlocks);
6285 if (VGPRBlocksEvaluatable &&
6286 !isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
6287 static_cast<uint64_t>(EvaluatedVGPRBlocks))) {
6288 return OutOfRangeError(VGPRRange);
6289 }
6290 MCKernelDescriptor::bits_set(
6291 KD.compute_pgm_rsrc1, VGPRBlocks,
6292 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT,
6293 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, getContext());
6294
6295 int64_t EvaluatedSGPRBlocks;
6296 if (SGPRBlocks->evaluateAsAbsolute(EvaluatedSGPRBlocks) &&
6297 !isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
6298 static_cast<uint64_t>(EvaluatedSGPRBlocks)))
6299 return OutOfRangeError(SGPRRange);
6300 MCKernelDescriptor::bits_set(
6301 KD.compute_pgm_rsrc1, SGPRBlocks,
6302 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT,
6303 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, getContext());
6304
6305 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
6306 return TokError("amdgpu_user_sgpr_count smaller than implied by "
6307 "enabled user SGPRs");
6308
6309 if (isGFX1250()) {
6310 if (!isUInt<COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
6311 return TokError("too many user SGPRs enabled");
6312 MCKernelDescriptor::bits_set(
6313 KD.compute_pgm_rsrc2,
6314 MCConstantExpr::create(UserSGPRCount, getContext()),
6315 COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT_SHIFT,
6316 COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT, getContext());
6317 } else {
6318 if (!isUInt<COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT_WIDTH>(
6319 UserSGPRCount))
6320 return TokError("too many user SGPRs enabled");
6321 MCKernelDescriptor::bits_set(
6322 KD.compute_pgm_rsrc2,
6323 MCConstantExpr::create(UserSGPRCount, getContext()),
6324 COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT_SHIFT,
6325 COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT, getContext());
6326 }
6327
6328 int64_t IVal = 0;
6329 if (!KD.kernarg_size->evaluateAsAbsolute(IVal))
6330 return TokError("Kernarg size should be resolvable");
6331 uint64_t kernarg_size = IVal;
6332 if (PreloadLength && kernarg_size &&
6333 (PreloadLength * 4 + PreloadOffset * 4 > kernarg_size))
6334 return TokError("Kernarg preload length + offset is larger than the "
6335 "kernarg segment size");
6336
6337 if (isGFX90A()) {
6338 if (!Seen.contains(".amdhsa_accum_offset"))
6339 return TokError(".amdhsa_accum_offset directive is required");
6340 int64_t EvaluatedAccum;
6341 bool AccumEvaluatable = AccumOffset->evaluateAsAbsolute(EvaluatedAccum);
6342 uint64_t UEvaluatedAccum = EvaluatedAccum;
6343 if (AccumEvaluatable &&
6344 (UEvaluatedAccum < 4 || UEvaluatedAccum > 256 || (UEvaluatedAccum & 3)))
6345 return TokError("accum_offset should be in range [4..256] in "
6346 "increments of 4");
6347
6348 int64_t EvaluatedNumVGPR;
6349 if (NextFreeVGPR->evaluateAsAbsolute(EvaluatedNumVGPR) &&
6350 AccumEvaluatable &&
6351 UEvaluatedAccum >
6352 alignTo(std::max((uint64_t)1, (uint64_t)EvaluatedNumVGPR), 4))
6353 return TokError("accum_offset exceeds total VGPR allocation");
6354 const MCExpr *AdjustedAccum = MCBinaryExpr::createSub(
6355 MCBinaryExpr::createDiv(
6356 AccumOffset, MCConstantExpr::create(4, getContext()), getContext()),
6357 MCConstantExpr::create(1, getContext()), getContext());
6358 MCKernelDescriptor::bits_set(KD.compute_pgm_rsrc3, AdjustedAccum,
6359 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
6360 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
6361 getContext());
6362 }
6363
6364 if (isGFX1250())
6365 MCKernelDescriptor::bits_set(KD.compute_pgm_rsrc3, NamedBarCnt,
6366 COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT_SHIFT,
6367 COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT,
6368 getContext());
6369
6370 if (IVersion.Major >= 10 && IVersion.Major < 12) {
6371 // SharedVGPRCount < 16 checked by PARSE_BITS_ENTRY
6372 if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) {
6373 return TokError("shared_vgpr_count directive not valid on "
6374 "wavefront size 32");
6375 }
6376
6377 if (VGPRBlocksEvaluatable &&
6378 (SharedVGPRCount * 2 + static_cast<uint64_t>(EvaluatedVGPRBlocks) >
6379 63)) {
6380 return TokError("shared_vgpr_count*2 + "
6381 "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
6382 "exceed 63\n");
6383 }
6384 }
6385
6386 getTargetStreamer().EmitAmdhsaKernelDescriptor(getSTI(), KernelName, KD,
6387 NextFreeVGPR, NextFreeSGPR,
6388 ReserveVCC, ReserveFlatScr);
6389 return false;
6390}
6391
6392bool AMDGPUAsmParser::ParseDirectiveAMDHSACodeObjectVersion() {
6393 uint32_t Version;
6394 if (ParseAsAbsoluteExpression(Version))
6395 return true;
6396
6397 getTargetStreamer().EmitDirectiveAMDHSACodeObjectVersion(Version);
6398 return false;
6399}
6400
6401bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
6402 AMDGPUMCKernelCodeT &C) {
6403 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
6404 // assembly for backwards compatibility.
6405 if (ID == "max_scratch_backing_memory_byte_size") {
6406 Parser.eatToEndOfStatement();
6407 return false;
6408 }
6409
6410 SmallString<40> ErrStr;
6411 raw_svector_ostream Err(ErrStr);
6412 if (!C.ParseKernelCodeT(ID, getParser(), Err)) {
6413 return TokError(Err.str());
6414 }
6415 Lex();
6416
6417 if (ID == "enable_wavefront_size32") {
6418 if (C.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
6419 if (!isGFX10Plus())
6420 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
6421 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
6422 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
6423 } else {
6424 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
6425 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
6426 }
6427 }
6428
6429 if (ID == "wavefront_size") {
6430 if (C.wavefront_size == 5) {
6431 if (!isGFX10Plus())
6432 return TokError("wavefront_size=5 is only allowed on GFX10+");
6433 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
6434 return TokError("wavefront_size=5 requires +WavefrontSize32");
6435 } else if (C.wavefront_size == 6) {
6436 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
6437 return TokError("wavefront_size=6 requires +WavefrontSize64");
6438 }
6439 }
6440
6441 return false;
6442}
6443
6444bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
6445 AMDGPUMCKernelCodeT KernelCode;
6446 KernelCode.initDefault(&getSTI(), getContext());
6447
6448 while (true) {
6449 // Lex EndOfStatement. This is in a while loop, because lexing a comment
6450 // will set the current token to EndOfStatement.
6451 while(trySkipToken(AsmToken::EndOfStatement));
6452
6453 StringRef ID;
6454 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
6455 return true;
6456
6457 if (ID == ".end_amd_kernel_code_t")
6458 break;
6459
6460 if (ParseAMDKernelCodeTValue(ID, KernelCode))
6461 return true;
6462 }
6463
6464 KernelCode.validate(&getSTI(), getContext());
6465 getTargetStreamer().EmitAMDKernelCodeT(KernelCode);
6466
6467 return false;
6468}
6469
6470bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
6471 StringRef KernelName;
6472 if (!parseId(KernelName, "expected symbol name"))
6473 return true;
6474
6475 getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
6476 ELF::STT_AMDGPU_HSA_KERNEL);
6477
6478 KernelScope.initialize(getContext());
6479 return false;
6480}
6481
6482bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
6483 if (!getSTI().getTargetTriple().isAMDGCN()) {
6484 return Error(getLoc(),
6485 ".amd_amdgpu_isa directive is not available on non-amdgcn "
6486 "architectures");
6487 }
6488
6489 auto TargetIDDirective = getLexer().getTok().getStringContents();
6490 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
6491 return Error(getParser().getTok().getLoc(), "target id must match options");
6492
6493 getTargetStreamer().EmitISAVersion();
6494 Lex();
6495
6496 return false;
6497}
6498
6499bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
6500 assert(isHsaAbi(getSTI()));
6501
6502 std::string HSAMetadataString;
6503 if (ParseToEndDirective(HSAMD::V3::AssemblerDirectiveBegin,
6504 HSAMD::V3::AssemblerDirectiveEnd, HSAMetadataString))
6505 return true;
6506
6507 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
6508 return Error(getLoc(), "invalid HSA metadata");
6509
6510 return false;
6511}
6512
6513/// Common code to parse out a block of text (typically YAML) between start and
6514/// end directives.
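/// For example, ParseDirectiveHSAMetadata passes HSAMD::V3::AssemblerDirectiveBegin
/// and HSAMD::V3::AssemblerDirectiveEnd here; all text between those two
/// directives is collected verbatim into CollectString.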
6515bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
6516 const char *AssemblerDirectiveEnd,
6517 std::string &CollectString) {
6518
6519 raw_string_ostream CollectStream(CollectString);
6520
6521 getLexer().setSkipSpace(false);
6522
6523 bool FoundEnd = false;
6524 while (!isToken(AsmToken::Eof)) {
6525 while (isToken(AsmToken::Space)) {
6526 CollectStream << getTokenStr();
6527 Lex();
6528 }
6529
6530 if (trySkipId(AssemblerDirectiveEnd)) {
6531 FoundEnd = true;
6532 break;
6533 }
6534
6535 CollectStream << Parser.parseStringToEndOfStatement()
6536 << getContext().getAsmInfo()->getSeparatorString();
6537
6538 Parser.eatToEndOfStatement();
6539 }
6540
6541 getLexer().setSkipSpace(true);
6542
6543 if (isToken(AsmToken::Eof) && !FoundEnd) {
6544 return TokError(Twine("expected directive ") +
6545 Twine(AssemblerDirectiveEnd) + Twine(" not found"));
6546 }
6547
6548 return false;
6549}
6550
6551/// Parse the assembler directive for new MsgPack-format PAL metadata.
6552bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
6553 std::string String;
6554 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
6555 AMDGPU::PALMD::AssemblerDirectiveEnd, String))
6556 return true;
6557
6558 auto *PALMetadata = getTargetStreamer().getPALMetadata();
6559 if (!PALMetadata->setFromString(String))
6560 return Error(getLoc(), "invalid PAL metadata");
6561 return false;
6562}
6563
6564/// Parse the assembler directive for old linear-format PAL metadata.
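/// For example (hypothetical values), the directive named by
/// PALMD::AssemblerDirective is followed by alternating key/value pairs such
/// as "0x2c0a, 0x12345678, 0x2c0b, 0x9abcdef0"; each pair is forwarded to
/// PALMetadata->setRegister(Key, Value) below.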
6565bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
6566 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
6567 return Error(getLoc(),
6568 (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
6569 "not available on non-amdpal OSes")).str());
6570 }
6571
6572 auto *PALMetadata = getTargetStreamer().getPALMetadata();
6573 PALMetadata->setLegacy();
6574 for (;;) {
6575 uint32_t Key, Value;
6576 if (ParseAsAbsoluteExpression(Key)) {
6577 return TokError(Twine("invalid value in ") +
6578 Twine(PALMD::AssemblerDirective));
6579 }
6580 if (!trySkipToken(AsmToken::Comma)) {
6581 return TokError(Twine("expected an even number of values in ") +
6582 Twine(PALMD::AssemblerDirective));
6583 }
6584 if (ParseAsAbsoluteExpression(Value)) {
6585 return TokError(Twine("invalid value in ") +
6586 Twine(PALMD::AssemblerDirective));
6587 }
6588 PALMetadata->setRegister(Key, Value);
6589 if (!trySkipToken(AsmToken::Comma))
6590 break;
6591 }
6592 return false;
6593}
6594
6595/// ParseDirectiveAMDGPULDS
6596/// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
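/// For example (hypothetical symbol): ".amdgpu_lds shared_buf, 4096, 16"
/// reserves 4096 bytes of LDS for shared_buf with 16-byte alignment.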
6597bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
6598 if (getParser().checkForValidSection())
6599 return true;
6600
6601 StringRef Name;
6602 SMLoc NameLoc = getLoc();
6603 if (getParser().parseIdentifier(Name))
6604 return TokError("expected identifier in directive");
6605
6606 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
6607 if (getParser().parseComma())
6608 return true;
6609
6610 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
6611
6612 int64_t Size;
6613 SMLoc SizeLoc = getLoc();
6614 if (getParser().parseAbsoluteExpression(Size))
6615 return true;
6616 if (Size < 0)
6617 return Error(SizeLoc, "size must be non-negative");
6618 if (Size > LocalMemorySize)
6619 return Error(SizeLoc, "size is too large");
6620
6621 int64_t Alignment = 4;
6622 if (trySkipToken(AsmToken::Comma)) {
6623 SMLoc AlignLoc = getLoc();
6624 if (getParser().parseAbsoluteExpression(Alignment))
6625 return true;
6626 if (Alignment < 0 || !isPowerOf2_64(Alignment))
6627 return Error(AlignLoc, "alignment must be a power of two");
6628
6629 // Alignment larger than the size of LDS is possible in theory, as long
6630 // as the linker manages to place the symbol at address 0, but we do want
6631 // to make sure the alignment fits nicely into a 32-bit integer.
6632 if (Alignment >= 1u << 31)
6633 return Error(AlignLoc, "alignment is too large");
6634 }
6635
6636 if (parseEOL())
6637 return true;
6638
6639 Symbol->redefineIfPossible();
6640 if (!Symbol->isUndefined())
6641 return Error(NameLoc, "invalid symbol redefinition");
6642
6643 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
6644 return false;
6645}
6646
6647bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
6648 StringRef IDVal = DirectiveID.getString();
6649
6650 if (isHsaAbi(getSTI())) {
6651 if (IDVal == ".amdhsa_kernel")
6652 return ParseDirectiveAMDHSAKernel();
6653
6654 if (IDVal == ".amdhsa_code_object_version")
6655 return ParseDirectiveAMDHSACodeObjectVersion();
6656
6657 // TODO: Restructure/combine with PAL metadata directive.
6658 if (IDVal == HSAMD::V3::AssemblerDirectiveBegin)
6659 return ParseDirectiveHSAMetadata();
6660 } else {
6661 if (IDVal == ".amd_kernel_code_t")
6662 return ParseDirectiveAMDKernelCodeT();
6663
6664 if (IDVal == ".amdgpu_hsa_kernel")
6665 return ParseDirectiveAMDGPUHsaKernel();
6666
6667 if (IDVal == ".amd_amdgpu_isa")
6668 return ParseDirectiveISAVersion();
6669
6669
6670 if (IDVal == HSAMD::AssemblerDirectiveBegin) {
6671 return Error(getLoc(), (Twine(HSAMD::AssemblerDirectiveBegin) +
6672 Twine(" directive is "
6673 "not available on non-amdhsa OSes"))
6674 .str());
6675 }
6676 }
6677
6678 if (IDVal == ".amdgcn_target")
6679 return ParseDirectiveAMDGCNTarget();
6680
6681 if (IDVal == ".amdgpu_lds")
6682 return ParseDirectiveAMDGPULDS();
6683
6684 if (IDVal == PALMD::AssemblerDirectiveBegin)
6685 return ParseDirectivePALMetadataBegin();
6686
6687 if (IDVal == PALMD::AssemblerDirective)
6688 return ParseDirectivePALMetadata();
6689
6690 return true;
6691}
6692
6693bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
6694 MCRegister Reg) {
6695 if (MRI.regsOverlap(TTMP12_TTMP13_TTMP14_TTMP15, Reg))
6696 return isGFX9Plus();
6697
6698 // GFX10+ has 2 more SGPRs 104 and 105.
6699 if (MRI.regsOverlap(SGPR104_SGPR105, Reg))
6700 return hasSGPR104_SGPR105();
6701
6702 switch (Reg.id()) {
6703 case SRC_SHARED_BASE_LO:
6704 case SRC_SHARED_BASE:
6705 case SRC_SHARED_LIMIT_LO:
6706 case SRC_SHARED_LIMIT:
6707 case SRC_PRIVATE_BASE_LO:
6708 case SRC_PRIVATE_BASE:
6709 case SRC_PRIVATE_LIMIT_LO:
6710 case SRC_PRIVATE_LIMIT:
6711 return isGFX9Plus();
6712 case SRC_FLAT_SCRATCH_BASE_LO:
6713 case SRC_FLAT_SCRATCH_BASE_HI:
6714 return hasGloballyAddressableScratch();
6715 case SRC_POPS_EXITING_WAVE_ID:
6716 return isGFX9Plus() && !isGFX11Plus();
6717 case TBA:
6718 case TBA_LO:
6719 case TBA_HI:
6720 case TMA:
6721 case TMA_LO:
6722 case TMA_HI:
6723 return !isGFX9Plus();
6724 case XNACK_MASK:
6725 case XNACK_MASK_LO:
6726 case XNACK_MASK_HI:
6727 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
6728 case SGPR_NULL:
6729 return isGFX10Plus();
6730 case SRC_EXECZ:
6731 case SRC_VCCZ:
6732 return !isGFX11Plus();
6733 default:
6734 break;
6735 }
6736
6737 if (isCI())
6738 return true;
6739
6740 if (isSI() || isGFX10Plus()) {
6741 // No flat_scr on SI.
6742 // On GFX10Plus flat scratch is not a valid register operand and can only be
6743 // accessed with s_setreg/s_getreg.
6744 switch (Reg.id()) {
6745 case FLAT_SCR:
6746 case FLAT_SCR_LO:
6747 case FLAT_SCR_HI:
6748 return false;
6749 default:
6750 return true;
6751 }
6752 }
6753
6754 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
6755 // SI/CI have.
6756 if (MRI.regsOverlap(SGPR102_SGPR103, Reg))
6757 return hasSGPR102_SGPR103();
6758
6759 return true;
6760}
6761
6762ParseStatus AMDGPUAsmParser::parseOperand(OperandVector &Operands,
6763 StringRef Mnemonic,
6764 OperandMode Mode) {
6765 ParseStatus Res = parseVOPD(Operands);
6766 if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
6767 return Res;
6768
6769 // Try to parse with a custom parser
6770 Res = MatchOperandParserImpl(Operands, Mnemonic);
6771
6772 // If we successfully parsed the operand or if there was an error parsing,
6773 // we are done.
6774 //
6775 // If we are parsing after we reach EndOfStatement then this means we
6776 // are appending default values to the Operands list. This is only done
6777 // by custom parser, so we shouldn't continue on to the generic parsing.
6778 if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
6779 return Res;
6780
6781 SMLoc RBraceLoc;
6782 SMLoc LBraceLoc = getLoc();
6783 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
6784 unsigned Prefix = Operands.size();
6785
6786 for (;;) {
6787 auto Loc = getLoc();
6788 Res = parseReg(Operands);
6789 if (Res.isNoMatch())
6790 Error(Loc, "expected a register");
6791 if (!Res.isSuccess())
6792 return ParseStatus::Failure;
6793
6794 RBraceLoc = getLoc();
6795 if (trySkipToken(AsmToken::RBrac))
6796 break;
6797
6798 if (!skipToken(AsmToken::Comma,
6799 "expected a comma or a closing square bracket"))
6800 return ParseStatus::Failure;
6801 }
6802
6803 if (Operands.size() - Prefix > 1) {
6804 Operands.insert(Operands.begin() + Prefix,
6805 AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
6806 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
6807 }
6808
6809 return ParseStatus::Success;
6810 }
6811
6812 return parseRegOrImm(Operands);
6813}
6814
6815StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
6816 // Clear any forced encodings from the previous instruction.
6817 setForcedEncodingSize(0);
6818 setForcedDPP(false);
6819 setForcedSDWA(false);
6820
6821 if (Name.consume_back("_e64_dpp")) {
6822 setForcedDPP(true);
6823 setForcedEncodingSize(64);
6824 return Name;
6825 }
6826 if (Name.consume_back("_e64")) {
6827 setForcedEncodingSize(64);
6828 return Name;
6829 }
6830 if (Name.consume_back("_e32")) {
6831 setForcedEncodingSize(32);
6832 return Name;
6833 }
6834 if (Name.consume_back("_dpp")) {
6835 setForcedDPP(true);
6836 return Name;
6837 }
6838 if (Name.consume_back("_sdwa")) {
6839 setForcedSDWA(true);
6840 return Name;
6841 }
6842 return Name;
6843}
6844
6845static void applyMnemonicAliases(StringRef &Mnemonic,
6846 const FeatureBitset &Features,
6847 unsigned VariantID);
6848
6849bool AMDGPUAsmParser::parseInstruction(ParseInstructionInfo &Info,
6850 StringRef Name, SMLoc NameLoc,
6851 OperandVector &Operands) {
6852 // Add the instruction mnemonic
6853 Name = parseMnemonicSuffix(Name);
6854
6855 // If the target architecture uses MnemonicAlias, call it here to parse
6856 // operands correctly.
6857 applyMnemonicAliases(Name, getAvailableFeatures(), 0);
6858
6859 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
6860
6861 bool IsMIMG = Name.starts_with("image_");
6862
6863 while (!trySkipToken(AsmToken::EndOfStatement)) {
6864 OperandMode Mode = OperandMode_Default;
6865 if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
6866 Mode = OperandMode_NSA;
6867 ParseStatus Res = parseOperand(Operands, Name, Mode);
6868
6869 if (!Res.isSuccess()) {
6870 checkUnsupportedInstruction(Name, NameLoc);
6871 if (!Parser.hasPendingError()) {
6872 // FIXME: use real operand location rather than the current location.
6873 StringRef Msg = Res.isFailure() ? "failed parsing operand."
6874 : "not a valid operand.";
6875 Error(getLoc(), Msg);
6876 }
6877 while (!trySkipToken(AsmToken::EndOfStatement)) {
6878 lex();
6879 }
6880 return true;
6881 }
6882
6883 // Eat the comma or space if there is one.
6884 trySkipToken(AsmToken::Comma);
6885 }
6886
6887 return false;
6888}
6889
6890//===----------------------------------------------------------------------===//
6891// Utility functions
6892//===----------------------------------------------------------------------===//
6893
6894ParseStatus AMDGPUAsmParser::parseTokenOp(StringRef Name,
6895 OperandVector &Operands) {
6896 SMLoc S = getLoc();
6897 if (!trySkipId(Name))
6898 return ParseStatus::NoMatch;
6899
6900 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, S));
6901 return ParseStatus::Success;
6902}
6903
6904ParseStatus AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix,
6905 int64_t &IntVal) {
6906
6907 if (!trySkipId(Prefix, AsmToken::Colon))
6908 return ParseStatus::NoMatch;
6909
6910 return parseExpr(IntVal) ? ParseStatus::Success : ParseStatus::Failure;
6911}
6912
6913ParseStatus AMDGPUAsmParser::parseIntWithPrefix(
6914 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
6915 std::function<bool(int64_t &)> ConvertResult) {
6916 SMLoc S = getLoc();
6917 int64_t Value = 0;
6918
6919 ParseStatus Res = parseIntWithPrefix(Prefix, Value);
6920 if (!Res.isSuccess())
6921 return Res;
6922
6923 if (ConvertResult && !ConvertResult(Value)) {
6924 Error(S, "invalid " + StringRef(Prefix) + " value.");
6925 }
6926
6927 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
6928 return ParseStatus::Success;
6929}
6930
6931ParseStatus AMDGPUAsmParser::parseOperandArrayWithPrefix(
6932 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
6933 bool (*ConvertResult)(int64_t &)) {
6934 SMLoc S = getLoc();
6935 if (!trySkipId(Prefix, AsmToken::Colon))
6936 return ParseStatus::NoMatch;
6937
6938 if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
6939 return ParseStatus::Failure;
6940
6941 unsigned Val = 0;
6942 const unsigned MaxSize = 4;
6943
6944 // FIXME: How to verify the number of elements matches the number of src
6945 // operands?
6946 for (int I = 0; ; ++I) {
6947 int64_t Op;
6948 SMLoc Loc = getLoc();
6949 if (!parseExpr(Op))
6950 return ParseStatus::Failure;
6951
6952 if (Op != 0 && Op != 1)
6953 return Error(Loc, "invalid " + StringRef(Prefix) + " value.");
6954
6955 Val |= (Op << I);
6956
6957 if (trySkipToken(AsmToken::RBrac))
6958 break;
6959
6960 if (I + 1 == MaxSize)
6961 return Error(getLoc(), "expected a closing square bracket");
6962
6963 if (!skipToken(AsmToken::Comma, "expected a comma"))
6964 return ParseStatus::Failure;
6965 }
6966
6967 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
6968 return ParseStatus::Success;
6969}
6970
6971ParseStatus AMDGPUAsmParser::parseNamedBit(StringRef Name,
6972 OperandVector &Operands,
6973 AMDGPUOperand::ImmTy ImmTy) {
6974 int64_t Bit;
6975 SMLoc S = getLoc();
6976
6977 if (trySkipId(Name)) {
6978 Bit = 1;
6979 } else if (trySkipId("no", Name)) {
6980 Bit = 0;
6981 } else {
6982 return ParseStatus::NoMatch;
6983 }
6984
6985 if (Name == "r128" && !hasMIMG_R128())
6986 return Error(S, "r128 modifier is not supported on this GPU");
6987 if (Name == "a16" && !hasA16())
6988 return Error(S, "a16 modifier is not supported on this GPU");
6989
6990 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
6991 ImmTy = AMDGPUOperand::ImmTyR128A16;
6992
6993 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
6994 return ParseStatus::Success;
6995}
6996
6997unsigned AMDGPUAsmParser::getCPolKind(StringRef Id, StringRef Mnemo,
6998 bool &Disabling) const {
6999 Disabling = Id.consume_front("no");
7000
7001 if (isGFX940() && !Mnemo.starts_with("s_")) {
7002 return StringSwitch<unsigned>(Id)
7003 .Case("nt", AMDGPU::CPol::NT)
7004 .Case("sc0", AMDGPU::CPol::SC0)
7005 .Case("sc1", AMDGPU::CPol::SC1)
7006 .Default(0);
7007 }
7008
7009 return StringSwitch<unsigned>(Id)
7010 .Case("dlc", AMDGPU::CPol::DLC)
7011 .Case("glc", AMDGPU::CPol::GLC)
7012 .Case("scc", AMDGPU::CPol::SCC)
7013 .Case("slc", AMDGPU::CPol::SLC)
7014 .Default(0);
7015}
7016
7017ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
7018 if (isGFX12Plus()) {
7019 SMLoc StringLoc = getLoc();
7020
7021 int64_t CPolVal = 0;
7022 ParseStatus ResTH = ParseStatus::NoMatch;
7023 ParseStatus ResScope = ParseStatus::NoMatch;
7024 ParseStatus ResNV = ParseStatus::NoMatch;
7025 ParseStatus ResScal = ParseStatus::NoMatch;
7026
7027 for (;;) {
7028 if (ResTH.isNoMatch()) {
7029 int64_t TH;
7030 ResTH = parseTH(Operands, TH);
7031 if (ResTH.isFailure())
7032 return ResTH;
7033 if (ResTH.isSuccess()) {
7034 CPolVal |= TH;
7035 continue;
7036 }
7037 }
7038
7039 if (ResScope.isNoMatch()) {
7040 int64_t Scope;
7041 ResScope = parseScope(Operands, Scope);
7042 if (ResScope.isFailure())
7043 return ResScope;
7044 if (ResScope.isSuccess()) {
7045 CPolVal |= Scope;
7046 continue;
7047 }
7048 }
7049
7050 // The NV bit exists on GFX12+, but only takes effect starting from GFX1250.
7051 // Allow parsing on all GFX12 and fail on validation for better
7052 // diagnostics.
7053 if (ResNV.isNoMatch()) {
7054 if (trySkipId("nv")) {
7055 ResNV = ParseStatus::Success;
7056 CPolVal |= CPol::NV;
7057 continue;
7058 } else if (trySkipId("no", "nv")) {
7059 ResNV = ParseStatus::Success;
7060 continue;
7061 }
7062 }
7063
7064 if (ResScal.isNoMatch()) {
7065 if (trySkipId("scale_offset")) {
7066 ResScal = ParseStatus::Success;
7067 CPolVal |= CPol::SCAL;
7068 continue;
7069 } else if (trySkipId("no", "scale_offset")) {
7070 ResScal = ParseStatus::Success;
7071 continue;
7072 }
7073 }
7074
7075 break;
7076 }
7077
7078 if (ResTH.isNoMatch() && ResScope.isNoMatch() && ResNV.isNoMatch() &&
7079 ResScal.isNoMatch())
7080 return ParseStatus::NoMatch;
7081
7082 Operands.push_back(AMDGPUOperand::CreateImm(this, CPolVal, StringLoc,
7083 AMDGPUOperand::ImmTyCPol));
7084 return ParseStatus::Success;
7085 }
7086
7087 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
7088 SMLoc OpLoc = getLoc();
7089 unsigned Enabled = 0, Seen = 0;
7090 for (;;) {
7091 SMLoc S = getLoc();
7092 bool Disabling;
7093 unsigned CPol = getCPolKind(getId(), Mnemo, Disabling);
7094 if (!CPol)
7095 break;
7096
7097 lex();
7098
7099 if (!isGFX10Plus() && CPol == AMDGPU::CPol::DLC)
7100 return Error(S, "dlc modifier is not supported on this GPU");
7101
7102 if (!isGFX90A() && CPol == AMDGPU::CPol::SCC)
7103 return Error(S, "scc modifier is not supported on this GPU");
7104
7105 if (Seen & CPol)
7106 return Error(S, "duplicate cache policy modifier");
7107
7108 if (!Disabling)
7109 Enabled |= CPol;
7110
7111 Seen |= CPol;
7112 }
7113
7114 if (!Seen)
7115 return ParseStatus::NoMatch;
7116
7117 Operands.push_back(
7118 AMDGPUOperand::CreateImm(this, Enabled, OpLoc, AMDGPUOperand::ImmTyCPol));
7119 return ParseStatus::Success;
7120}
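// Illustrative sketch, not part of the original file: on pre-GFX12 targets the
// cache policy is a set of order-insensitive flags ("glc", "slc", "dlc", "scc"),
// each optionally negated with a "no" prefix, and a repeated flag is rejected
// via the Seen mask. The standalone helper below mirrors that bookkeeping; the
// bit values are placeholders, not the real AMDGPU::CPol encodings.
static bool accumulateCPolFlags(llvm::ArrayRef<llvm::StringRef> Toks,
                                unsigned &Enabled) {
  enum : unsigned { GLC = 1, SLC = 2, DLC = 4, SCC = 8 }; // placeholder bits
  unsigned Seen = 0;
  for (llvm::StringRef Tok : Toks) {
    bool Disabling = Tok.consume_front("no");
    unsigned Bit = llvm::StringSwitch<unsigned>(Tok)
                       .Case("glc", GLC)
                       .Case("slc", SLC)
                       .Case("dlc", DLC)
                       .Case("scc", SCC)
                       .Default(0);
    if (!Bit || (Seen & Bit))
      return false; // unknown flag or duplicate such as "glc noglc"
    Seen |= Bit;
    if (!Disabling)
      Enabled |= Bit;
  }
  return true;
}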
7121
7122ParseStatus AMDGPUAsmParser::parseScope(OperandVector &Operands,
7123 int64_t &Scope) {
7124 static const unsigned Scopes[] = {CPol::SCOPE_CU, CPol::SCOPE_SE,
7125 CPol::SCOPE_DEV, CPol::SCOPE_SYS};
7126
7127 ParseStatus Res = parseStringOrIntWithPrefix(
7128 Operands, "scope", {"SCOPE_CU", "SCOPE_SE", "SCOPE_DEV", "SCOPE_SYS"},
7129 Scope);
7130
7131 if (Res.isSuccess())
7132 Scope = Scopes[Scope];
7133
7134 return Res;
7135}
7136
7137ParseStatus AMDGPUAsmParser::parseTH(OperandVector &Operands, int64_t &TH) {
7138 TH = AMDGPU::CPol::TH_RT; // default
7139
7140 StringRef Value;
7141 SMLoc StringLoc;
7142 ParseStatus Res = parseStringWithPrefix("th", Value, StringLoc);
7143 if (!Res.isSuccess())
7144 return Res;
7145
7146 if (Value == "TH_DEFAULT")
7147 TH = AMDGPU::CPol::TH_RT;
7148 else if (Value == "TH_STORE_LU" || Value == "TH_LOAD_WB" ||
7149 Value == "TH_LOAD_NT_WB") {
7150 return Error(StringLoc, "invalid th value");
7151 } else if (Value.consume_front("TH_ATOMIC_")) {
7152 TH = AMDGPU::CPol::TH_TYPE_ATOMIC;
7153 } else if (Value.consume_front("TH_LOAD_")) {
7154 TH = AMDGPU::CPol::TH_TYPE_LOAD;
7155 } else if (Value.consume_front("TH_STORE_")) {
7156 TH = AMDGPU::CPol::TH_TYPE_STORE;
7157 } else {
7158 return Error(StringLoc, "invalid th value");
7159 }
7160
7161 if (Value == "BYPASS")
7162 TH |= AMDGPU::CPol::TH_REAL_BYPASS;
7163
7164 if (TH != 0) {
7165 if (TH == AMDGPU::CPol::TH_TYPE_ATOMIC)
7166 TH |= StringSwitch<int64_t>(Value)
7167 .Case("RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN)
7168 .Case("RT", AMDGPU::CPol::TH_RT)
7169 .Case("RT_RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN)
7170 .Case("NT", AMDGPU::CPol::TH_ATOMIC_NT)
7171 .Case("NT_RETURN", AMDGPU::CPol::TH_ATOMIC_NT |
7172 AMDGPU::CPol::TH_ATOMIC_RETURN)
7173 .Case("CASCADE_RT", AMDGPU::CPol::TH_ATOMIC_CASCADE)
7174 .Case("CASCADE_NT", AMDGPU::CPol::TH_ATOMIC_CASCADE |
7175 AMDGPU::CPol::TH_ATOMIC_NT)
7176 .Default(0xffffffff);
7177 else
7178 TH |= StringSwitch<int64_t>(Value)
7179 .Case("RT", AMDGPU::CPol::TH_RT)
7180 .Case("NT", AMDGPU::CPol::TH_NT)
7181 .Case("HT", AMDGPU::CPol::TH_HT)
7182 .Case("LU", AMDGPU::CPol::TH_LU)
7183 .Case("WB", AMDGPU::CPol::TH_WB)
7184 .Case("NT_RT", AMDGPU::CPol::TH_NT_RT)
7185 .Case("RT_NT", AMDGPU::CPol::TH_RT_NT)
7186 .Case("NT_HT", AMDGPU::CPol::TH_NT_HT)
7187 .Case("NT_WB", AMDGPU::CPol::TH_NT_WB)
7188 .Case("BYPASS", AMDGPU::CPol::TH_BYPASS)
7189 .Default(0xffffffff);
7190 }
7191
7192 if (TH == 0xffffffff)
7193 return Error(StringLoc, "invalid th value");
7194
7195 return ParseStatus::Success;
7196}
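// Illustrative example, not part of the original file: on GFX12+ the cache
// policy is assembled from independent pieces that may appear in any order
// (temporal hint, scope, nv, scale_offset), each parsed at most once, e.g.
// (instruction spelling assumed, operand keywords taken from the parsers above):
//   global_load_b32 v0, v[2:3], off th:TH_LOAD_NT scope:SCOPE_SE
// The scope is accepted symbolically or as an index into the same list, so
// "scope:SCOPE_DEV" and "scope:2" both map to CPol::SCOPE_DEV via Scopes[].
static int64_t scopeIndex(llvm::StringRef Name) {
  // Mirrors the symbolic lookup done by parseStringOrIntWithPrefix() for
  // "scope"; the result is an index that parseScope() maps through Scopes[].
  static const char *const Ids[] = {"SCOPE_CU", "SCOPE_SE", "SCOPE_DEV",
                                    "SCOPE_SYS"};
  for (int64_t I = 0; I != 4; ++I)
    if (Name == Ids[I])
      return I;
  return -1; // unknown; the caller falls back to an integer expression
}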
7197
7198 static void
7199addOptionalImmOperand(MCInst &Inst, const OperandVector &Operands,
7200 AMDGPUAsmParser::OptionalImmIndexMap &OptionalIdx,
7201 AMDGPUOperand::ImmTy ImmT, int64_t Default = 0,
7202 std::optional<unsigned> InsertAt = std::nullopt) {
7203 auto i = OptionalIdx.find(ImmT);
7204 if (i != OptionalIdx.end()) {
7205 unsigned Idx = i->second;
7206 const AMDGPUOperand &Op =
7207 static_cast<const AMDGPUOperand &>(*Operands[Idx]);
7208 if (InsertAt)
7209 Inst.insert(Inst.begin() + *InsertAt, MCOperand::createImm(Op.getImm()));
7210 else
7211 Op.addImmOperands(Inst, 1);
7212 } else {
7213 if (InsertAt.has_value())
7214 Inst.insert(Inst.begin() + *InsertAt, MCOperand::createImm(Default));
7215 else
7216 Inst.addOperand(MCOperand::createImm(Default));
7217 }
7218}
7219
7220ParseStatus AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
7221 StringRef &Value,
7222 SMLoc &StringLoc) {
7223 if (!trySkipId(Prefix, AsmToken::Colon))
7224 return ParseStatus::NoMatch;
7225
7226 StringLoc = getLoc();
7227 return parseId(Value, "expected an identifier") ? ParseStatus::Success
7228 : ParseStatus::Failure;
7229}
7230
7231ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix(
7232 OperandVector &Operands, StringRef Name, ArrayRef<const char *> Ids,
7233 int64_t &IntVal) {
7234 if (!trySkipId(Name, AsmToken::Colon))
7235 return ParseStatus::NoMatch;
7236
7237 SMLoc StringLoc = getLoc();
7238
7239 StringRef Value;
7240 if (isToken(AsmToken::Identifier)) {
7241 Value = getTokenStr();
7242 lex();
7243
7244 for (IntVal = 0; IntVal < (int64_t)Ids.size(); ++IntVal)
7245 if (Value == Ids[IntVal])
7246 break;
7247 } else if (!parseExpr(IntVal))
7248 return ParseStatus::Failure;
7249
7250 if (IntVal < 0 || IntVal >= (int64_t)Ids.size())
7251 return Error(StringLoc, "invalid " + Twine(Name) + " value");
7252
7253 return ParseStatus::Success;
7254}
7255
7256ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix(
7257 OperandVector &Operands, StringRef Name, ArrayRef<const char *> Ids,
7258 AMDGPUOperand::ImmTy Type) {
7259 SMLoc S = getLoc();
7260 int64_t IntVal;
7261
7262 ParseStatus Res = parseStringOrIntWithPrefix(Operands, Name, Ids, IntVal);
7263 if (Res.isSuccess())
7264 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S, Type));
7265
7266 return Res;
7267}
7268
7269//===----------------------------------------------------------------------===//
7270// MTBUF format
7271//===----------------------------------------------------------------------===//
7272
7273bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
7274 int64_t MaxVal,
7275 int64_t &Fmt) {
7276 int64_t Val;
7277 SMLoc Loc = getLoc();
7278
7279 auto Res = parseIntWithPrefix(Pref, Val);
7280 if (Res.isFailure())
7281 return false;
7282 if (Res.isNoMatch())
7283 return true;
7284
7285 if (Val < 0 || Val > MaxVal) {
7286 Error(Loc, Twine("out of range ", StringRef(Pref)));
7287 return false;
7288 }
7289
7290 Fmt = Val;
7291 return true;
7292}
7293
7294ParseStatus AMDGPUAsmParser::tryParseIndexKey(OperandVector &Operands,
7295 AMDGPUOperand::ImmTy ImmTy) {
7296 const char *Pref = "index_key";
7297 int64_t ImmVal = 0;
7298 SMLoc Loc = getLoc();
7299 auto Res = parseIntWithPrefix(Pref, ImmVal);
7300 if (!Res.isSuccess())
7301 return Res;
7302
7303 if ((ImmTy == AMDGPUOperand::ImmTyIndexKey16bit ||
7304 ImmTy == AMDGPUOperand::ImmTyIndexKey32bit) &&
7305 (ImmVal < 0 || ImmVal > 1))
7306 return Error(Loc, Twine("out of range ", StringRef(Pref)));
7307
7308 if (ImmTy == AMDGPUOperand::ImmTyIndexKey8bit && (ImmVal < 0 || ImmVal > 3))
7309 return Error(Loc, Twine("out of range ", StringRef(Pref)));
7310
7311 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, ImmTy));
7312 return ParseStatus::Success;
7313}
7314
7315ParseStatus AMDGPUAsmParser::parseIndexKey8bit(OperandVector &Operands) {
7316 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey8bit);
7317}
7318
7319ParseStatus AMDGPUAsmParser::parseIndexKey16bit(OperandVector &Operands) {
7320 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey16bit);
7321}
7322
7323ParseStatus AMDGPUAsmParser::parseIndexKey32bit(OperandVector &Operands) {
7324 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey32bit);
7325}
7326
7327ParseStatus AMDGPUAsmParser::tryParseMatrixFMT(OperandVector &Operands,
7328 StringRef Name,
7329 AMDGPUOperand::ImmTy Type) {
7330 return parseStringOrIntWithPrefix(Operands, Name,
7331 {"MATRIX_FMT_FP8", "MATRIX_FMT_BF8",
7332 "MATRIX_FMT_FP6", "MATRIX_FMT_BF6",
7333 "MATRIX_FMT_FP4"},
7334 Type);
7335}
7336
7337ParseStatus AMDGPUAsmParser::parseMatrixAFMT(OperandVector &Operands) {
7338 return tryParseMatrixFMT(Operands, "matrix_a_fmt",
7339 AMDGPUOperand::ImmTyMatrixAFMT);
7340}
7341
7342ParseStatus AMDGPUAsmParser::parseMatrixBFMT(OperandVector &Operands) {
7343 return tryParseMatrixFMT(Operands, "matrix_b_fmt",
7344 AMDGPUOperand::ImmTyMatrixBFMT);
7345}
7346
7347ParseStatus AMDGPUAsmParser::tryParseMatrixScale(OperandVector &Operands,
7348 StringRef Name,
7349 AMDGPUOperand::ImmTy Type) {
7350 return parseStringOrIntWithPrefix(
7351 Operands, Name, {"MATRIX_SCALE_ROW0", "MATRIX_SCALE_ROW1"}, Type);
7352}
7353
7354ParseStatus AMDGPUAsmParser::parseMatrixAScale(OperandVector &Operands) {
7355 return tryParseMatrixScale(Operands, "matrix_a_scale",
7356 AMDGPUOperand::ImmTyMatrixAScale);
7357}
7358
7359ParseStatus AMDGPUAsmParser::parseMatrixBScale(OperandVector &Operands) {
7360 return tryParseMatrixScale(Operands, "matrix_b_scale",
7361 AMDGPUOperand::ImmTyMatrixBScale);
7362}
7363
7364ParseStatus AMDGPUAsmParser::tryParseMatrixScaleFmt(OperandVector &Operands,
7365 StringRef Name,
7366 AMDGPUOperand::ImmTy Type) {
7367 return parseStringOrIntWithPrefix(
7368 Operands, Name,
7369 {"MATRIX_SCALE_FMT_E8", "MATRIX_SCALE_FMT_E5M3", "MATRIX_SCALE_FMT_E4M3"},
7370 Type);
7371}
7372
7373ParseStatus AMDGPUAsmParser::parseMatrixAScaleFmt(OperandVector &Operands) {
7374 return tryParseMatrixScaleFmt(Operands, "matrix_a_scale_fmt",
7375 AMDGPUOperand::ImmTyMatrixAScaleFmt);
7376}
7377
7378ParseStatus AMDGPUAsmParser::parseMatrixBScaleFmt(OperandVector &Operands) {
7379 return tryParseMatrixScaleFmt(Operands, "matrix_b_scale_fmt",
7380 AMDGPUOperand::ImmTyMatrixBScaleFmt);
7381}
7382
7383// dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
7384// values to live in a joint format operand in the MCInst encoding.
7385ParseStatus AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
7386 using namespace llvm::AMDGPU::MTBUFFormat;
7387
7388 int64_t Dfmt = DFMT_UNDEF;
7389 int64_t Nfmt = NFMT_UNDEF;
7390
7391 // dfmt and nfmt can appear in either order, and each is optional.
7392 for (int I = 0; I < 2; ++I) {
7393 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
7394 return ParseStatus::Failure;
7395
7396 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt))
7397 return ParseStatus::Failure;
7398
7399 // Skip optional comma between dfmt/nfmt
7400 // but guard against 2 commas following each other.
7401 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
7402 !peekToken().is(AsmToken::Comma)) {
7403 trySkipToken(AsmToken::Comma);
7404 }
7405 }
7406
7407 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
7408 return ParseStatus::NoMatch;
7409
7410 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
7411 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
7412
7413 Format = encodeDfmtNfmt(Dfmt, Nfmt);
7414 return ParseStatus::Success;
7415}
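// Illustrative sketch, not part of the original file: dfmt and nfmt are merged
// into one joint format operand by encodeDfmtNfmt(). Assuming the conventional
// MTBUF layout (4-bit dfmt in the low bits, 3-bit nfmt above it), the packing
// looks like this:
static unsigned packDfmtNfmt(unsigned Dfmt, unsigned Nfmt) {
  return (Dfmt & 0xf) | ((Nfmt & 0x7) << 4); // assumed field widths/positions
}
// e.g. packDfmtNfmt(14, 7) == 0x7e (14 and 7 being assumed enum values for a
// 32_32_32_32 data format with a FLOAT numeric format).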
7416
7417ParseStatus AMDGPUAsmParser::parseUfmt(int64_t &Format) {
7418 using namespace llvm::AMDGPU::MTBUFFormat;
7419
7420 int64_t Fmt = UFMT_UNDEF;
7421
7422 if (!tryParseFmt("format", UFMT_MAX, Fmt))
7423 return ParseStatus::Failure;
7424
7425 if (Fmt == UFMT_UNDEF)
7426 return ParseStatus::NoMatch;
7427
7428 Format = Fmt;
7429 return ParseStatus::Success;
7430}
7431
7432bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
7433 int64_t &Nfmt,
7434 StringRef FormatStr,
7435 SMLoc Loc) {
7436 using namespace llvm::AMDGPU::MTBUFFormat;
7437 int64_t Format;
7438
7439 Format = getDfmt(FormatStr);
7440 if (Format != DFMT_UNDEF) {
7441 Dfmt = Format;
7442 return true;
7443 }
7444
7445 Format = getNfmt(FormatStr, getSTI());
7446 if (Format != NFMT_UNDEF) {
7447 Nfmt = Format;
7448 return true;
7449 }
7450
7451 Error(Loc, "unsupported format");
7452 return false;
7453}
7454
7455ParseStatus AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
7456 SMLoc FormatLoc,
7457 int64_t &Format) {
7458 using namespace llvm::AMDGPU::MTBUFFormat;
7459
7460 int64_t Dfmt = DFMT_UNDEF;
7461 int64_t Nfmt = NFMT_UNDEF;
7462 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
7463 return ParseStatus::Failure;
7464
7465 if (trySkipToken(AsmToken::Comma)) {
7466 StringRef Str;
7467 SMLoc Loc = getLoc();
7468 if (!parseId(Str, "expected a format string") ||
7469 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc))
7470 return ParseStatus::Failure;
7471 if (Dfmt == DFMT_UNDEF)
7472 return Error(Loc, "duplicate numeric format");
7473 if (Nfmt == NFMT_UNDEF)
7474 return Error(Loc, "duplicate data format");
7475 }
7476
7477 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
7478 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
7479
7480 if (isGFX10Plus()) {
7481 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI());
7482 if (Ufmt == UFMT_UNDEF)
7483 return Error(FormatLoc, "unsupported format");
7484 Format = Ufmt;
7485 } else {
7486 Format = encodeDfmtNfmt(Dfmt, Nfmt);
7487 }
7488
7489 return ParseStatus::Success;
7490}
7491
7492ParseStatus AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
7493 SMLoc Loc,
7494 int64_t &Format) {
7495 using namespace llvm::AMDGPU::MTBUFFormat;
7496
7497 auto Id = getUnifiedFormat(FormatStr, getSTI());
7498 if (Id == UFMT_UNDEF)
7499 return ParseStatus::NoMatch;
7500
7501 if (!isGFX10Plus())
7502 return Error(Loc, "unified format is not supported on this GPU");
7503
7504 Format = Id;
7505 return ParseStatus::Success;
7506}
7507
7508ParseStatus AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
7509 using namespace llvm::AMDGPU::MTBUFFormat;
7510 SMLoc Loc = getLoc();
7511
7512 if (!parseExpr(Format))
7513 return ParseStatus::Failure;
7514 if (!isValidFormatEncoding(Format, getSTI()))
7515 return Error(Loc, "out of range format");
7516
7517 return ParseStatus::Success;
7518}
7519
7520ParseStatus AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
7521 using namespace llvm::AMDGPU::MTBUFFormat;
7522
7523 if (!trySkipId("format", AsmToken::Colon))
7524 return ParseStatus::NoMatch;
7525
7526 if (trySkipToken(AsmToken::LBrac)) {
7527 StringRef FormatStr;
7528 SMLoc Loc = getLoc();
7529 if (!parseId(FormatStr, "expected a format string"))
7530 return ParseStatus::Failure;
7531
7532 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
7533 if (Res.isNoMatch())
7534 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
7535 if (!Res.isSuccess())
7536 return Res;
7537
7538 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
7539 return ParseStatus::Failure;
7540
7541 return ParseStatus::Success;
7542 }
7543
7544 return parseNumericFormat(Format);
7545}
7546
7547ParseStatus AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
7548 using namespace llvm::AMDGPU::MTBUFFormat;
7549
7550 int64_t Format = getDefaultFormatEncoding(getSTI());
7551 ParseStatus Res;
7552 SMLoc Loc = getLoc();
7553
7554 // Parse legacy format syntax.
7555 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
7556 if (Res.isFailure())
7557 return Res;
7558
7559 bool FormatFound = Res.isSuccess();
7560
7561 Operands.push_back(
7562 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
7563
7564 if (FormatFound)
7565 trySkipToken(AsmToken::Comma);
7566
7567 if (isToken(AsmToken::EndOfStatement)) {
7568 // We are expecting an soffset operand,
7569 // but let the matcher handle the error.
7570 return ParseStatus::Success;
7571 }
7572
7573 // Parse soffset.
7574 Res = parseRegOrImm(Operands);
7575 if (!Res.isSuccess())
7576 return Res;
7577
7578 trySkipToken(AsmToken::Comma);
7579
7580 if (!FormatFound) {
7581 Res = parseSymbolicOrNumericFormat(Format);
7582 if (Res.isFailure())
7583 return Res;
7584 if (Res.isSuccess()) {
7585 auto Size = Operands.size();
7586 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
7587 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
7588 Op.setImm(Format);
7589 }
7590 return ParseStatus::Success;
7591 }
7592
7593 if (isId("format") && peekToken().is(AsmToken::Colon))
7594 return Error(getLoc(), "duplicate format");
7595 return ParseStatus::Success;
7596}
7597
7598ParseStatus AMDGPUAsmParser::parseFlatOffset(OperandVector &Operands) {
7599 ParseStatus Res =
7600 parseIntWithPrefix("offset", Operands, AMDGPUOperand::ImmTyOffset);
7601 if (Res.isNoMatch()) {
7602 Res = parseIntWithPrefix("inst_offset", Operands,
7603 AMDGPUOperand::ImmTyInstOffset);
7604 }
7605 return Res;
7606}
7607
7608ParseStatus AMDGPUAsmParser::parseR128A16(OperandVector &Operands) {
7609 ParseStatus Res =
7610 parseNamedBit("r128", Operands, AMDGPUOperand::ImmTyR128A16);
7611 if (Res.isNoMatch())
7612 Res = parseNamedBit("a16", Operands, AMDGPUOperand::ImmTyA16);
7613 return Res;
7614}
7615
7616ParseStatus AMDGPUAsmParser::parseBLGP(OperandVector &Operands) {
7617 ParseStatus Res =
7618 parseIntWithPrefix("blgp", Operands, AMDGPUOperand::ImmTyBLGP);
7619 if (Res.isNoMatch()) {
7620 Res =
7621 parseOperandArrayWithPrefix("neg", Operands, AMDGPUOperand::ImmTyBLGP);
7622 }
7623 return Res;
7624}
7625
7626//===----------------------------------------------------------------------===//
7627// Exp
7628//===----------------------------------------------------------------------===//
7629
7630void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
7631 OptionalImmIndexMap OptionalIdx;
7632
7633 unsigned OperandIdx[4];
7634 unsigned EnMask = 0;
7635 int SrcIdx = 0;
7636
7637 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7638 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7639
7640 // Add the register arguments
7641 if (Op.isReg()) {
7642 assert(SrcIdx < 4);
7643 OperandIdx[SrcIdx] = Inst.size();
7644 Op.addRegOperands(Inst, 1);
7645 ++SrcIdx;
7646 continue;
7647 }
7648
7649 if (Op.isOff()) {
7650 assert(SrcIdx < 4);
7651 OperandIdx[SrcIdx] = Inst.size();
7652 Inst.addOperand(MCOperand::createReg(MCRegister()));
7653 ++SrcIdx;
7654 continue;
7655 }
7656
7657 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
7658 Op.addImmOperands(Inst, 1);
7659 continue;
7660 }
7661
7662 if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en"))
7663 continue;
7664
7665 // Handle optional arguments
7666 OptionalIdx[Op.getImmTy()] = i;
7667 }
7668
7669 assert(SrcIdx == 4);
7670
7671 bool Compr = false;
7672 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
7673 Compr = true;
7674 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
7675 Inst.getOperand(OperandIdx[2]).setReg(MCRegister());
7676 Inst.getOperand(OperandIdx[3]).setReg(MCRegister());
7677 }
7678
7679 for (auto i = 0; i < SrcIdx; ++i) {
7680 if (Inst.getOperand(OperandIdx[i]).getReg()) {
7681 EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
7682 }
7683 }
7684
7685 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
7686 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
7687
7688 Inst.addOperand(MCOperand::createImm(EnMask));
7689}
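// Illustrative sketch, not part of the original file: the export enable mask
// built at the end of cvtExp() gets one bit per populated source, or a 2-bit
// pair per source when the operands are compressed.
static unsigned expEnableMask(const bool Present[4], bool Compr) {
  unsigned EnMask = 0;
  for (unsigned I = 0; I < 4; ++I)
    if (Present[I])
      EnMask |= Compr ? (0x3u << (I * 2)) : (0x1u << I);
  return EnMask;
}
// e.g. with only the first two sources present: 0b0011 normally, 0b1111 compressed.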
7690
7691//===----------------------------------------------------------------------===//
7692// s_waitcnt
7693//===----------------------------------------------------------------------===//
7694
7695 static bool
7696encodeCnt(
7697 const AMDGPU::IsaVersion ISA,
7698 int64_t &IntVal,
7699 int64_t CntVal,
7700 bool Saturate,
7701 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
7702 unsigned (*decode)(const IsaVersion &Version, unsigned))
7703{
7704 bool Failed = false;
7705
7706 IntVal = encode(ISA, IntVal, CntVal);
7707 if (CntVal != decode(ISA, IntVal)) {
7708 if (Saturate) {
7709 IntVal = encode(ISA, IntVal, -1);
7710 } else {
7711 Failed = true;
7712 }
7713 }
7714 return Failed;
7715}
7716
7717bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
7718
7719 SMLoc CntLoc = getLoc();
7720 StringRef CntName = getTokenStr();
7721
7722 if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
7723 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
7724 return false;
7725
7726 int64_t CntVal;
7727 SMLoc ValLoc = getLoc();
7728 if (!parseExpr(CntVal))
7729 return false;
7730
7731 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
7732
7733 bool Failed = true;
7734 bool Sat = CntName.ends_with("_sat");
7735
7736 if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
7737 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
7738 } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
7739 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
7740 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
7741 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
7742 } else {
7743 Error(CntLoc, "invalid counter name " + CntName);
7744 return false;
7745 }
7746
7747 if (Failed) {
7748 Error(ValLoc, "too large value for " + CntName);
7749 return false;
7750 }
7751
7752 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
7753 return false;
7754
7755 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
7756 if (isToken(AsmToken::EndOfStatement)) {
7757 Error(getLoc(), "expected a counter name");
7758 return false;
7759 }
7760 }
7761
7762 return true;
7763}
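// Illustrative sketch, not part of the original file: a counter name with a
// "_sat" suffix (e.g. "vmcnt_sat") clamps an out-of-range value to the field
// maximum instead of reporting an error, which is what encodeCnt() does when it
// re-encodes with -1 after the value fails to round-trip. The field width here
// is a placeholder; the real width depends on the ISA version.
static unsigned clampToCounterField(unsigned Val, unsigned FieldWidth) {
  unsigned Max = (1u << FieldWidth) - 1;
  return Val > Max ? Max : Val;
}
// e.g. "s_waitcnt vmcnt_sat(1000)" assembles like "s_waitcnt vmcnt(<field max>)".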
7764
7765ParseStatus AMDGPUAsmParser::parseSWaitCnt(OperandVector &Operands) {
7766 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
7767 int64_t Waitcnt = getWaitcntBitMask(ISA);
7768 SMLoc S = getLoc();
7769
7770 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7771 while (!isToken(AsmToken::EndOfStatement)) {
7772 if (!parseCnt(Waitcnt))
7773 return ParseStatus::Failure;
7774 }
7775 } else {
7776 if (!parseExpr(Waitcnt))
7777 return ParseStatus::Failure;
7778 }
7779
7780 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
7781 return ParseStatus::Success;
7782}
7783
7784bool AMDGPUAsmParser::parseDelay(int64_t &Delay) {
7785 SMLoc FieldLoc = getLoc();
7786 StringRef FieldName = getTokenStr();
7787 if (!skipToken(AsmToken::Identifier, "expected a field name") ||
7788 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
7789 return false;
7790
7791 SMLoc ValueLoc = getLoc();
7792 StringRef ValueName = getTokenStr();
7793 if (!skipToken(AsmToken::Identifier, "expected a value name") ||
7794 !skipToken(AsmToken::RParen, "expected a right parenthesis"))
7795 return false;
7796
7797 unsigned Shift;
7798 if (FieldName == "instid0") {
7799 Shift = 0;
7800 } else if (FieldName == "instskip") {
7801 Shift = 4;
7802 } else if (FieldName == "instid1") {
7803 Shift = 7;
7804 } else {
7805 Error(FieldLoc, "invalid field name " + FieldName);
7806 return false;
7807 }
7808
7809 int Value;
7810 if (Shift == 4) {
7811 // Parse values for instskip.
7812 Value = StringSwitch<int>(ValueName)
7813 .Case("SAME", 0)
7814 .Case("NEXT", 1)
7815 .Case("SKIP_1", 2)
7816 .Case("SKIP_2", 3)
7817 .Case("SKIP_3", 4)
7818 .Case("SKIP_4", 5)
7819 .Default(-1);
7820 } else {
7821 // Parse values for instid0 and instid1.
7822 Value = StringSwitch<int>(ValueName)
7823 .Case("NO_DEP", 0)
7824 .Case("VALU_DEP_1", 1)
7825 .Case("VALU_DEP_2", 2)
7826 .Case("VALU_DEP_3", 3)
7827 .Case("VALU_DEP_4", 4)
7828 .Case("TRANS32_DEP_1", 5)
7829 .Case("TRANS32_DEP_2", 6)
7830 .Case("TRANS32_DEP_3", 7)
7831 .Case("FMA_ACCUM_CYCLE_1", 8)
7832 .Case("SALU_CYCLE_1", 9)
7833 .Case("SALU_CYCLE_2", 10)
7834 .Case("SALU_CYCLE_3", 11)
7835 .Default(-1);
7836 }
7837 if (Value < 0) {
7838 Error(ValueLoc, "invalid value name " + ValueName);
7839 return false;
7840 }
7841
7842 Delay |= Value << Shift;
7843 return true;
7844}
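// Illustrative sketch, not part of the original file: the Shift values above
// place instid0 at bit 0, instskip at bit 4 and instid1 at bit 7 of the
// s_delay_alu immediate, so the packing is simply:
static int64_t packSDelayALU(int64_t InstId0, int64_t InstSkip, int64_t InstId1) {
  return InstId0 | (InstSkip << 4) | (InstId1 << 7);
}
// e.g. "s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)"
// encodes as (1 << 0) | (1 << 4) | (9 << 7) == 0x491.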
7845
7846ParseStatus AMDGPUAsmParser::parseSDelayALU(OperandVector &Operands) {
7847 int64_t Delay = 0;
7848 SMLoc S = getLoc();
7849
7850 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7851 do {
7852 if (!parseDelay(Delay))
7853 return ParseStatus::Failure;
7854 } while (trySkipToken(AsmToken::Pipe));
7855 } else {
7856 if (!parseExpr(Delay))
7857 return ParseStatus::Failure;
7858 }
7859
7860 Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S));
7861 return ParseStatus::Success;
7862}
7863
7864bool
7865AMDGPUOperand::isSWaitCnt() const {
7866 return isImm();
7867}
7868
7869bool AMDGPUOperand::isSDelayALU() const { return isImm(); }
7870
7871//===----------------------------------------------------------------------===//
7872// DepCtr
7873//===----------------------------------------------------------------------===//
7874
7875void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId,
7876 StringRef DepCtrName) {
7877 switch (ErrorId) {
7878 case OPR_ID_UNKNOWN:
7879 Error(Loc, Twine("invalid counter name ", DepCtrName));
7880 return;
7881 case OPR_ID_UNSUPPORTED:
7882 Error(Loc, Twine(DepCtrName, " is not supported on this GPU"));
7883 return;
7884 case OPR_ID_DUPLICATE:
7885 Error(Loc, Twine("duplicate counter name ", DepCtrName));
7886 return;
7887 case OPR_VAL_INVALID:
7888 Error(Loc, Twine("invalid value for ", DepCtrName));
7889 return;
7890 default:
7891 assert(false);
7892 }
7893}
7894
7895bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) {
7896
7897 using namespace llvm::AMDGPU::DepCtr;
7898
7899 SMLoc DepCtrLoc = getLoc();
7900 StringRef DepCtrName = getTokenStr();
7901
7902 if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
7903 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
7904 return false;
7905
7906 int64_t ExprVal;
7907 if (!parseExpr(ExprVal))
7908 return false;
7909
7910 unsigned PrevOprMask = UsedOprMask;
7911 int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI());
7912
7913 if (CntVal < 0) {
7914 depCtrError(DepCtrLoc, CntVal, DepCtrName);
7915 return false;
7916 }
7917
7918 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
7919 return false;
7920
7921 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
7922 if (isToken(AsmToken::EndOfStatement)) {
7923 Error(getLoc(), "expected a counter name");
7924 return false;
7925 }
7926 }
7927
7928 unsigned CntValMask = PrevOprMask ^ UsedOprMask;
7929 DepCtr = (DepCtr & ~CntValMask) | CntVal;
7930 return true;
7931}
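// Illustrative sketch, not part of the original file: each depctr field owns
// only the bits reported back through UsedOprMask, so folding a newly parsed
// field into the running encoding clears exactly those bits and ORs in the new
// value, as the last two statements of parseDepCtr() do.
static int64_t mergeDepCtrField(int64_t DepCtr, unsigned FieldMask,
                                int64_t FieldVal) {
  return (DepCtr & ~static_cast<int64_t>(FieldMask)) | FieldVal;
}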
7932
7933ParseStatus AMDGPUAsmParser::parseDepCtr(OperandVector &Operands) {
7934 using namespace llvm::AMDGPU::DepCtr;
7935
7936 int64_t DepCtr = getDefaultDepCtrEncoding(getSTI());
7937 SMLoc Loc = getLoc();
7938
7939 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7940 unsigned UsedOprMask = 0;
7941 while (!isToken(AsmToken::EndOfStatement)) {
7942 if (!parseDepCtr(DepCtr, UsedOprMask))
7943 return ParseStatus::Failure;
7944 }
7945 } else {
7946 if (!parseExpr(DepCtr))
7947 return ParseStatus::Failure;
7948 }
7949
7950 Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc));
7951 return ParseStatus::Success;
7952}
7953
7954bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); }
7955
7956//===----------------------------------------------------------------------===//
7957// hwreg
7958//===----------------------------------------------------------------------===//
7959
7960ParseStatus AMDGPUAsmParser::parseHwregFunc(OperandInfoTy &HwReg,
7961 OperandInfoTy &Offset,
7962 OperandInfoTy &Width) {
7963 using namespace llvm::AMDGPU::Hwreg;
7964
7965 if (!trySkipId("hwreg", AsmToken::LParen))
7966 return ParseStatus::NoMatch;
7967
7968 // The register may be specified by name or using a numeric code
7969 HwReg.Loc = getLoc();
7970 if (isToken(AsmToken::Identifier) &&
7971 (HwReg.Val = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
7972 HwReg.IsSymbolic = true;
7973 lex(); // skip register name
7974 } else if (!parseExpr(HwReg.Val, "a register name")) {
7975 return ParseStatus::Failure;
7976 }
7977
7978 if (trySkipToken(AsmToken::RParen))
7979 return ParseStatus::Success;
7980
7981 // parse optional params
7982 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
7983 return ParseStatus::Failure;
7984
7985 Offset.Loc = getLoc();
7986 if (!parseExpr(Offset.Val))
7987 return ParseStatus::Failure;
7988
7989 if (!skipToken(AsmToken::Comma, "expected a comma"))
7990 return ParseStatus::Failure;
7991
7992 Width.Loc = getLoc();
7993 if (!parseExpr(Width.Val) ||
7994 !skipToken(AsmToken::RParen, "expected a closing parenthesis"))
7995 return ParseStatus::Failure;
7996
7997 return ParseStatus::Success;
7998}
7999
8000ParseStatus AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
8001 using namespace llvm::AMDGPU::Hwreg;
8002
8003 int64_t ImmVal = 0;
8004 SMLoc Loc = getLoc();
8005
8006 StructuredOpField HwReg("id", "hardware register", HwregId::Width,
8007 HwregId::Default);
8008 StructuredOpField Offset("offset", "bit offset", HwregOffset::Width,
8009 HwregOffset::Default);
8010 struct : StructuredOpField {
8011 using StructuredOpField::StructuredOpField;
8012 bool validate(AMDGPUAsmParser &Parser) const override {
8013 if (!isUIntN(Width, Val - 1))
8014 return Error(Parser, "only values from 1 to 32 are legal");
8015 return true;
8016 }
8017 } Width("size", "bitfield width", HwregSize::Width, HwregSize::Default);
8018 ParseStatus Res = parseStructuredOpFields({&HwReg, &Offset, &Width});
8019
8020 if (Res.isNoMatch())
8021 Res = parseHwregFunc(HwReg, Offset, Width);
8022
8023 if (Res.isSuccess()) {
8024 if (!validateStructuredOpFields({&HwReg, &Offset, &Width}))
8025 return ParseStatus::Failure;
8026 ImmVal = HwregEncoding::encode(HwReg.Val, Offset.Val, Width.Val);
8027 }
8028
8029 if (Res.isNoMatch() &&
8030 parseExpr(ImmVal, "a hwreg macro, structured immediate"))
8031 Res = ParseStatus::Success;
8032
8033 if (!Res.isSuccess())
8034 return ParseStatus::Failure;
8035
8036 if (!isUInt<16>(ImmVal))
8037 return Error(Loc, "invalid immediate: only 16-bit values are legal");
8038 Operands.push_back(
8039 AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
8040 return ParseStatus::Success;
8041}
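// Illustrative sketch, not part of the original file: the hwreg operand can be
// written as hwreg(id[, offset, size]), as the structured form
// {id: ..., offset: ..., size: ...}, or as a raw 16-bit immediate. The packing
// below assumes the usual layout (id in bits [5:0], offset in [10:6],
// size-1 in [15:11]); the authoritative encoding is HwregEncoding::encode().
static unsigned packHwreg(unsigned Id, unsigned Offset, unsigned Size) {
  return (Id & 0x3f) | ((Offset & 0x1f) << 6) | (((Size - 1) & 0x1f) << 11);
}
// e.g. "s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 8)" and
//      "s_getreg_b32 s0, {id: 1, offset: 0, size: 8}"   (assumed register id)
// describe the same operand.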
8042
8043bool AMDGPUOperand::isHwreg() const {
8044 return isImmTy(ImmTyHwreg);
8045}
8046
8047//===----------------------------------------------------------------------===//
8048// sendmsg
8049//===----------------------------------------------------------------------===//
8050
8051bool
8052AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
8053 OperandInfoTy &Op,
8054 OperandInfoTy &Stream) {
8055 using namespace llvm::AMDGPU::SendMsg;
8056
8057 Msg.Loc = getLoc();
8058 if (isToken(AsmToken::Identifier) &&
8059 (Msg.Val = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
8060 Msg.IsSymbolic = true;
8061 lex(); // skip message name
8062 } else if (!parseExpr(Msg.Val, "a message name")) {
8063 return false;
8064 }
8065
8066 if (trySkipToken(AsmToken::Comma)) {
8067 Op.IsDefined = true;
8068 Op.Loc = getLoc();
8069 if (isToken(AsmToken::Identifier) &&
8070 (Op.Val = getMsgOpId(Msg.Val, getTokenStr(), getSTI())) !=
8071 OPR_ID_UNKNOWN) {
8072 lex(); // skip operation name
8073 } else if (!parseExpr(Op.Val, "an operation name")) {
8074 return false;
8075 }
8076
8077 if (trySkipToken(AsmToken::Comma)) {
8078 Stream.IsDefined = true;
8079 Stream.Loc = getLoc();
8080 if (!parseExpr(Stream.Val))
8081 return false;
8082 }
8083 }
8084
8085 return skipToken(AsmToken::RParen, "expected a closing parenthesis");
8086}
8087
8088bool
8089AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
8090 const OperandInfoTy &Op,
8091 const OperandInfoTy &Stream) {
8092 using namespace llvm::AMDGPU::SendMsg;
8093
8094 // Validation strictness depends on whether the message is specified
8095 // in a symbolic or in a numeric form. In the latter case
8096 // only the encoding possibility is checked.
8097 bool Strict = Msg.IsSymbolic;
8098
8099 if (Strict) {
8100 if (Msg.Val == OPR_ID_UNSUPPORTED) {
8101 Error(Msg.Loc, "specified message id is not supported on this GPU");
8102 return false;
8103 }
8104 } else {
8105 if (!isValidMsgId(Msg.Val, getSTI())) {
8106 Error(Msg.Loc, "invalid message id");
8107 return false;
8108 }
8109 }
8110 if (Strict && (msgRequiresOp(Msg.Val, getSTI()) != Op.IsDefined)) {
8111 if (Op.IsDefined) {
8112 Error(Op.Loc, "message does not support operations");
8113 } else {
8114 Error(Msg.Loc, "missing message operation");
8115 }
8116 return false;
8117 }
8118 if (!isValidMsgOp(Msg.Val, Op.Val, getSTI(), Strict)) {
8119 if (Op.Val == OPR_ID_UNSUPPORTED)
8120 Error(Op.Loc, "specified operation id is not supported on this GPU");
8121 else
8122 Error(Op.Loc, "invalid operation id");
8123 return false;
8124 }
8125 if (Strict && !msgSupportsStream(Msg.Val, Op.Val, getSTI()) &&
8126 Stream.IsDefined) {
8127 Error(Stream.Loc, "message operation does not support streams");
8128 return false;
8129 }
8130 if (!isValidMsgStream(Msg.Val, Op.Val, Stream.Val, getSTI(), Strict)) {
8131 Error(Stream.Loc, "invalid message stream id");
8132 return false;
8133 }
8134 return true;
8135}
8136
8137ParseStatus AMDGPUAsmParser::parseSendMsg(OperandVector &Operands) {
8138 using namespace llvm::AMDGPU::SendMsg;
8139
8140 int64_t ImmVal = 0;
8141 SMLoc Loc = getLoc();
8142
8143 if (trySkipId("sendmsg", AsmToken::LParen)) {
8144 OperandInfoTy Msg(OPR_ID_UNKNOWN);
8145 OperandInfoTy Op(OP_NONE_);
8146 OperandInfoTy Stream(STREAM_ID_NONE_);
8147 if (parseSendMsgBody(Msg, Op, Stream) &&
8148 validateSendMsg(Msg, Op, Stream)) {
8149 ImmVal = encodeMsg(Msg.Val, Op.Val, Stream.Val);
8150 } else {
8151 return ParseStatus::Failure;
8152 }
8153 } else if (parseExpr(ImmVal, "a sendmsg macro")) {
8154 if (ImmVal < 0 || !isUInt<16>(ImmVal))
8155 return Error(Loc, "invalid immediate: only 16-bit values are legal");
8156 } else {
8157 return ParseStatus::Failure;
8158 }
8159
8160 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
8161 return ParseStatus::Success;
8162}
8163
8164bool AMDGPUOperand::isSendMsg() const {
8165 return isImmTy(ImmTySendMsg);
8166}
8167
8168//===----------------------------------------------------------------------===//
8169// v_interp
8170//===----------------------------------------------------------------------===//
8171
8172ParseStatus AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
8173 StringRef Str;
8174 SMLoc S = getLoc();
8175
8176 if (!parseId(Str))
8177 return ParseStatus::NoMatch;
8178
8179 int Slot = StringSwitch<int>(Str)
8180 .Case("p10", 0)
8181 .Case("p20", 1)
8182 .Case("p0", 2)
8183 .Default(-1);
8184
8185 if (Slot == -1)
8186 return Error(S, "invalid interpolation slot");
8187
8188 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
8189 AMDGPUOperand::ImmTyInterpSlot));
8190 return ParseStatus::Success;
8191}
8192
8193ParseStatus AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
8194 StringRef Str;
8195 SMLoc S = getLoc();
8196
8197 if (!parseId(Str))
8198 return ParseStatus::NoMatch;
8199
8200 if (!Str.starts_with("attr"))
8201 return Error(S, "invalid interpolation attribute");
8202
8203 StringRef Chan = Str.take_back(2);
8204 int AttrChan = StringSwitch<int>(Chan)
8205 .Case(".x", 0)
8206 .Case(".y", 1)
8207 .Case(".z", 2)
8208 .Case(".w", 3)
8209 .Default(-1);
8210 if (AttrChan == -1)
8211 return Error(S, "invalid or missing interpolation attribute channel");
8212
8213 Str = Str.drop_back(2).drop_front(4);
8214
8215 uint8_t Attr;
8216 if (Str.getAsInteger(10, Attr))
8217 return Error(S, "invalid or missing interpolation attribute number");
8218
8219 if (Attr > 32)
8220 return Error(S, "out of bounds interpolation attribute number");
8221
8222 SMLoc SChan = SMLoc::getFromPointer(Chan.data());
8223
8224 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
8225 AMDGPUOperand::ImmTyInterpAttr));
8226 Operands.push_back(AMDGPUOperand::CreateImm(
8227 this, AttrChan, SChan, AMDGPUOperand::ImmTyInterpAttrChan));
8228 return ParseStatus::Success;
8229}
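// Illustrative sketch, not part of the original file: an interpolation
// attribute is written "attr<N>.<chan>", e.g. "attr3.y" selects attribute 3,
// channel 1, which is exactly the string surgery parseInterpAttr() performs.
static bool splitInterpAttr(llvm::StringRef Str, unsigned &Attr, unsigned &Chan) {
  if (!Str.consume_front("attr"))
    return false;
  Chan = llvm::StringSwitch<unsigned>(Str.take_back(2))
             .Case(".x", 0)
             .Case(".y", 1)
             .Case(".z", 2)
             .Case(".w", 3)
             .Default(~0u);
  if (Chan == ~0u)
    return false;
  // getAsInteger() returns true on failure.
  return !Str.drop_back(2).getAsInteger(10, Attr);
}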
8230
8231//===----------------------------------------------------------------------===//
8232// exp
8233//===----------------------------------------------------------------------===//
8234
8235ParseStatus AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
8236 using namespace llvm::AMDGPU::Exp;
8237
8238 StringRef Str;
8239 SMLoc S = getLoc();
8240
8241 if (!parseId(Str))
8242 return ParseStatus::NoMatch;
8243
8244 unsigned Id = getTgtId(Str);
8245 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI()))
8246 return Error(S, (Id == ET_INVALID)
8247 ? "invalid exp target"
8248 : "exp target is not supported on this GPU");
8249
8250 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
8251 AMDGPUOperand::ImmTyExpTgt));
8252 return ParseStatus::Success;
8253}
8254
8255//===----------------------------------------------------------------------===//
8256// parser helpers
8257//===----------------------------------------------------------------------===//
8258
8259bool
8260AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
8261 return Token.is(AsmToken::Identifier) && Token.getString() == Id;
8262}
8263
8264bool
8265AMDGPUAsmParser::isId(const StringRef Id) const {
8266 return isId(getToken(), Id);
8267}
8268
8269bool
8270AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
8271 return getTokenKind() == Kind;
8272}
8273
8274StringRef AMDGPUAsmParser::getId() const {
8275 return isToken(AsmToken::Identifier) ? getTokenStr() : StringRef();
8276}
8277
8278bool
8279AMDGPUAsmParser::trySkipId(const StringRef Id) {
8280 if (isId(Id)) {
8281 lex();
8282 return true;
8283 }
8284 return false;
8285}
8286
8287bool
8288AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
8289 if (isToken(AsmToken::Identifier)) {
8290 StringRef Tok = getTokenStr();
8291 if (Tok.starts_with(Pref) && Tok.drop_front(Pref.size()) == Id) {
8292 lex();
8293 return true;
8294 }
8295 }
8296 return false;
8297}
8298
8299bool
8300AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
8301 if (isId(Id) && peekToken().is(Kind)) {
8302 lex();
8303 lex();
8304 return true;
8305 }
8306 return false;
8307}
8308
8309bool
8310AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
8311 if (isToken(Kind)) {
8312 lex();
8313 return true;
8314 }
8315 return false;
8316}
8317
8318bool
8319AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
8320 const StringRef ErrMsg) {
8321 if (!trySkipToken(Kind)) {
8322 Error(getLoc(), ErrMsg);
8323 return false;
8324 }
8325 return true;
8326}
8327
8328bool
8329AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
8330 SMLoc S = getLoc();
8331
8332 const MCExpr *Expr;
8333 if (Parser.parseExpression(Expr))
8334 return false;
8335
8336 if (Expr->evaluateAsAbsolute(Imm))
8337 return true;
8338
8339 if (Expected.empty()) {
8340 Error(S, "expected absolute expression");
8341 } else {
8342 Error(S, Twine("expected ", Expected) +
8343 Twine(" or an absolute expression"));
8344 }
8345 return false;
8346}
8347
8348bool
8349AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
8350 SMLoc S = getLoc();
8351
8352 const MCExpr *Expr;
8353 if (Parser.parseExpression(Expr))
8354 return false;
8355
8356 int64_t IntVal;
8357 if (Expr->evaluateAsAbsolute(IntVal)) {
8358 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
8359 } else {
8360 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
8361 }
8362 return true;
8363}
8364
8365bool
8366AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
8367 if (isToken(AsmToken::String)) {
8368 Val = getToken().getStringContents();
8369 lex();
8370 return true;
8371 }
8372 Error(getLoc(), ErrMsg);
8373 return false;
8374}
8375
8376bool
8377AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
8378 if (isToken(AsmToken::Identifier)) {
8379 Val = getTokenStr();
8380 lex();
8381 return true;
8382 }
8383 if (!ErrMsg.empty())
8384 Error(getLoc(), ErrMsg);
8385 return false;
8386}
8387
8388AsmToken
8389AMDGPUAsmParser::getToken() const {
8390 return Parser.getTok();
8391}
8392
8393AsmToken AMDGPUAsmParser::peekToken(bool ShouldSkipSpace) {
8394 return isToken(AsmToken::EndOfStatement)
8395 ? getToken()
8396 : getLexer().peekTok(ShouldSkipSpace);
8397}
8398
8399void
8400AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
8401 auto TokCount = getLexer().peekTokens(Tokens);
8402
8403 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
8404 Tokens[Idx] = AsmToken(AsmToken::Error, "");
8405}
8406
8407AsmToken::TokenKind
8408AMDGPUAsmParser::getTokenKind() const {
8409 return getLexer().getKind();
8410}
8411
8412SMLoc
8413AMDGPUAsmParser::getLoc() const {
8414 return getToken().getLoc();
8415}
8416
8417StringRef
8418AMDGPUAsmParser::getTokenStr() const {
8419 return getToken().getString();
8420}
8421
8422void
8423AMDGPUAsmParser::lex() {
8424 Parser.Lex();
8425}
8426
8427SMLoc AMDGPUAsmParser::getInstLoc(const OperandVector &Operands) const {
8428 return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
8429}
8430
8431// Returns one of the given locations that comes later in the source.
8432SMLoc AMDGPUAsmParser::getLaterLoc(SMLoc a, SMLoc b) {
8433 return a.getPointer() < b.getPointer() ? b : a;
8434}
8435
8436SMLoc AMDGPUAsmParser::getOperandLoc(const OperandVector &Operands,
8437 int MCOpIdx) const {
8438 for (const auto &Op : Operands) {
8439 const auto TargetOp = static_cast<AMDGPUOperand &>(*Op);
8440 if (TargetOp.getMCOpIdx() == MCOpIdx)
8441 return TargetOp.getStartLoc();
8442 }
8443 llvm_unreachable("No such MC operand!");
8444}
8445
8446SMLoc
8447AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
8448 const OperandVector &Operands) const {
8449 for (unsigned i = Operands.size() - 1; i > 0; --i) {
8450 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
8451 if (Test(Op))
8452 return Op.getStartLoc();
8453 }
8454 return getInstLoc(Operands);
8455}
8456
8457SMLoc
8458AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
8459 const OperandVector &Operands) const {
8460 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
8461 return getOperandLoc(Test, Operands);
8462}
8463
8464ParseStatus
8465AMDGPUAsmParser::parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields) {
8466 if (!trySkipToken(AsmToken::LCurly))
8467 return ParseStatus::NoMatch;
8468
8469 bool First = true;
8470 while (!trySkipToken(AsmToken::RCurly)) {
8471 if (!First &&
8472 !skipToken(AsmToken::Comma, "comma or closing brace expected"))
8473 return ParseStatus::Failure;
8474
8475 StringRef Id = getTokenStr();
8476 SMLoc IdLoc = getLoc();
8477 if (!skipToken(AsmToken::Identifier, "field name expected") ||
8478 !skipToken(AsmToken::Colon, "colon expected"))
8479 return ParseStatus::Failure;
8480
8481 const auto *I =
8482 find_if(Fields, [Id](StructuredOpField *F) { return F->Id == Id; });
8483 if (I == Fields.end())
8484 return Error(IdLoc, "unknown field");
8485 if ((*I)->IsDefined)
8486 return Error(IdLoc, "duplicate field");
8487
8488 // TODO: Support symbolic values.
8489 (*I)->Loc = getLoc();
8490 if (!parseExpr((*I)->Val))
8491 return ParseStatus::Failure;
8492 (*I)->IsDefined = true;
8493
8494 First = false;
8495 }
8496 return ParseStatus::Success;
8497}
8498
8499 bool AMDGPUAsmParser::validateStructuredOpFields(
8500 ArrayRef<const StructuredOpField *> Fields) {
8501 return all_of(Fields, [this](const StructuredOpField *F) {
8502 return F->validate(*this);
8503 });
8504}
8505
8506//===----------------------------------------------------------------------===//
8507// swizzle
8508//===----------------------------------------------------------------------===//
8509
8510LLVM_READNONE
8511static unsigned
8512encodeBitmaskPerm(const unsigned AndMask,
8513 const unsigned OrMask,
8514 const unsigned XorMask) {
8515 using namespace llvm::AMDGPU::Swizzle;
8516
8517 return BITMASK_PERM_ENC |
8518 (AndMask << BITMASK_AND_SHIFT) |
8519 (OrMask << BITMASK_OR_SHIFT) |
8520 (XorMask << BITMASK_XOR_SHIFT);
8521}
8522
8523bool AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, const unsigned MinVal,
8524 const unsigned MaxVal,
8525 const Twine &ErrMsg, SMLoc &Loc) {
8526 if (!skipToken(AsmToken::Comma, "expected a comma")) {
8527 return false;
8528 }
8529 Loc = getLoc();
8530 if (!parseExpr(Op)) {
8531 return false;
8532 }
8533 if (Op < MinVal || Op > MaxVal) {
8534 Error(Loc, ErrMsg);
8535 return false;
8536 }
8537
8538 return true;
8539}
8540
8541bool
8542AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
8543 const unsigned MinVal,
8544 const unsigned MaxVal,
8545 const StringRef ErrMsg) {
8546 SMLoc Loc;
8547 for (unsigned i = 0; i < OpNum; ++i) {
8548 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
8549 return false;
8550 }
8551
8552 return true;
8553}
8554
8555bool
8556AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
8557 using namespace llvm::AMDGPU::Swizzle;
8558
8559 int64_t Lane[LANE_NUM];
8560 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
8561 "expected a 2-bit lane id")) {
8563 for (unsigned I = 0; I < LANE_NUM; ++I) {
8564 Imm |= Lane[I] << (LANE_SHIFT * I);
8565 }
8566 return true;
8567 }
8568 return false;
8569}
8570
8571bool
8572AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
8573 using namespace llvm::AMDGPU::Swizzle;
8574
8575 SMLoc Loc;
8576 int64_t GroupSize;
8577 int64_t LaneIdx;
8578
8579 if (!parseSwizzleOperand(GroupSize,
8580 2, 32,
8581 "group size must be in the interval [2,32]",
8582 Loc)) {
8583 return false;
8584 }
8585 if (!isPowerOf2_64(GroupSize)) {
8586 Error(Loc, "group size must be a power of two");
8587 return false;
8588 }
8589 if (parseSwizzleOperand(LaneIdx,
8590 0, GroupSize - 1,
8591 "lane id must be in the interval [0,group size - 1]",
8592 Loc)) {
8593 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
8594 return true;
8595 }
8596 return false;
8597}
8598
8599bool
8600AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
8601 using namespace llvm::AMDGPU::Swizzle;
8602
8603 SMLoc Loc;
8604 int64_t GroupSize;
8605
8606 if (!parseSwizzleOperand(GroupSize,
8607 2, 32,
8608 "group size must be in the interval [2,32]",
8609 Loc)) {
8610 return false;
8611 }
8612 if (!isPowerOf2_64(GroupSize)) {
8613 Error(Loc, "group size must be a power of two");
8614 return false;
8615 }
8616
8617 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
8618 return true;
8619}
8620
8621bool
8622AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
8623 using namespace llvm::AMDGPU::Swizzle;
8624
8625 SMLoc Loc;
8626 int64_t GroupSize;
8627
8628 if (!parseSwizzleOperand(GroupSize,
8629 1, 16,
8630 "group size must be in the interval [1,16]",
8631 Loc)) {
8632 return false;
8633 }
8634 if (!isPowerOf2_64(GroupSize)) {
8635 Error(Loc, "group size must be a power of two");
8636 return false;
8637 }
8638
8639 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
8640 return true;
8641}
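// Illustrative sketch, not part of the original file: BROADCAST, REVERSE and
// SWAP are all emitted as BITMASK_PERM encodings. Assuming the usual ds_swizzle
// semantics, each lane reads from ((lane & and_mask) | or_mask) ^ xor_mask:
static unsigned swizzledSourceLane(unsigned Lane, unsigned AndMask,
                                   unsigned OrMask, unsigned XorMask) {
  return ((Lane & AndMask) | OrMask) ^ XorMask;
}
// With groups of 8 lanes (BITMASK_MAX assumed to be 0x1f):
//   swizzle(BROADCAST, 8, 3) -> and = 0b11000, or = 3, xor = 0
//   swizzle(REVERSE, 8)      -> and = 0x1f,    or = 0, xor = 7
//   swizzle(SWAP, 8)         -> and = 0x1f,    or = 0, xor = 8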
8642
8643bool
8644AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
8645 using namespace llvm::AMDGPU::Swizzle;
8646
8647 if (!skipToken(AsmToken::Comma, "expected a comma")) {
8648 return false;
8649 }
8650
8651 StringRef Ctl;
8652 SMLoc StrLoc = getLoc();
8653 if (!parseString(Ctl)) {
8654 return false;
8655 }
8656 if (Ctl.size() != BITMASK_WIDTH) {
8657 Error(StrLoc, "expected a 5-character mask");
8658 return false;
8659 }
8660
8661 unsigned AndMask = 0;
8662 unsigned OrMask = 0;
8663 unsigned XorMask = 0;
8664
8665 for (size_t i = 0; i < Ctl.size(); ++i) {
8666 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
8667 switch(Ctl[i]) {
8668 default:
8669 Error(StrLoc, "invalid mask");
8670 return false;
8671 case '0':
8672 break;
8673 case '1':
8674 OrMask |= Mask;
8675 break;
8676 case 'p':
8677 AndMask |= Mask;
8678 break;
8679 case 'i':
8680 AndMask |= Mask;
8681 XorMask |= Mask;
8682 break;
8683 }
8684 }
8685
8686 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
8687 return true;
8688}
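// Illustrative worked example, not part of the original file: the 5-character
// BITMASK_PERM control string maps each bit of the lane id to '0' (force 0),
// '1' (force 1), 'p' (pass through) or 'i' (pass inverted). The helper below
// repeats the loop above on a plain string; for "01pip" it yields
// AndMask = 0b00111, OrMask = 0b01000, XorMask = 0b00010.
static void bitmaskPermFromString(llvm::StringRef Ctl, unsigned &AndMask,
                                  unsigned &OrMask, unsigned &XorMask) {
  AndMask = OrMask = XorMask = 0;
  for (size_t I = 0; I < Ctl.size(); ++I) {
    unsigned Mask = 1u << (Ctl.size() - 1 - I); // leftmost char = highest bit
    switch (Ctl[I]) {
    case '1': OrMask |= Mask; break;
    case 'p': AndMask |= Mask; break;
    case 'i': AndMask |= Mask; XorMask |= Mask; break;
    default: break; // '0'
    }
  }
}
// Assembly spelling (assumed): ds_swizzle_b32 v0, v1 offset:swizzle(BITMASK_PERM, "01pip")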
8689
8690bool AMDGPUAsmParser::parseSwizzleFFT(int64_t &Imm) {
8691 using namespace llvm::AMDGPU::Swizzle;
8692
8693 if (!AMDGPU::isGFX9Plus(getSTI())) {
8694 Error(getLoc(), "FFT mode swizzle not supported on this GPU");
8695 return false;
8696 }
8697
8698 int64_t Swizzle;
8699 SMLoc Loc;
8700 if (!parseSwizzleOperand(Swizzle, 0, FFT_SWIZZLE_MAX,
8701 "FFT swizzle must be in the interval [0," +
8702 Twine(FFT_SWIZZLE_MAX) + Twine(']'),
8703 Loc))
8704 return false;
8705
8706 Imm = FFT_MODE_ENC | Swizzle;
8707 return true;
8708}
8709
8710bool AMDGPUAsmParser::parseSwizzleRotate(int64_t &Imm) {
8711 using namespace llvm::AMDGPU::Swizzle;
8712
8713 if (!AMDGPU::isGFX9Plus(getSTI())) {
8714 Error(getLoc(), "Rotate mode swizzle not supported on this GPU");
8715 return false;
8716 }
8717
8718 SMLoc Loc;
8719 int64_t Direction;
8720
8721 if (!parseSwizzleOperand(Direction, 0, 1,
8722 "direction must be 0 (left) or 1 (right)", Loc))
8723 return false;
8724
8725 int64_t RotateSize;
8726 if (!parseSwizzleOperand(
8727 RotateSize, 0, ROTATE_MAX_SIZE,
8728 "number of threads to rotate must be in the interval [0," +
8729 Twine(ROTATE_MAX_SIZE) + Twine(']'),
8730 Loc))
8731 return false;
8732
8733 Imm = ROTATE_MODE_ENC | (Direction << ROTATE_DIR_SHIFT) |
8734 (RotateSize << ROTATE_SIZE_SHIFT);
8735 return true;
8736}
8737
8738bool
8739AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
8740
8741 SMLoc OffsetLoc = getLoc();
8742
8743 if (!parseExpr(Imm, "a swizzle macro")) {
8744 return false;
8745 }
8746 if (!isUInt<16>(Imm)) {
8747 Error(OffsetLoc, "expected a 16-bit offset");
8748 return false;
8749 }
8750 return true;
8751}
8752
8753bool
8754AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
8755 using namespace llvm::AMDGPU::Swizzle;
8756
8757 if (skipToken(AsmToken::LParen, "expected a left parentheses")) {
8758
8759 SMLoc ModeLoc = getLoc();
8760 bool Ok = false;
8761
8762 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
8763 Ok = parseSwizzleQuadPerm(Imm);
8764 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
8765 Ok = parseSwizzleBitmaskPerm(Imm);
8766 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
8767 Ok = parseSwizzleBroadcast(Imm);
8768 } else if (trySkipId(IdSymbolic[ID_SWAP])) {
8769 Ok = parseSwizzleSwap(Imm);
8770 } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
8771 Ok = parseSwizzleReverse(Imm);
8772 } else if (trySkipId(IdSymbolic[ID_FFT])) {
8773 Ok = parseSwizzleFFT(Imm);
8774 } else if (trySkipId(IdSymbolic[ID_ROTATE])) {
8775 Ok = parseSwizzleRotate(Imm);
8776 } else {
8777 Error(ModeLoc, "expected a swizzle mode");
8778 }
8779
8780 return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses");
8781 }
8782
8783 return false;
8784}
8785
8786ParseStatus AMDGPUAsmParser::parseSwizzle(OperandVector &Operands) {
8787 SMLoc S = getLoc();
8788 int64_t Imm = 0;
8789
8790 if (trySkipId("offset")) {
8791
8792 bool Ok = false;
8793 if (skipToken(AsmToken::Colon, "expected a colon")) {
8794 if (trySkipId("swizzle")) {
8795 Ok = parseSwizzleMacro(Imm);
8796 } else {
8797 Ok = parseSwizzleOffset(Imm);
8798 }
8799 }
8800
8801 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
8802
8803 return ParseStatus::Success;
8804 }
8805 return ParseStatus::NoMatch;
8806}
8807
8808bool
8809AMDGPUOperand::isSwizzle() const {
8810 return isImmTy(ImmTySwizzle);
8811}
8812
8813//===----------------------------------------------------------------------===//
8814// VGPR Index Mode
8815//===----------------------------------------------------------------------===//
8816
8817int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
8818
8819 using namespace llvm::AMDGPU::VGPRIndexMode;
8820
8821 if (trySkipToken(AsmToken::RParen)) {
8822 return OFF;
8823 }
8824
8825 int64_t Imm = 0;
8826
8827 while (true) {
8828 unsigned Mode = 0;
8829 SMLoc S = getLoc();
8830
8831 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
8832 if (trySkipId(IdSymbolic[ModeId])) {
8833 Mode = 1 << ModeId;
8834 break;
8835 }
8836 }
8837
8838 if (Mode == 0) {
8839 Error(S, (Imm == 0)?
8840 "expected a VGPR index mode or a closing parenthesis" :
8841 "expected a VGPR index mode");
8842 return UNDEF;
8843 }
8844
8845 if (Imm & Mode) {
8846 Error(S, "duplicate VGPR index mode");
8847 return UNDEF;
8848 }
8849 Imm |= Mode;
8850
8851 if (trySkipToken(AsmToken::RParen))
8852 break;
8853 if (!skipToken(AsmToken::Comma,
8854 "expected a comma or a closing parenthesis"))
8855 return UNDEF;
8856 }
8857
8858 return Imm;
8859}
8860
8861ParseStatus AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
8862
8863 using namespace llvm::AMDGPU::VGPRIndexMode;
8864
8865 int64_t Imm = 0;
8866 SMLoc S = getLoc();
8867
8868 if (trySkipId("gpr_idx", AsmToken::LParen)) {
8869 Imm = parseGPRIdxMacro();
8870 if (Imm == UNDEF)
8871 return ParseStatus::Failure;
8872 } else {
8873 if (getParser().parseAbsoluteExpression(Imm))
8874 return ParseStatus::Failure;
8875 if (Imm < 0 || !isUInt<4>(Imm))
8876 return Error(S, "invalid immediate: only 4-bit values are legal");
8877 }
8878
8879 Operands.push_back(
8880 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
8881 return ParseStatus::Success;
8882}
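// Illustrative sketch, not part of the original file: inside gpr_idx(...) each
// mode name contributes a single bit (Imm |= 1 << ModeId above) and the whole
// operand must fit in 4 bits. The bit assignments and mode names below are
// assumptions.
static unsigned gprIdxImm(bool Src0, bool Src1, bool Src2, bool Dst) {
  return (Src0 ? 1u : 0u) | (Src1 ? 2u : 0u) | (Src2 ? 4u : 0u) |
         (Dst ? 8u : 0u);
}
// e.g. "s_set_gpr_idx_on s0, gpr_idx(SRC0, DST)" would be equivalent to the
// raw immediate 9 under these assumed bit positions.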
8883
8884bool AMDGPUOperand::isGPRIdxMode() const {
8885 return isImmTy(ImmTyGprIdxMode);
8886}
8887
8888//===----------------------------------------------------------------------===//
8889// sopp branch targets
8890//===----------------------------------------------------------------------===//
8891
8892ParseStatus AMDGPUAsmParser::parseSOPPBrTarget(OperandVector &Operands) {
8893
8894 // Make sure we are not parsing something
8895 // that looks like a label or an expression but is not.
8896 // This will improve error messages.
8897 if (isRegister() || isModifier())
8898 return ParseStatus::NoMatch;
8899
8900 if (!parseExpr(Operands))
8901 return ParseStatus::Failure;
8902
8903 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
8904 assert(Opr.isImm() || Opr.isExpr());
8905 SMLoc Loc = Opr.getStartLoc();
8906
8907 // Currently we do not support arbitrary expressions as branch targets.
8908 // Only labels and absolute expressions are accepted.
8909 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
8910 Error(Loc, "expected an absolute expression or a label");
8911 } else if (Opr.isImm() && !Opr.isS16Imm()) {
8912 Error(Loc, "expected a 16-bit signed jump offset");
8913 }
8914
8915 return ParseStatus::Success;
8916}
8917
8918//===----------------------------------------------------------------------===//
8919// Boolean holding registers
8920//===----------------------------------------------------------------------===//
8921
8922ParseStatus AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
8923 return parseReg(Operands);
8924}
8925
8926//===----------------------------------------------------------------------===//
8927// mubuf
8928//===----------------------------------------------------------------------===//
8929
8930void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
8931 const OperandVector &Operands,
8932 bool IsAtomic) {
8933 OptionalImmIndexMap OptionalIdx;
8934 unsigned FirstOperandIdx = 1;
8935 bool IsAtomicReturn = false;
8936
8937 if (IsAtomic) {
8938 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags &
8939 SIInstrFlags::IsAtomicRet;
8940 }
8941
8942 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
8943 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
8944
8945 // Add the register arguments
8946 if (Op.isReg()) {
8947 Op.addRegOperands(Inst, 1);
8948 // Insert a tied src for atomic return dst.
8949 // This cannot be postponed as subsequent calls to
8950 // addImmOperands rely on correct number of MC operands.
8951 if (IsAtomicReturn && i == FirstOperandIdx)
8952 Op.addRegOperands(Inst, 1);
8953 continue;
8954 }
8955
8956 // Handle the case where soffset is an immediate
8957 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
8958 Op.addImmOperands(Inst, 1);
8959 continue;
8960 }
8961
8962 // Handle tokens like 'offen' which are sometimes hard-coded into the
8963 // asm string. There are no MCInst operands for these.
8964 if (Op.isToken()) {
8965 continue;
8966 }
8967 assert(Op.isImm());
8968
8969 // Handle optional arguments
8970 OptionalIdx[Op.getImmTy()] = i;
8971 }
8972
8973 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
8974 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
8975}
8976
8977//===----------------------------------------------------------------------===//
8978// smrd
8979//===----------------------------------------------------------------------===//
8980
8981bool AMDGPUOperand::isSMRDOffset8() const {
8982 return isImmLiteral() && isUInt<8>(getImm());
8983}
8984
8985bool AMDGPUOperand::isSMEMOffset() const {
8986 // Offset range is checked later by validator.
8987 return isImmLiteral();
8988}
8989
8990bool AMDGPUOperand::isSMRDLiteralOffset() const {
8991 // 32-bit literals are only supported on CI and we only want to use them
8992 // when the offset is > 8-bits.
8993 return isImmLiteral() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
8994}
8995
8996//===----------------------------------------------------------------------===//
8997// vop3
8998//===----------------------------------------------------------------------===//
8999
9000static bool ConvertOmodMul(int64_t &Mul) {
9001 if (Mul != 1 && Mul != 2 && Mul != 4)
9002 return false;
9003
9004 Mul >>= 1;
9005 return true;
9006}
9007
9008static bool ConvertOmodDiv(int64_t &Div) {
9009 if (Div == 1) {
9010 Div = 0;
9011 return true;
9012 }
9013
9014 if (Div == 2) {
9015 Div = 3;
9016 return true;
9017 }
9018
9019 return false;
9020}
9021
9022// For pre-gfx11 targets, both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
9023// This is intentional and ensures compatibility with sp3.
9024// See bug 35397 for details.
9025bool AMDGPUAsmParser::convertDppBoundCtrl(int64_t &BoundCtrl) {
9026 if (BoundCtrl == 0 || BoundCtrl == 1) {
9027 if (!isGFX11Plus())
9028 BoundCtrl = 1;
9029 return true;
9030 }
9031 return false;
9032}
9033
9034void AMDGPUAsmParser::onBeginOfFile() {
9035 if (!getParser().getStreamer().getTargetStreamer() ||
9036 getSTI().getTargetTriple().getArch() == Triple::r600)
9037 return;
9038
9039 if (!getTargetStreamer().getTargetID())
9040 getTargetStreamer().initializeTargetID(getSTI(),
9041 getSTI().getFeatureString());
9042
9043 if (isHsaAbi(getSTI()))
9044 getTargetStreamer().EmitDirectiveAMDGCNTarget();
9045}
9046
9047/// Parse AMDGPU specific expressions.
9048///
9049/// expr ::= or(expr, ...) |
9050/// max(expr, ...)
9051///
9052bool AMDGPUAsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
9053 using AGVK = AMDGPUMCExpr::VariantKind;
9054
9055 if (isToken(AsmToken::Identifier)) {
9056 StringRef TokenId = getTokenStr();
9057 AGVK VK = StringSwitch<AGVK>(TokenId)
9058 .Case("max", AGVK::AGVK_Max)
9059 .Case("or", AGVK::AGVK_Or)
9060 .Case("extrasgprs", AGVK::AGVK_ExtraSGPRs)
9061 .Case("totalnumvgprs", AGVK::AGVK_TotalNumVGPRs)
9062 .Case("alignto", AGVK::AGVK_AlignTo)
9063 .Case("occupancy", AGVK::AGVK_Occupancy)
9064 .Default(AGVK::AGVK_None);
9065
9066    if (VK != AGVK::AGVK_None && peekToken().is(AsmToken::LParen)) {
9067      SmallVector<const MCExpr *, 4> Exprs;
9068 uint64_t CommaCount = 0;
9069 lex(); // Eat Arg ('or', 'max', 'occupancy', etc.)
9070 lex(); // Eat '('
9071 while (true) {
9072 if (trySkipToken(AsmToken::RParen)) {
9073 if (Exprs.empty()) {
9074 Error(getToken().getLoc(),
9075 "empty " + Twine(TokenId) + " expression");
9076 return true;
9077 }
9078 if (CommaCount + 1 != Exprs.size()) {
9079 Error(getToken().getLoc(),
9080 "mismatch of commas in " + Twine(TokenId) + " expression");
9081 return true;
9082 }
9083 Res = AMDGPUMCExpr::create(VK, Exprs, getContext());
9084 return false;
9085 }
9086 const MCExpr *Expr;
9087 if (getParser().parseExpression(Expr, EndLoc))
9088 return true;
9089 Exprs.push_back(Expr);
9090 bool LastTokenWasComma = trySkipToken(AsmToken::Comma);
9091 if (LastTokenWasComma)
9092 CommaCount++;
9093 if (!LastTokenWasComma && !isToken(AsmToken::RParen)) {
9094 Error(getToken().getLoc(),
9095 "unexpected token in " + Twine(TokenId) + " expression");
9096 return true;
9097 }
9098 }
9099 }
9100 }
9101 return getParser().parsePrimaryExpr(Res, EndLoc, nullptr);
9102}
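// Editorial example (not part of the original source; the symbol names are
// made up): with the grammar above, a directive such as
//   .set total_sgpr, max(kernel_a.num_sgpr, kernel_b.num_sgpr)
// is folded into a single AMDGPUMCExpr node, while a plain identifier or
// number falls through to the generic parsePrimaryExpr() call at the end.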
9103
9104ParseStatus AMDGPUAsmParser::parseOModSI(OperandVector &Operands) {
9105 StringRef Name = getTokenStr();
9106 if (Name == "mul") {
9107 return parseIntWithPrefix("mul", Operands,
9108 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
9109 }
9110
9111 if (Name == "div") {
9112 return parseIntWithPrefix("div", Operands,
9113 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
9114 }
9115
9116 return ParseStatus::NoMatch;
9117}
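// Editorial example (not part of the original source): this accepts output
// modifier suffixes such as "mul:2", "mul:4" or "div:2" on VOP3 instructions,
// e.g. "v_add_f32 v0, v1, v2 mul:2"; the ConvertOmodMul/ConvertOmodDiv
// callbacks rewrite the textual value into the omod encoding before it is
// stored as an immediate operand.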
9118
9119// Determines which bit DST_OP_SEL occupies in the op_sel operand according to
9120// the number of src operands present, then copies that bit into src0_modifiers.
9121static void cvtVOP3DstOpSelOnly(MCInst &Inst, const MCRegisterInfo &MRI) {
9122 int Opc = Inst.getOpcode();
9123 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
9124 if (OpSelIdx == -1)
9125 return;
9126
9127 int SrcNum;
9128 const AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
9129 AMDGPU::OpName::src2};
9130 for (SrcNum = 0; SrcNum < 3 && AMDGPU::hasNamedOperand(Opc, Ops[SrcNum]);
9131 ++SrcNum)
9132 ;
9133 assert(SrcNum > 0);
9134
9135 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
9136
9137 int DstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
9138 if (DstIdx == -1)
9139 return;
9140
9141 const MCOperand &DstOp = Inst.getOperand(DstIdx);
9142 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
9143 uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
9144 if (DstOp.isReg() &&
9145      MRI.getRegClass(AMDGPU::VGPR_16RegClassID).contains(DstOp.getReg())) {
9146    if (AMDGPU::isHi16Reg(DstOp.getReg(), MRI))
9147 ModVal |= SISrcMods::DST_OP_SEL;
9148 } else {
9149 if ((OpSel & (1 << SrcNum)) != 0)
9150 ModVal |= SISrcMods::DST_OP_SEL;
9151 }
9152 Inst.getOperand(ModIdx).setImm(ModVal);
9153}
9154
9155void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst,
9156 const OperandVector &Operands) {
9157 cvtVOP3P(Inst, Operands);
9158 cvtVOP3DstOpSelOnly(Inst, *getMRI());
9159}
9160
9161void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
9162 OptionalImmIndexMap &OptionalIdx) {
9163 cvtVOP3P(Inst, Operands, OptionalIdx);
9164 cvtVOP3DstOpSelOnly(Inst, *getMRI());
9165}
9166
9167static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
9168 return
9169    // 1. This is an input-modifiers operand
9170    Desc.operands()[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
9171    // 2. This is not the last operand
9172    && Desc.NumOperands > (OpNum + 1)
9173    // 3. The next operand has a register class
9174    && Desc.operands()[OpNum + 1].RegClass != -1
9175    // 4. The next register is not tied to any other operand
9176    && Desc.getOperandConstraint(OpNum + 1,
9177                                 MCOI::OperandConstraint::TIED_TO) == -1;
9178}
9179
9180void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
9181{
9182 OptionalImmIndexMap OptionalIdx;
9183 unsigned Opc = Inst.getOpcode();
9184
9185 unsigned I = 1;
9186 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9187 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9188 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9189 }
9190
9191 for (unsigned E = Operands.size(); I != E; ++I) {
9192    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9193    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9194 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9195 } else if (Op.isInterpSlot() || Op.isInterpAttr() ||
9196 Op.isInterpAttrChan()) {
9197 Inst.addOperand(MCOperand::createImm(Op.getImm()));
9198 } else if (Op.isImmModifier()) {
9199 OptionalIdx[Op.getImmTy()] = I;
9200 } else {
9201 llvm_unreachable("unhandled operand type");
9202 }
9203 }
9204
9205 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::high))
9206 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9207 AMDGPUOperand::ImmTyHigh);
9208
9209 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9210 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9211 AMDGPUOperand::ImmTyClamp);
9212
9213 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
9214 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9215 AMDGPUOperand::ImmTyOModSI);
9216}
9217
9218void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands)
9219{
9220 OptionalImmIndexMap OptionalIdx;
9221 unsigned Opc = Inst.getOpcode();
9222
9223 unsigned I = 1;
9224 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9225 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9226 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9227 }
9228
9229 for (unsigned E = Operands.size(); I != E; ++I) {
9230    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9231    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9232 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9233 } else if (Op.isImmModifier()) {
9234 OptionalIdx[Op.getImmTy()] = I;
9235 } else {
9236 llvm_unreachable("unhandled operand type");
9237 }
9238 }
9239
9240 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClamp);
9241
9242 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
9243 if (OpSelIdx != -1)
9244 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
9245
9246 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyWaitEXP);
9247
9248 if (OpSelIdx == -1)
9249 return;
9250
9251 const AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
9252 AMDGPU::OpName::src2};
9253 const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9254 AMDGPU::OpName::src1_modifiers,
9255 AMDGPU::OpName::src2_modifiers};
9256
9257 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
9258
9259 for (int J = 0; J < 3; ++J) {
9260 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
9261 if (OpIdx == -1)
9262 break;
9263
9264 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
9265 uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
9266
9267 if ((OpSel & (1 << J)) != 0)
9268 ModVal |= SISrcMods::OP_SEL_0;
9269 if (ModOps[J] == AMDGPU::OpName::src0_modifiers &&
9270 (OpSel & (1 << 3)) != 0)
9271 ModVal |= SISrcMods::DST_OP_SEL;
9272
9273 Inst.getOperand(ModIdx).setImm(ModVal);
9274 }
9275}
9276void AMDGPUAsmParser::cvtScaledMFMA(MCInst &Inst,
9277 const OperandVector &Operands) {
9278 OptionalImmIndexMap OptionalIdx;
9279 unsigned Opc = Inst.getOpcode();
9280 unsigned I = 1;
9281 int CbszOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::cbsz);
9282
9283 const MCInstrDesc &Desc = MII.get(Opc);
9284
9285 for (unsigned J = 0; J < Desc.getNumDefs(); ++J)
9286 static_cast<AMDGPUOperand &>(*Operands[I++]).addRegOperands(Inst, 1);
9287
9288 for (unsigned E = Operands.size(); I != E; ++I) {
9289 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[I]);
9290 int NumOperands = Inst.getNumOperands();
9291    // The order of operands in the MCInst differs from the parsed operand
9292    // order. Add dummy cbsz and blgp operands at the corresponding MCInst
9293    // operand indices so that the scale values are parsed correctly.
9294    if (NumOperands == CbszOpIdx) {
9295      Inst.addOperand(MCOperand::createImm(0));
9296      Inst.addOperand(MCOperand::createImm(0));
9297 }
9298 if (isRegOrImmWithInputMods(Desc, NumOperands)) {
9299 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9300 } else if (Op.isImmModifier()) {
9301 OptionalIdx[Op.getImmTy()] = I;
9302 } else {
9303 Op.addRegOrImmOperands(Inst, 1);
9304 }
9305 }
9306
9307 // Insert CBSZ and BLGP operands for F8F6F4 variants
9308 auto CbszIdx = OptionalIdx.find(AMDGPUOperand::ImmTyCBSZ);
9309 if (CbszIdx != OptionalIdx.end()) {
9310 int CbszVal = ((AMDGPUOperand &)*Operands[CbszIdx->second]).getImm();
9311 Inst.getOperand(CbszOpIdx).setImm(CbszVal);
9312 }
9313
9314 int BlgpOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
9315 auto BlgpIdx = OptionalIdx.find(AMDGPUOperand::ImmTyBLGP);
9316 if (BlgpIdx != OptionalIdx.end()) {
9317 int BlgpVal = ((AMDGPUOperand &)*Operands[BlgpIdx->second]).getImm();
9318 Inst.getOperand(BlgpOpIdx).setImm(BlgpVal);
9319 }
9320
9321  // Add dummy src_modifiers
9322  Inst.addOperand(MCOperand::createImm(0));
9323  Inst.addOperand(MCOperand::createImm(0));
9324
9325 // Handle op_sel fields
9326
9327 unsigned OpSel = 0;
9328 auto OpselIdx = OptionalIdx.find(AMDGPUOperand::ImmTyOpSel);
9329 if (OpselIdx != OptionalIdx.end()) {
9330 OpSel = static_cast<const AMDGPUOperand &>(*Operands[OpselIdx->second])
9331 .getImm();
9332 }
9333
9334 unsigned OpSelHi = 0;
9335 auto OpselHiIdx = OptionalIdx.find(AMDGPUOperand::ImmTyOpSelHi);
9336 if (OpselHiIdx != OptionalIdx.end()) {
9337 OpSelHi = static_cast<const AMDGPUOperand &>(*Operands[OpselHiIdx->second])
9338 .getImm();
9339 }
9340 const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9341 AMDGPU::OpName::src1_modifiers};
9342
9343 for (unsigned J = 0; J < 2; ++J) {
9344 unsigned ModVal = 0;
9345 if (OpSel & (1 << J))
9346 ModVal |= SISrcMods::OP_SEL_0;
9347 if (OpSelHi & (1 << J))
9348 ModVal |= SISrcMods::OP_SEL_1;
9349
9350 const int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
9351 Inst.getOperand(ModIdx).setImm(ModVal);
9352 }
9353}
9354
9355void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
9356 OptionalImmIndexMap &OptionalIdx) {
9357 unsigned Opc = Inst.getOpcode();
9358
9359 unsigned I = 1;
9360 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9361 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9362 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9363 }
9364
9365 for (unsigned E = Operands.size(); I != E; ++I) {
9366    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9367    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9368 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9369 } else if (Op.isImmModifier()) {
9370 OptionalIdx[Op.getImmTy()] = I;
9371 } else {
9372 Op.addRegOrImmOperands(Inst, 1);
9373 }
9374 }
9375
9376 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::scale_sel))
9377 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9378 AMDGPUOperand::ImmTyScaleSel);
9379
9380 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9381 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9382 AMDGPUOperand::ImmTyClamp);
9383
9384 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::byte_sel)) {
9385 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in))
9386 Inst.addOperand(Inst.getOperand(0));
9387 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9388 AMDGPUOperand::ImmTyByteSel);
9389 }
9390
9391 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
9392 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9393 AMDGPUOperand::ImmTyOModSI);
9394
9395  // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
9396  // they have a src2 register operand that is tied to the dst operand.
9397  // We don't allow modifiers for this operand in the assembler, so
9398  // src2_modifiers should be 0.
9399 if (isMAC(Opc)) {
9400 auto *it = Inst.begin();
9401 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
9402 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
9403 ++it;
9404 // Copy the operand to ensure it's not invalidated when Inst grows.
9405 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
9406 }
9407}
9408
9409void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
9410 OptionalImmIndexMap OptionalIdx;
9411 cvtVOP3(Inst, Operands, OptionalIdx);
9412}
9413
9414void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
9415 OptionalImmIndexMap &OptIdx) {
9416 const int Opc = Inst.getOpcode();
9417 const MCInstrDesc &Desc = MII.get(Opc);
9418
9419 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
9420
9421 if (Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_F16_vi ||
9422 Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_BF16_vi ||
9423 Opc == AMDGPU::V_CVT_SR_BF8_F32_vi ||
9424 Opc == AMDGPU::V_CVT_SR_FP8_F32_vi ||
9425 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx12 ||
9426 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx12) {
9427 Inst.addOperand(MCOperand::createImm(0)); // Placeholder for src2_mods
9428 Inst.addOperand(Inst.getOperand(0));
9429 }
9430
9431 // Adding vdst_in operand is already covered for these DPP instructions in
9432 // cvtVOP3DPP.
9433 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in) &&
9434 !(Opc == AMDGPU::V_CVT_PK_BF8_F32_t16_e64_dpp_gfx12 ||
9435 Opc == AMDGPU::V_CVT_PK_FP8_F32_t16_e64_dpp_gfx12 ||
9436 Opc == AMDGPU::V_CVT_PK_BF8_F32_t16_e64_dpp8_gfx12 ||
9437 Opc == AMDGPU::V_CVT_PK_FP8_F32_t16_e64_dpp8_gfx12 ||
9438 Opc == AMDGPU::V_CVT_PK_BF8_F32_fake16_e64_dpp_gfx12 ||
9439 Opc == AMDGPU::V_CVT_PK_FP8_F32_fake16_e64_dpp_gfx12 ||
9440 Opc == AMDGPU::V_CVT_PK_BF8_F32_fake16_e64_dpp8_gfx12 ||
9441 Opc == AMDGPU::V_CVT_PK_FP8_F32_fake16_e64_dpp8_gfx12 ||
9442 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12 ||
9443 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
9444 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx1250_e64_dpp_gfx1250 ||
9445 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx1250_e64_dpp8_gfx1250 ||
9446 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
9447 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12 ||
9448 Opc == AMDGPU::V_CVT_SR_FP8_F16_t16_e64_dpp_gfx1250 ||
9449 Opc == AMDGPU::V_CVT_SR_FP8_F16_fake16_e64_dpp_gfx1250 ||
9450 Opc == AMDGPU::V_CVT_SR_FP8_F16_t16_e64_dpp8_gfx1250 ||
9451 Opc == AMDGPU::V_CVT_SR_FP8_F16_fake16_e64_dpp8_gfx1250 ||
9452 Opc == AMDGPU::V_CVT_SR_FP8_F16_t16_e64_gfx1250 ||
9453 Opc == AMDGPU::V_CVT_SR_FP8_F16_fake16_e64_gfx1250 ||
9454 Opc == AMDGPU::V_CVT_SR_BF8_F16_t16_e64_dpp_gfx1250 ||
9455 Opc == AMDGPU::V_CVT_SR_BF8_F16_fake16_e64_dpp_gfx1250 ||
9456 Opc == AMDGPU::V_CVT_SR_BF8_F16_t16_e64_dpp8_gfx1250 ||
9457 Opc == AMDGPU::V_CVT_SR_BF8_F16_fake16_e64_dpp8_gfx1250 ||
9458 Opc == AMDGPU::V_CVT_SR_BF8_F16_t16_e64_gfx1250 ||
9459 Opc == AMDGPU::V_CVT_SR_BF8_F16_fake16_e64_gfx1250)) {
9460 Inst.addOperand(Inst.getOperand(0));
9461 }
9462
9463 int BitOp3Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::bitop3);
9464 if (BitOp3Idx != -1) {
9465 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyBitOp3);
9466 }
9467
9468 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
9469 // instruction, and then figure out where to actually put the modifiers
9470
9471 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
9472 if (OpSelIdx != -1) {
9473 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
9474 }
9475
9476 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
9477 if (OpSelHiIdx != -1) {
9478 int DefaultVal = IsPacked ? -1 : 0;
9479 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
9480 DefaultVal);
9481 }
9482
9483 int MatrixAFMTIdx =
9484 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_a_fmt);
9485 if (MatrixAFMTIdx != -1) {
9486 addOptionalImmOperand(Inst, Operands, OptIdx,
9487 AMDGPUOperand::ImmTyMatrixAFMT, 0);
9488 }
9489
9490 int MatrixBFMTIdx =
9491 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_b_fmt);
9492 if (MatrixBFMTIdx != -1) {
9493 addOptionalImmOperand(Inst, Operands, OptIdx,
9494 AMDGPUOperand::ImmTyMatrixBFMT, 0);
9495 }
9496
9497 int MatrixAScaleIdx =
9498 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_a_scale);
9499 if (MatrixAScaleIdx != -1) {
9500 addOptionalImmOperand(Inst, Operands, OptIdx,
9501 AMDGPUOperand::ImmTyMatrixAScale, 0);
9502 }
9503
9504 int MatrixBScaleIdx =
9505 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_b_scale);
9506 if (MatrixBScaleIdx != -1) {
9507 addOptionalImmOperand(Inst, Operands, OptIdx,
9508 AMDGPUOperand::ImmTyMatrixBScale, 0);
9509 }
9510
9511 int MatrixAScaleFmtIdx =
9512 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_a_scale_fmt);
9513 if (MatrixAScaleFmtIdx != -1) {
9514 addOptionalImmOperand(Inst, Operands, OptIdx,
9515 AMDGPUOperand::ImmTyMatrixAScaleFmt, 0);
9516 }
9517
9518 int MatrixBScaleFmtIdx =
9519 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_b_scale_fmt);
9520 if (MatrixBScaleFmtIdx != -1) {
9521 addOptionalImmOperand(Inst, Operands, OptIdx,
9522 AMDGPUOperand::ImmTyMatrixBScaleFmt, 0);
9523 }
9524
9525 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::matrix_a_reuse))
9526 addOptionalImmOperand(Inst, Operands, OptIdx,
9527 AMDGPUOperand::ImmTyMatrixAReuse, 0);
9528
9529 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::matrix_b_reuse))
9530 addOptionalImmOperand(Inst, Operands, OptIdx,
9531 AMDGPUOperand::ImmTyMatrixBReuse, 0);
9532
9533 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
9534 if (NegLoIdx != -1)
9535 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
9536
9537 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
9538 if (NegHiIdx != -1)
9539 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
9540
9541 const AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
9542 AMDGPU::OpName::src2};
9543 const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9544 AMDGPU::OpName::src1_modifiers,
9545 AMDGPU::OpName::src2_modifiers};
9546
9547 unsigned OpSel = 0;
9548 unsigned OpSelHi = 0;
9549 unsigned NegLo = 0;
9550 unsigned NegHi = 0;
9551
9552 if (OpSelIdx != -1)
9553 OpSel = Inst.getOperand(OpSelIdx).getImm();
9554
9555 if (OpSelHiIdx != -1)
9556 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
9557
9558 if (NegLoIdx != -1)
9559 NegLo = Inst.getOperand(NegLoIdx).getImm();
9560
9561 if (NegHiIdx != -1)
9562 NegHi = Inst.getOperand(NegHiIdx).getImm();
9563
9564 for (int J = 0; J < 3; ++J) {
9565 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
9566 if (OpIdx == -1)
9567 break;
9568
9569 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
9570
9571 if (ModIdx == -1)
9572 continue;
9573
9574 uint32_t ModVal = 0;
9575
9576 const MCOperand &SrcOp = Inst.getOperand(OpIdx);
9577 if (SrcOp.isReg() && getMRI()
9578 ->getRegClass(AMDGPU::VGPR_16RegClassID)
9579 .contains(SrcOp.getReg())) {
9580 bool VGPRSuffixIsHi = AMDGPU::isHi16Reg(SrcOp.getReg(), *getMRI());
9581 if (VGPRSuffixIsHi)
9582 ModVal |= SISrcMods::OP_SEL_0;
9583 } else {
9584 if ((OpSel & (1 << J)) != 0)
9585 ModVal |= SISrcMods::OP_SEL_0;
9586 }
9587
9588 if ((OpSelHi & (1 << J)) != 0)
9589 ModVal |= SISrcMods::OP_SEL_1;
9590
9591 if ((NegLo & (1 << J)) != 0)
9592 ModVal |= SISrcMods::NEG;
9593
9594 if ((NegHi & (1 << J)) != 0)
9595 ModVal |= SISrcMods::NEG_HI;
9596
9597 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
9598 }
9599}
9600
9601void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
9602 OptionalImmIndexMap OptIdx;
9603 cvtVOP3(Inst, Operands, OptIdx);
9604 cvtVOP3P(Inst, Operands, OptIdx);
9605}
9606
9607 static void addSrcModifiersAndSrc(MCInst &Inst, const OperandVector &Operands,
9608 unsigned i, unsigned Opc,
9609 AMDGPU::OpName OpName) {
9610 if (AMDGPU::getNamedOperandIdx(Opc, OpName) != -1)
9611 ((AMDGPUOperand &)*Operands[i]).addRegOrImmWithFPInputModsOperands(Inst, 2);
9612 else
9613 ((AMDGPUOperand &)*Operands[i]).addRegOperands(Inst, 1);
9614}
9615
9616void AMDGPUAsmParser::cvtSWMMAC(MCInst &Inst, const OperandVector &Operands) {
9617 unsigned Opc = Inst.getOpcode();
9618
9619 ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1);
9620 addSrcModifiersAndSrc(Inst, Operands, 2, Opc, AMDGPU::OpName::src0_modifiers);
9621 addSrcModifiersAndSrc(Inst, Operands, 3, Opc, AMDGPU::OpName::src1_modifiers);
9622 ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1); // srcTiedDef
9623 ((AMDGPUOperand &)*Operands[4]).addRegOperands(Inst, 1); // src2
9624
9625 OptionalImmIndexMap OptIdx;
9626 for (unsigned i = 5; i < Operands.size(); ++i) {
9627 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
9628 OptIdx[Op.getImmTy()] = i;
9629 }
9630
9631 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_8bit))
9632 addOptionalImmOperand(Inst, Operands, OptIdx,
9633 AMDGPUOperand::ImmTyIndexKey8bit);
9634
9635 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_16bit))
9636 addOptionalImmOperand(Inst, Operands, OptIdx,
9637 AMDGPUOperand::ImmTyIndexKey16bit);
9638
9639 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_32bit))
9640 addOptionalImmOperand(Inst, Operands, OptIdx,
9641 AMDGPUOperand::ImmTyIndexKey32bit);
9642
9643 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9644 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyClamp);
9645
9646 cvtVOP3P(Inst, Operands, OptIdx);
9647}
9648
9649//===----------------------------------------------------------------------===//
9650// VOPD
9651//===----------------------------------------------------------------------===//
9652
9653ParseStatus AMDGPUAsmParser::parseVOPD(OperandVector &Operands) {
9654 if (!hasVOPD(getSTI()))
9655 return ParseStatus::NoMatch;
9656
9657 if (isToken(AsmToken::Colon) && peekToken(false).is(AsmToken::Colon)) {
9658 SMLoc S = getLoc();
9659 lex();
9660 lex();
9661 Operands.push_back(AMDGPUOperand::CreateToken(this, "::", S));
9662 SMLoc OpYLoc = getLoc();
9663 StringRef OpYName;
9664 if (isToken(AsmToken::Identifier) && !Parser.parseIdentifier(OpYName)) {
9665 Operands.push_back(AMDGPUOperand::CreateToken(this, OpYName, OpYLoc));
9666 return ParseStatus::Success;
9667 }
9668 return Error(OpYLoc, "expected a VOPDY instruction after ::");
9669 }
9670 return ParseStatus::NoMatch;
9671}
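// Editorial example (not part of the original source): dual-issue VOPD syntax
// on gfx11+ writes the two components separated by "::", e.g.
//   v_dual_mov_b32 v0, v1 :: v_dual_add_f32 v2, v3, v4
// The parser above only consumes the "::" token and the OpY mnemonic; the
// operands of both halves are matched later and assembled by cvtVOPD() below.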
9672
9673// Create VOPD MCInst operands using parsed assembler operands.
9674void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
9675 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9676
9677 auto addOp = [&](uint16_t ParsedOprIdx) { // NOLINT:function pointer
9678    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]);
9679    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9680 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9681 return;
9682 }
9683 if (Op.isReg()) {
9684 Op.addRegOperands(Inst, 1);
9685 return;
9686 }
9687 if (Op.isImm()) {
9688 Op.addImmOperands(Inst, 1);
9689 return;
9690 }
9691 llvm_unreachable("Unhandled operand type in cvtVOPD");
9692 };
9693
9694 const auto &InstInfo = getVOPDInstInfo(Inst.getOpcode(), &MII);
9695
9696 // MCInst operands are ordered as follows:
9697 // dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]
9698
9699 for (auto CompIdx : VOPD::COMPONENTS) {
9700 addOp(InstInfo[CompIdx].getIndexOfDstInParsedOperands());
9701 }
9702
9703 for (auto CompIdx : VOPD::COMPONENTS) {
9704 const auto &CInfo = InstInfo[CompIdx];
9705 auto CompSrcOperandsNum = InstInfo[CompIdx].getCompParsedSrcOperandsNum();
9706 for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOperandsNum; ++CompSrcIdx)
9707 addOp(CInfo.getIndexOfSrcInParsedOperands(CompSrcIdx));
9708 if (CInfo.hasSrc2Acc())
9709 addOp(CInfo.getIndexOfDstInParsedOperands());
9710 }
9711
9712 int BitOp3Idx =
9713 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::bitop3);
9714 if (BitOp3Idx != -1) {
9715 OptionalImmIndexMap OptIdx;
9716 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands.back());
9717 if (Op.isImm())
9718 OptIdx[Op.getImmTy()] = Operands.size() - 1;
9719
9720 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyBitOp3);
9721 }
9722}
9723
9724//===----------------------------------------------------------------------===//
9725// dpp
9726//===----------------------------------------------------------------------===//
9727
9728bool AMDGPUOperand::isDPP8() const {
9729 return isImmTy(ImmTyDPP8);
9730}
9731
9732bool AMDGPUOperand::isDPPCtrl() const {
9733 using namespace AMDGPU::DPP;
9734
9735 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
9736 if (result) {
9737 int64_t Imm = getImm();
9738 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
9739 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
9740 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
9741 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
9742 (Imm == DppCtrl::WAVE_SHL1) ||
9743 (Imm == DppCtrl::WAVE_ROL1) ||
9744 (Imm == DppCtrl::WAVE_SHR1) ||
9745 (Imm == DppCtrl::WAVE_ROR1) ||
9746 (Imm == DppCtrl::ROW_MIRROR) ||
9747 (Imm == DppCtrl::ROW_HALF_MIRROR) ||
9748 (Imm == DppCtrl::BCAST15) ||
9749 (Imm == DppCtrl::BCAST31) ||
9750 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
9751 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
9752 }
9753 return false;
9754}
9755
9756//===----------------------------------------------------------------------===//
9757// mAI
9758//===----------------------------------------------------------------------===//
9759
9760bool AMDGPUOperand::isBLGP() const {
9761 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
9762}
9763
9764bool AMDGPUOperand::isS16Imm() const {
9765 return isImmLiteral() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
9766}
9767
9768bool AMDGPUOperand::isU16Imm() const {
9769 return isImmLiteral() && isUInt<16>(getImm());
9770}
9771
9772//===----------------------------------------------------------------------===//
9773// dim
9774//===----------------------------------------------------------------------===//
9775
9776bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
9777 // We want to allow "dim:1D" etc.,
9778 // but the initial 1 is tokenized as an integer.
9779 std::string Token;
9780 if (isToken(AsmToken::Integer)) {
9781 SMLoc Loc = getToken().getEndLoc();
9782 Token = std::string(getTokenStr());
9783 lex();
9784 if (getLoc() != Loc)
9785 return false;
9786 }
9787
9788 StringRef Suffix;
9789 if (!parseId(Suffix))
9790 return false;
9791 Token += Suffix;
9792
9793 StringRef DimId = Token;
9794 DimId.consume_front("SQ_RSRC_IMG_");
9795
9796 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
9797 if (!DimInfo)
9798 return false;
9799
9800 Encoding = DimInfo->Encoding;
9801 return true;
9802}
9803
9804ParseStatus AMDGPUAsmParser::parseDim(OperandVector &Operands) {
9805 if (!isGFX10Plus())
9806 return ParseStatus::NoMatch;
9807
9808 SMLoc S = getLoc();
9809
9810 if (!trySkipId("dim", AsmToken::Colon))
9811 return ParseStatus::NoMatch;
9812
9813 unsigned Encoding;
9814 SMLoc Loc = getLoc();
9815 if (!parseDimId(Encoding))
9816 return Error(Loc, "invalid dim value");
9817
9818 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
9819 AMDGPUOperand::ImmTyDim));
9820 return ParseStatus::Success;
9821}
9822
9823//===----------------------------------------------------------------------===//
9824// dpp
9825//===----------------------------------------------------------------------===//
9826
9827ParseStatus AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
9828 SMLoc S = getLoc();
9829
9830 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
9831 return ParseStatus::NoMatch;
9832
9833 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
9834
9835 int64_t Sels[8];
9836
9837 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
9838 return ParseStatus::Failure;
9839
9840 for (size_t i = 0; i < 8; ++i) {
9841 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
9842 return ParseStatus::Failure;
9843
9844 SMLoc Loc = getLoc();
9845 if (getParser().parseAbsoluteExpression(Sels[i]))
9846 return ParseStatus::Failure;
9847 if (0 > Sels[i] || 7 < Sels[i])
9848 return Error(Loc, "expected a 3-bit value");
9849 }
9850
9851 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
9852 return ParseStatus::Failure;
9853
9854 unsigned DPP8 = 0;
9855 for (size_t i = 0; i < 8; ++i)
9856 DPP8 |= (Sels[i] << (i * 3));
9857
9858 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
9859 return ParseStatus::Success;
9860}
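// Editorial example (not part of the original source): the dpp8 selector
// lists a source lane for each of the 8 lanes in a row, e.g.
//   v_mov_b32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0]
// Each selector is 3 bits, so the loop above packs the list into a single
// 24-bit immediate (Sels[0] in bits [2:0], Sels[1] in bits [5:3], and so on).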
9861
9862bool
9863AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
9864 const OperandVector &Operands) {
9865 if (Ctrl == "row_newbcast")
9866 return isGFX90A();
9867
9868 if (Ctrl == "row_share" ||
9869 Ctrl == "row_xmask")
9870 return isGFX10Plus();
9871
9872 if (Ctrl == "wave_shl" ||
9873 Ctrl == "wave_shr" ||
9874 Ctrl == "wave_rol" ||
9875 Ctrl == "wave_ror" ||
9876 Ctrl == "row_bcast")
9877 return isVI() || isGFX9();
9878
9879 return Ctrl == "row_mirror" ||
9880 Ctrl == "row_half_mirror" ||
9881 Ctrl == "quad_perm" ||
9882 Ctrl == "row_shl" ||
9883 Ctrl == "row_shr" ||
9884 Ctrl == "row_ror";
9885}
9886
9887int64_t
9888AMDGPUAsmParser::parseDPPCtrlPerm() {
9889 // quad_perm:[%d,%d,%d,%d]
9890
9891 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
9892 return -1;
9893
9894 int64_t Val = 0;
9895 for (int i = 0; i < 4; ++i) {
9896 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
9897 return -1;
9898
9899 int64_t Temp;
9900 SMLoc Loc = getLoc();
9901 if (getParser().parseAbsoluteExpression(Temp))
9902 return -1;
9903 if (Temp < 0 || Temp > 3) {
9904 Error(Loc, "expected a 2-bit value");
9905 return -1;
9906 }
9907
9908 Val += (Temp << i * 2);
9909 }
9910
9911 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
9912 return -1;
9913
9914 return Val;
9915}
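// Editorial example (not part of the original source): "quad_perm:[0,1,2,3]"
// is the identity permutation, while "quad_perm:[3,2,1,0]" reverses each group
// of four lanes. Each entry is 2 bits, so the loop above packs the four values
// into an 8-bit dpp_ctrl field (entry i lands in bits [2*i+1 : 2*i]).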
9916
9917int64_t
9918AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
9919 using namespace AMDGPU::DPP;
9920
9921 // sel:%d
9922
9923 int64_t Val;
9924 SMLoc Loc = getLoc();
9925
9926 if (getParser().parseAbsoluteExpression(Val))
9927 return -1;
9928
9929 struct DppCtrlCheck {
9930 int64_t Ctrl;
9931 int Lo;
9932 int Hi;
9933 };
9934
9935 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
9936 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1})
9937 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1})
9938 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1})
9939 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1})
9940 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15})
9941 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15})
9942 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15})
9943 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
9944 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
9945 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
9946 .Default({-1, 0, 0});
9947
9948 bool Valid;
9949 if (Check.Ctrl == -1) {
9950 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
9951 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
9952 } else {
9953 Valid = Check.Lo <= Val && Val <= Check.Hi;
9954 Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
9955 }
9956
9957 if (!Valid) {
9958 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
9959 return -1;
9960 }
9961
9962 return Val;
9963}
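// Editorial example (not part of the original source): for "row_shl:1" the
// table above yields DppCtrl::ROW_SHL0 | 1, "row_share:5" yields
// ROW_SHARE_FIRST | 5, and "row_bcast:15"/"row_bcast:31" map to the dedicated
// BCAST15/BCAST31 encodings; any value outside the listed Lo..Hi range is
// rejected with an "invalid ... value" error.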
9964
9965ParseStatus AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
9966 using namespace AMDGPU::DPP;
9967
9968 if (!isToken(AsmToken::Identifier) ||
9969 !isSupportedDPPCtrl(getTokenStr(), Operands))
9970 return ParseStatus::NoMatch;
9971
9972 SMLoc S = getLoc();
9973 int64_t Val = -1;
9974 StringRef Ctrl;
9975
9976 parseId(Ctrl);
9977
9978 if (Ctrl == "row_mirror") {
9979 Val = DppCtrl::ROW_MIRROR;
9980 } else if (Ctrl == "row_half_mirror") {
9981 Val = DppCtrl::ROW_HALF_MIRROR;
9982 } else {
9983 if (skipToken(AsmToken::Colon, "expected a colon")) {
9984 if (Ctrl == "quad_perm") {
9985 Val = parseDPPCtrlPerm();
9986 } else {
9987 Val = parseDPPCtrlSel(Ctrl);
9988 }
9989 }
9990 }
9991
9992 if (Val == -1)
9993 return ParseStatus::Failure;
9994
9995 Operands.push_back(
9996 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
9997 return ParseStatus::Success;
9998}
9999
10000void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
10001 bool IsDPP8) {
10002 OptionalImmIndexMap OptionalIdx;
10003 unsigned Opc = Inst.getOpcode();
10004 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
10005
10006  // MAC instructions are special because they have an 'old' operand which
10007  // is not tied to dst (but is assumed to be).
10008  // They also have a dummy, unused src2_modifiers operand.
10009 int OldIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::old);
10010 int Src2ModIdx =
10011 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers);
10012 bool IsMAC = OldIdx != -1 && Src2ModIdx != -1 &&
10013 Desc.getOperandConstraint(OldIdx, MCOI::TIED_TO) == -1;
10014
10015 unsigned I = 1;
10016 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
10017 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
10018 }
10019
10020 int Fi = 0;
10021 int VdstInIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in);
10022 bool IsVOP3CvtSrDpp = Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12 ||
10023 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
10024 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
10025 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12;
10026
10027 for (unsigned E = Operands.size(); I != E; ++I) {
10028
10029 if (IsMAC) {
10030 int NumOperands = Inst.getNumOperands();
10031 if (OldIdx == NumOperands) {
10032 // Handle old operand
10033 constexpr int DST_IDX = 0;
10034 Inst.addOperand(Inst.getOperand(DST_IDX));
10035 } else if (Src2ModIdx == NumOperands) {
10036        // Add unused dummy src2_modifiers
10037        Inst.addOperand(MCOperand::createImm(0));
10038 }
10039 }
10040
10041 if (VdstInIdx == static_cast<int>(Inst.getNumOperands())) {
10042 Inst.addOperand(Inst.getOperand(0));
10043 }
10044
10045 if (IsVOP3CvtSrDpp) {
10046      if (Src2ModIdx == static_cast<int>(Inst.getNumOperands())) {
10047        Inst.addOperand(MCOperand::createImm(0));
10048 Inst.addOperand(MCOperand::createReg(MCRegister()));
10049 }
10050 }
10051
10052    auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
10053                                            MCOI::TIED_TO);
10054 if (TiedTo != -1) {
10055 assert((unsigned)TiedTo < Inst.getNumOperands());
10056 // handle tied old or src2 for MAC instructions
10057 Inst.addOperand(Inst.getOperand(TiedTo));
10058 }
10059 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
10060 // Add the register arguments
10061 if (IsDPP8 && Op.isDppFI()) {
10062 Fi = Op.getImm();
10063 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
10064 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
10065 } else if (Op.isReg()) {
10066 Op.addRegOperands(Inst, 1);
10067 } else if (Op.isImm() &&
10068 Desc.operands()[Inst.getNumOperands()].RegClass != -1) {
10069 Op.addImmOperands(Inst, 1);
10070 } else if (Op.isImm()) {
10071 OptionalIdx[Op.getImmTy()] = I;
10072 } else {
10073 llvm_unreachable("unhandled operand type");
10074 }
10075 }
10076
10077 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp) && !IsVOP3CvtSrDpp)
10078 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10079 AMDGPUOperand::ImmTyClamp);
10080
10081 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::byte_sel)) {
10082 if (VdstInIdx == static_cast<int>(Inst.getNumOperands()))
10083 Inst.addOperand(Inst.getOperand(0));
10084 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10085 AMDGPUOperand::ImmTyByteSel);
10086 }
10087
10088 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
10089 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
10090
10091 if (Desc.TSFlags & SIInstrFlags::VOP3P)
10092 cvtVOP3P(Inst, Operands, OptionalIdx);
10093 else if (Desc.TSFlags & SIInstrFlags::VOP3)
10094 cvtVOP3OpSel(Inst, Operands, OptionalIdx);
10095 else if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
10096 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
10097 }
10098
10099 if (IsDPP8) {
10100 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDPP8);
10101 using namespace llvm::AMDGPU::DPP;
10102 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
10103 } else {
10104 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppCtrl, 0xe4);
10105 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
10106 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
10107 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
10108
10109 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi))
10110 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10111 AMDGPUOperand::ImmTyDppFI);
10112 }
10113}
10114
10115void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
10116 OptionalImmIndexMap OptionalIdx;
10117
10118 unsigned I = 1;
10119 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
10120 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
10121 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
10122 }
10123
10124 int Fi = 0;
10125 for (unsigned E = Operands.size(); I != E; ++I) {
10126    auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
10127                                            MCOI::TIED_TO);
10128 if (TiedTo != -1) {
10129 assert((unsigned)TiedTo < Inst.getNumOperands());
10130 // handle tied old or src2 for MAC instructions
10131 Inst.addOperand(Inst.getOperand(TiedTo));
10132 }
10133 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
10134 // Add the register arguments
10135 if (Op.isReg() && validateVccOperand(Op.getReg())) {
10136 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
10137 // Skip it.
10138 continue;
10139 }
10140
10141 if (IsDPP8) {
10142 if (Op.isDPP8()) {
10143 Op.addImmOperands(Inst, 1);
10144 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
10145 Op.addRegWithFPInputModsOperands(Inst, 2);
10146 } else if (Op.isDppFI()) {
10147 Fi = Op.getImm();
10148 } else if (Op.isReg()) {
10149 Op.addRegOperands(Inst, 1);
10150 } else {
10151 llvm_unreachable("Invalid operand type");
10152 }
10153    } else {
10154      if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
10155 Op.addRegWithFPInputModsOperands(Inst, 2);
10156 } else if (Op.isReg()) {
10157 Op.addRegOperands(Inst, 1);
10158 } else if (Op.isDPPCtrl()) {
10159 Op.addImmOperands(Inst, 1);
10160 } else if (Op.isImm()) {
10161 // Handle optional arguments
10162 OptionalIdx[Op.getImmTy()] = I;
10163 } else {
10164 llvm_unreachable("Invalid operand type");
10165 }
10166 }
10167 }
10168
10169 if (IsDPP8) {
10170 using namespace llvm::AMDGPU::DPP;
10171 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
10172 } else {
10173 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
10174 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
10175 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
10176 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi)) {
10177 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10178 AMDGPUOperand::ImmTyDppFI);
10179 }
10180 }
10181}
10182
10183//===----------------------------------------------------------------------===//
10184// sdwa
10185//===----------------------------------------------------------------------===//
10186
10187ParseStatus AMDGPUAsmParser::parseSDWASel(OperandVector &Operands,
10188 StringRef Prefix,
10189 AMDGPUOperand::ImmTy Type) {
10190 return parseStringOrIntWithPrefix(
10191 Operands, Prefix,
10192 {"BYTE_0", "BYTE_1", "BYTE_2", "BYTE_3", "WORD_0", "WORD_1", "DWORD"},
10193 Type);
10194}
10195
10196ParseStatus AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
10197 return parseStringOrIntWithPrefix(
10198 Operands, "dst_unused", {"UNUSED_PAD", "UNUSED_SEXT", "UNUSED_PRESERVE"},
10199 AMDGPUOperand::ImmTySDWADstUnused);
10200}
10201
10202void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
10203 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
10204}
10205
10206void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
10207 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
10208}
10209
10210void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
10211 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
10212}
10213
10214void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
10215 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
10216}
10217
10218void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
10219 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
10220}
10221
10222void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
10223 uint64_t BasicInstType,
10224 bool SkipDstVcc,
10225 bool SkipSrcVcc) {
10226 using namespace llvm::AMDGPU::SDWA;
10227
10228 OptionalImmIndexMap OptionalIdx;
10229 bool SkipVcc = SkipDstVcc || SkipSrcVcc;
10230 bool SkippedVcc = false;
10231
10232 unsigned I = 1;
10233 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
10234 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
10235 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
10236 }
10237
10238 for (unsigned E = Operands.size(); I != E; ++I) {
10239 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
10240 if (SkipVcc && !SkippedVcc && Op.isReg() &&
10241 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
10242 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
10243 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
10244 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
10245 // Skip VCC only if we didn't skip it on previous iteration.
10246 // Note that src0 and src1 occupy 2 slots each because of modifiers.
10247 if (BasicInstType == SIInstrFlags::VOP2 &&
10248 ((SkipDstVcc && Inst.getNumOperands() == 1) ||
10249 (SkipSrcVcc && Inst.getNumOperands() == 5))) {
10250 SkippedVcc = true;
10251 continue;
10252 }
10253 if (BasicInstType == SIInstrFlags::VOPC && Inst.getNumOperands() == 0) {
10254 SkippedVcc = true;
10255 continue;
10256 }
10257    }
10258    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
10259 Op.addRegOrImmWithInputModsOperands(Inst, 2);
10260 } else if (Op.isImm()) {
10261 // Handle optional arguments
10262 OptionalIdx[Op.getImmTy()] = I;
10263 } else {
10264 llvm_unreachable("Invalid operand type");
10265 }
10266 SkippedVcc = false;
10267 }
10268
10269 const unsigned Opc = Inst.getOpcode();
10270 if (Opc != AMDGPU::V_NOP_sdwa_gfx10 && Opc != AMDGPU::V_NOP_sdwa_gfx9 &&
10271 Opc != AMDGPU::V_NOP_sdwa_vi) {
10272    // v_nop_sdwa_vi/gfx9/gfx10 has no optional sdwa arguments
10273 switch (BasicInstType) {
10274 case SIInstrFlags::VOP1:
10275 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
10276 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10277 AMDGPUOperand::ImmTyClamp, 0);
10278
10279 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
10280 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10281 AMDGPUOperand::ImmTyOModSI, 0);
10282
10283 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_sel))
10284 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10285 AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
10286
10287 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_unused))
10288 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10289 AMDGPUOperand::ImmTySDWADstUnused,
10290 DstUnused::UNUSED_PRESERVE);
10291
10292 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
10293 break;
10294
10295 case SIInstrFlags::VOP2:
10296 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10297 AMDGPUOperand::ImmTyClamp, 0);
10298
10299 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::omod))
10300 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
10301
10302 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
10303 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstUnused, DstUnused::UNUSED_PRESERVE);
10304 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
10305 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
10306 break;
10307
10308 case SIInstrFlags::VOPC:
10309 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::clamp))
10310 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10311 AMDGPUOperand::ImmTyClamp, 0);
10312 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
10313 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
10314 break;
10315
10316 default:
10317 llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
10318 }
10319 }
10320
10321  // Special case v_mac_{f16, f32}:
10322  // it has a src2 register operand that is tied to the dst operand.
10323 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
10324 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
10325 auto *it = Inst.begin();
10326 std::advance(
10327 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
10328 Inst.insert(it, Inst.getOperand(0)); // src2 = dst
10329 }
10330}
10331
10332/// Force static initialization.
10333 extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void
10334 LLVMInitializeAMDGPUAsmParser() {
10335   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheR600Target());
10336   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
10337 }
10338
10339#define GET_REGISTER_MATCHER
10340#define GET_MATCHER_IMPLEMENTATION
10341#define GET_MNEMONIC_SPELL_CHECKER
10342#define GET_MNEMONIC_CHECKER
10343#include "AMDGPUGenAsmMatcher.inc"
10344
10345ParseStatus AMDGPUAsmParser::parseCustomOperand(OperandVector &Operands,
10346 unsigned MCK) {
10347 switch (MCK) {
10348 case MCK_addr64:
10349 return parseTokenOp("addr64", Operands);
10350 case MCK_done:
10351 return parseTokenOp("done", Operands);
10352 case MCK_idxen:
10353 return parseTokenOp("idxen", Operands);
10354 case MCK_lds:
10355 return parseTokenOp("lds", Operands);
10356 case MCK_offen:
10357 return parseTokenOp("offen", Operands);
10358 case MCK_off:
10359 return parseTokenOp("off", Operands);
10360 case MCK_row_95_en:
10361 return parseTokenOp("row_en", Operands);
10362 case MCK_gds:
10363 return parseNamedBit("gds", Operands, AMDGPUOperand::ImmTyGDS);
10364 case MCK_tfe:
10365 return parseNamedBit("tfe", Operands, AMDGPUOperand::ImmTyTFE);
10366 }
10367 return tryCustomParseOperand(Operands, MCK);
10368}
10369
10370// This function should be defined after auto-generated include so that we have
10371// MatchClassKind enum defined
10372unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
10373 unsigned Kind) {
10374  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
10375  // But MatchInstructionImpl() expects a token and fails to validate the
10376  // operand. This method checks whether we were given an immediate operand
10377  // where the matcher expects the corresponding token.
10378 AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
10379 switch (Kind) {
10380 case MCK_addr64:
10381 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
10382 case MCK_gds:
10383 return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
10384 case MCK_lds:
10385 return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
10386 case MCK_idxen:
10387 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
10388 case MCK_offen:
10389 return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
10390 case MCK_tfe:
10391 return Operand.isTFE() ? Match_Success : Match_InvalidOperand;
10392 case MCK_SSrc_b32:
10393    // When operands have expression values, they will return true for isToken
10394    // because it is not possible to distinguish between a token and an
10395    // expression at parse time. MatchInstructionImpl() will always try to
10396    // match an operand as a token when isToken returns true, and the match
10397    // will fail if the name of the expression is not a valid token, so we
10398    // need to handle that case here.
10399 return Operand.isSSrc_b32() ? Match_Success : Match_InvalidOperand;
10400 case MCK_SSrc_f32:
10401 return Operand.isSSrc_f32() ? Match_Success : Match_InvalidOperand;
10402 case MCK_SOPPBrTarget:
10403 return Operand.isSOPPBrTarget() ? Match_Success : Match_InvalidOperand;
10404 case MCK_VReg32OrOff:
10405 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
10406 case MCK_InterpSlot:
10407 return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
10408 case MCK_InterpAttr:
10409 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
10410 case MCK_InterpAttrChan:
10411 return Operand.isInterpAttrChan() ? Match_Success : Match_InvalidOperand;
10412 case MCK_SReg_64:
10413 case MCK_SReg_64_XEXEC:
10414 // Null is defined as a 32-bit register but
10415 // it should also be enabled with 64-bit operands or larger.
10416 // The following code enables it for SReg_64 and larger operands
10417 // used as source and destination. Remaining source
10418 // operands are handled in isInlinableImm.
10419 case MCK_SReg_96:
10420 case MCK_SReg_128:
10421 case MCK_SReg_256:
10422 case MCK_SReg_512:
10423 return Operand.isNull() ? Match_Success : Match_InvalidOperand;
10424 default:
10425 return Match_InvalidOperand;
10426 }
10427}
10428
10429//===----------------------------------------------------------------------===//
10430// endpgm
10431//===----------------------------------------------------------------------===//
10432
10433ParseStatus AMDGPUAsmParser::parseEndpgm(OperandVector &Operands) {
10434 SMLoc S = getLoc();
10435 int64_t Imm = 0;
10436
10437 if (!parseExpr(Imm)) {
10438 // The operand is optional, if not present default to 0
10439 Imm = 0;
10440 }
10441
10442 if (!isUInt<16>(Imm))
10443 return Error(S, "expected a 16-bit value");
10444
10445 Operands.push_back(
10446 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
10447 return ParseStatus::Success;
10448}
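// Editorial example (not part of the original source): the immediate after
// s_endpgm is optional, so both "s_endpgm" and "s_endpgm 0x3f" parse here;
// a missing operand defaults to 0, and any value that does not fit in 16 bits
// is rejected.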
10449
10450bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
10451
10452//===----------------------------------------------------------------------===//
10453// Split Barrier
10454//===----------------------------------------------------------------------===//
10455
10456bool AMDGPUOperand::isSplitBarrier() const { return isInlinableImm(MVT::i32); }
unsigned SubReg
unsigned const MachineRegisterInfo * MRI
#define Success
static const TargetRegisterClass * getRegClass(const MachineInstr &MI, Register Reg)
unsigned RegSize
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
SmallVector< int16_t, MAX_SRC_OPERANDS_NUM > OperandIndices
static bool checkWriteLane(const MCInst &Inst)
static bool getRegNum(StringRef Str, unsigned &Num)
static void addSrcModifiersAndSrc(MCInst &Inst, const OperandVector &Operands, unsigned i, unsigned Opc, AMDGPU::OpName OpName)
static constexpr RegInfo RegularRegisters[]
static const RegInfo * getRegularRegInfo(StringRef Str)
static ArrayRef< unsigned > getAllVariants()
static OperandIndices getSrcOperandIndices(unsigned Opcode, bool AddMandatoryLiterals=false)
static int IsAGPROperand(const MCInst &Inst, AMDGPU::OpName Name, const MCRegisterInfo *MRI)
static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
static const fltSemantics * getFltSemantics(unsigned Size)
static bool isRegularReg(RegisterKind Kind)
LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser()
Force static initialization.
static bool ConvertOmodMul(int64_t &Mul)
#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)
static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi)
static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT)
constexpr uint64_t MIMGFlags
static bool AMDGPUCheckMnemonic(StringRef Mnemonic, const FeatureBitset &AvailableFeatures, unsigned VariantID)
static void applyMnemonicAliases(StringRef &Mnemonic, const FeatureBitset &Features, unsigned VariantID)
constexpr unsigned MAX_SRC_OPERANDS_NUM
#define EXPR_RESOLVE_OR_ERROR(RESOLVED)
static bool ConvertOmodDiv(int64_t &Div)
static bool IsRevOpcode(const unsigned Opcode)
static bool encodeCnt(const AMDGPU::IsaVersion ISA, int64_t &IntVal, int64_t CntVal, bool Saturate, unsigned(*encode)(const IsaVersion &Version, unsigned, unsigned), unsigned(*decode)(const IsaVersion &Version, unsigned))
static MCRegister getSpecialRegForName(StringRef RegName)
static void addOptionalImmOperand(MCInst &Inst, const OperandVector &Operands, AMDGPUAsmParser::OptionalImmIndexMap &OptionalIdx, AMDGPUOperand::ImmTy ImmT, int64_t Default=0, std::optional< unsigned > InsertAt=std::nullopt)
static void cvtVOP3DstOpSelOnly(MCInst &Inst, const MCRegisterInfo &MRI)
static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum)
static const fltSemantics * getOpFltSemantics(uint8_t OperandType)
static bool isInvalidVOPDY(const OperandVector &Operands, uint64_t InvalidOprIdx)
static std::string AMDGPUMnemonicSpellCheck(StringRef S, const FeatureBitset &FBS, unsigned VariantID=0)
static LLVM_READNONE unsigned encodeBitmaskPerm(const unsigned AndMask, const unsigned OrMask, const unsigned XorMask)
static bool isSafeTruncation(int64_t Val, unsigned Size)
AMDHSA kernel descriptor MCExpr struct for use in MC layer.
Provides AMDGPU specific target descriptions.
AMDGPU metadata definitions and in-memory representations.
AMDHSA kernel descriptor definitions.
static bool parseExpr(MCAsmParser &MCParser, const MCExpr *&Value, raw_ostream &Err)
MC layer struct for AMDGPUMCKernelCodeT, provides MCExpr functionality where required.
@ AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32
This file declares a class to represent arbitrary precision floating point values and provide a varie...
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Analysis containing CSE Info
Definition CSEInfo.cpp:27
#define LLVM_READNONE
Definition Compiler.h:315
#define LLVM_ABI
Definition Compiler.h:213
#define LLVM_EXTERNAL_VISIBILITY
Definition Compiler.h:132
@ Default
#define Check(C,...)
static llvm::Expected< InlineInfo > decode(DataExtractor &Data, uint64_t &Offset, uint64_t BaseAddr)
Decode an InlineInfo in Data at the specified offset.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RegName(no)
static LVOptions Options
Definition LVOptions.cpp:25
Loop::LoopBounds::Direction Direction
Definition LoopInfo.cpp:231
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
mir Rename Register Operands
Register Reg
Register const TargetRegisterInfo * TRI
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static bool isReg(const MCInst &MI, unsigned OpNo)
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
#define P(N)
if(PassOpts->AAPipeline)
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
Interface definition for SIInstrInfo.
unsigned unsigned DefaultVal
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:480
This file implements the SmallBitVector class.
static bool Enabled
Definition Statistic.cpp:46
StringSet - A set-like wrapper for the StringMap.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, ArrayRef< StringLiteral > StandardNames)
Initialize the set of available library functions based on the specified target triple.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition VPlanSLP.cpp:247
BinaryOperator * Mul
static const char * getRegisterName(MCRegister Reg)
static const AMDGPUMCExpr * createMax(ArrayRef< const MCExpr * > Args, MCContext &Ctx)
static const AMDGPUMCExpr * create(VariantKind Kind, ArrayRef< const MCExpr * > Args, MCContext &Ctx)
static const AMDGPUMCExpr * createExtraSGPRs(const MCExpr *VCCUsed, const MCExpr *FlatScrUsed, bool XNACKUsed, MCContext &Ctx)
Allow delayed MCExpr resolve of ExtraSGPRs (in case VCCUsed or FlatScrUsed are unresolvable but needed for further MCExprs).
static const AMDGPUMCExpr * createAlignTo(const MCExpr *Value, const MCExpr *Align, MCContext &Ctx)
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition APFloat.cpp:6057
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory), i.e. a start pointer and a length.
Definition ArrayRef.h:41
iterator end() const
Definition ArrayRef.h:136
size_t size() const
size - Get the array size.
Definition ArrayRef.h:147
Target independent representation for an assembler token.
Definition MCAsmMacro.h:22
StringRef getString() const
Get the string for the current token, this includes all characters (for example, the quotes on strings) in the token.
Definition MCAsmMacro.h:103
bool is(TokenKind K) const
Definition MCAsmMacro.h:75
Register getReg() const
Container class for subtarget features.
constexpr bool test(unsigned I) const
constexpr FeatureBitset & flip(unsigned I)
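A minimal sketch of how a FeatureBitset (as passed to AMDGPUMnemonicSpellCheck above) is queried and toggled by feature index; the index value is illustrative, and STI is an MCSubtargetInfo assumed to be in scope.
unsigned FeatureIndex = 0;                 // illustrative index, not a real feature ID
FeatureBitset FBS = STI.getFeatureBits();  // copy of the subtarget's feature bits
if (!FBS.test(FeatureIndex))
  FBS.flip(FeatureIndex);                  // enable the feature in the local copy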
void printExpr(raw_ostream &, const MCExpr &) const
virtual void Initialize(MCAsmParser &Parser)
Initialize the extension for parsing using the given Parser.
static const MCBinaryExpr * createAdd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:343
static const MCBinaryExpr * createDiv(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:353
static const MCBinaryExpr * createSub(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:428
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Definition MCExpr.cpp:212
LLVM_ABI MCSymbol * getOrCreateSymbol(const Twine &Name)
Lookup the symbol inside with the specified Name.
Base class for the full range of assembler expressions which are needed for parsing.
Definition MCExpr.h:34
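Taken together, these helpers build MC-layer expressions that are resolved later. A minimal sketch, assuming an MCContext Ctx is in scope and using MCSymbolRefExpr::create, which is not listed above; the symbol name is illustrative.
MCSymbol *Sym = Ctx.getOrCreateSymbol("my_sym");
const MCExpr *Base = MCSymbolRefExpr::create(Sym, Ctx);
const MCExpr *Sum  = MCBinaryExpr::createAdd(Base, MCConstantExpr::create(16, Ctx), Ctx);
const MCExpr *Quot = MCBinaryExpr::createDiv(Sum, MCConstantExpr::create(4, Ctx), Ctx);
// Quot now represents (my_sym + 16) / 4 and can be attached to an operand.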
Instances of this class represent a single low-level machine instruction.
Definition MCInst.h:188
unsigned getNumOperands() const
Definition MCInst.h:212
SMLoc getLoc() const
Definition MCInst.h:208
void setLoc(SMLoc loc)
Definition MCInst.h:207
unsigned getOpcode() const
Definition MCInst.h:202
iterator insert(iterator I, const MCOperand &Op)
Definition MCInst.h:232
void addOperand(const MCOperand Op)
Definition MCInst.h:215
iterator begin()
Definition MCInst.h:227
size_t size() const
Definition MCInst.h:226
const MCOperand & getOperand(unsigned i) const
Definition MCInst.h:210
Describe properties that are true of each instruction in the target description file.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition MCInstrInfo.h:90
Instances of this class represent operands of the MCInst class.
Definition MCInst.h:40
void setImm(int64_t Val)
Definition MCInst.h:89
static MCOperand createExpr(const MCExpr *Val)
Definition MCInst.h:166
int64_t getImm() const
Definition MCInst.h:84
static MCOperand createReg(MCRegister Reg)
Definition MCInst.h:138
static MCOperand createImm(int64_t Val)
Definition MCInst.h:145
bool isImm() const
Definition MCInst.h:66
void setReg(MCRegister Reg)
Set the register number.
Definition MCInst.h:79
bool isReg() const
Definition MCInst.h:65
MCRegister getReg() const
Returns the register number.
Definition MCInst.h:73
bool isExpr() const
Definition MCInst.h:69
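A rough sketch of how these MCInst/MCOperand helpers combine when an instruction is built; the opcode and register (AMDGPU::V_MOV_B32_e32, AMDGPU::VGPR0) are illustrative generated-enum names, and setOpcode is another MCInst member not listed above.
MCInst Inst;
Inst.setOpcode(AMDGPU::V_MOV_B32_e32);                  // illustrative opcode
Inst.addOperand(MCOperand::createReg(AMDGPU::VGPR0));   // destination register
Inst.addOperand(MCOperand::createImm(42));              // literal source
int64_t Val = Inst.getOperand(1).isImm() ? Inst.getOperand(1).getImm() : 0; // 42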
MCParsedAsmOperand - This abstract class represents a source-level assembly instruction operand.
MCRegisterClass - Base class of TargetRegisterClass.
MCRegister getRegister(unsigned i) const
getRegister - Return the specified register in the class.
unsigned getNumRegs() const
getNumRegs - Return the number of registers in this class.
bool contains(MCRegister Reg) const
contains - Return true if the specified register is included in this register class.
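A small sketch of querying a register class; MRI is an MCRegisterInfo assumed to be in scope, and the generated class/register names are illustrative.
const MCRegisterClass &RC = MRI.getRegClass(AMDGPU::VGPR_32RegClassID);
bool InClass     = RC.contains(AMDGPU::VGPR0);   // membership test
MCRegister First = RC.getRegister(0);            // first register in the class
unsigned Count   = RC.getNumRegs();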
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc objects that represent all of the machine registers that the target has.
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:33
constexpr bool isValid() const
Definition MCRegister.h:76
virtual void emitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI)
Emit the given Instruction into the current section.
bool isVariable() const
isVariable - Check if this is a variable symbol.
Definition MCSymbol.h:267
LLVM_ABI void setVariableValue(const MCExpr *Value)
Definition MCSymbol.cpp:50
void setRedefinable(bool Value)
Mark this symbol as redefinable.
Definition MCSymbol.h:210
const MCExpr * getVariableValue() const
Get the expression of the variable symbol.
Definition MCSymbol.h:270
MCTargetAsmParser - Generic interface to target specific assembly parsers.
Machine Value Type.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
Ternary parse status returned by various parse* methods.
constexpr bool isFailure() const
static constexpr StatusTy Failure
constexpr bool isSuccess() const
static constexpr StatusTy Success
static constexpr StatusTy NoMatch
constexpr bool isNoMatch() const
constexpr unsigned id() const
Definition Register.h:95
Represents a location in source code.
Definition SMLoc.h:23
static SMLoc getFromPointer(const char *Ptr)
Definition SMLoc.h:36
constexpr const char * getPointer() const
Definition SMLoc.h:34
constexpr bool isValid() const
Definition SMLoc.h:29
Represents a range in source code.
Definition SMLoc.h:48
SMLoc Start
Definition SMLoc.h:50
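A minimal sketch of how locations and ranges are formed from an in-buffer token for diagnostics; the token text is illustrative.
StringRef Tok = "v255";                                 // illustrative token text
SMLoc Start = SMLoc::getFromPointer(Tok.data());
SMLoc End   = SMLoc::getFromPointer(Tok.data() + Tok.size());
SMRange Range(Start, End);                              // the span an error can underline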
This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' template parameter.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs with the length computed at compile time.
Definition StringRef.h:862
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Definition StringRef.h:55
bool consume_back(StringRef Suffix)
Returns true if this StringRef has the given suffix and removes that suffix.
Definition StringRef.h:665
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition StringRef.h:581
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:269
constexpr bool empty() const
empty - Check if the string is empty.
Definition StringRef.h:151
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
Definition StringRef.h:619
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:154
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition StringRef.h:148
bool consume_front(StringRef Prefix)
Returns true if this StringRef has the given prefix and removes that prefix.
Definition StringRef.h:645
bool ends_with(StringRef Suffix) const
Check if this string ends with the given Suffix.
Definition StringRef.h:281
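A short sketch of these StringRef helpers on an illustrative operand string.
StringRef S = "neg(v0)";
if (S.consume_front("neg(") && S.consume_back(")")) {
  // S is now "v0"
}
bool IsVGPR = S.starts_with("v");   // true
StringRef Num = S.drop_front(1);    // "0"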
bool contains(StringRef key) const
Check if the set contains the given key.
Definition StringSet.h:60
std::pair< typename Base::iterator, bool > insert(StringRef key)
Definition StringSet.h:39
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
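A minimal sketch of a StringSwitch keyword table; the cases and values are illustrative, and Default is another StringSwitch member not listed here.
StringRef Tok = "lds";                          // illustrative token
unsigned Val = StringSwitch<unsigned>(Tok)
                   .Case("gds", 1)
                   .Case("lds", 2)
                   .Default(0);                 // Val == 2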
Twine - A lightweight data structure for efficiently representing the concatenation of temporary values as strings.
Definition Twine.h:82
LLVM_ABI std::string str() const
Return the twine contents as a std::string.
Definition Twine.cpp:17
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:194
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask, const MCSubtargetInfo &STI)
int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI)
bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI)
unsigned getTgtId(const StringRef Name)
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char NumSGPRs[]
Key for Kernel::CodeProps::Metadata::mNumSGPRs.
constexpr char SymbolName[]
Key for Kernel::Metadata::mSymbolName.
constexpr char AssemblerDirectiveBegin[]
HSA metadata beginning assembler directive.
constexpr char AssemblerDirectiveEnd[]
HSA metadata ending assembler directive.
constexpr char AssemblerDirectiveBegin[]
Old HSA metadata beginning assembler directive for V2.
int64_t getHwregId(StringRef Name, const MCSubtargetInfo &STI)
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)
unsigned getLocalMemorySize(const MCSubtargetInfo *STI)
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI)
unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI)
int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt, const MCSubtargetInfo &STI)
int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt)
int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI)
bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI)
int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI)
int64_t getDfmt(const StringRef Name)
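A minimal sketch of the dfmt/nfmt lookups; the format names are illustrative, STI is an MCSubtargetInfo assumed in scope, and whether the split or unified encoding applies depends on the target.
int64_t Dfmt = getDfmt("BUF_DATA_FORMAT_32");            // illustrative name
int64_t Nfmt = getNfmt("BUF_NUM_FORMAT_FLOAT", STI);     // illustrative name
int64_t Fmt  = encodeDfmtNfmt(Dfmt, Nfmt);               // split dfmt/nfmt encoding
// On targets with a unified format field, convertDfmtNfmt2Ufmt(Dfmt, Nfmt, STI)
// produces the equivalent unified value instead.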
constexpr char AssemblerDirective[]
PAL metadata (old linear format) assembler directive.
constexpr char AssemblerDirectiveBegin[]
PAL metadata (new MsgPack format) beginning assembler directive.
constexpr char AssemblerDirectiveEnd[]
PAL metadata (new MsgPack format) ending assembler directive.
int64_t getMsgOpId(int64_t MsgId, StringRef Name, const MCSubtargetInfo &STI)
Map from a symbolic name for a sendmsg operation to the operation portion of the immediate encoding.
int64_t getMsgId(StringRef Name, const MCSubtargetInfo &STI)
Map from a symbolic name for a msg_id to the message portion of the immediate encoding.
uint64_t encodeMsg(uint64_t MsgId, uint64_t OpId, uint64_t StreamId)
bool msgSupportsStream(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI)
bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI)
bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, const MCSubtargetInfo &STI, bool Strict)
bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI)
bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI, bool Strict)
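A minimal sketch of how these SendMsg helpers fit together; the message and operation names are illustrative, and STI is an MCSubtargetInfo assumed in scope.
int64_t MsgId = getMsgId("MSG_GS", STI);                 // illustrative name
int64_t OpId  = getMsgOpId(MsgId, "GS_OP_EMIT", STI);    // illustrative name
uint64_t Imm = 0;
if (isValidMsgId(MsgId, STI) && isValidMsgOp(MsgId, OpId, STI, /*Strict=*/true))
  Imm = encodeMsg(MsgId, OpId, /*StreamId=*/0);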
ArrayRef< GFXVersion > getGFXVersions()
constexpr unsigned COMPONENTS[]
bool isPackedFP32Inst(unsigned Opc)
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
bool isGFX10_BEncoding(const MCSubtargetInfo &STI)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc, unsigned OpNo)
Get size of register operand.
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
bool isSGPR(MCRegister Reg, const MCRegisterInfo *TRI)
Is Reg a scalar register?
MCRegister getMCReg(MCRegister Reg, const MCSubtargetInfo &STI)
If Reg is a pseudo reg, return the correct hardware register given STI otherwise return Reg.
uint8_t wmmaScaleF8F6F4FormatToNumRegs(unsigned Fmt)
const int OPR_ID_UNSUPPORTED
bool isInlinableLiteralV2I16(uint32_t Literal)
bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI)
unsigned getTemporalHintType(const MCInstrDesc TID)
int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR)
bool isGFX10(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2BF16(uint32_t Literal)
unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI)
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST)
For pre-GFX12 FLAT instructions the offset must be positive; MSB is ignored and forced to zero.
bool hasA16(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset, bool IsBuffer)
bool isGFX12Plus(const MCSubtargetInfo &STI)
unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler)
bool hasPackedD16(const MCSubtargetInfo &STI)
bool isGFX940(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2F16(uint32_t Literal)
bool isHsaAbi(const MCSubtargetInfo &STI)
bool isGFX11(const MCSubtargetInfo &STI)
const int OPR_VAL_INVALID
bool getSMEMIsBuffer(unsigned Opc)
uint8_t mfmaScaleF8F6F4FormatToNumRegs(unsigned EncodingVal)
LLVM_ABI IsaVersion getIsaVersion(StringRef GPU)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
CanBeVOPD getCanBeVOPD(unsigned Opc, unsigned EncodingFamily, bool VOPD3)
LLVM_READNONE bool isLegalDPALU_DPPControl(const MCSubtargetInfo &ST, unsigned DC)
bool isSI(const MCSubtargetInfo &STI)
unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt)
unsigned getWaitcntBitMask(const IsaVersion &Version)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
bool isGFX9(const MCSubtargetInfo &STI)
unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST)
bool isGFX10_AEncoding(const MCSubtargetInfo &STI)
bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this a KImm operand?
bool isGFX90A(const MCSubtargetInfo &STI)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByEncoding(uint8_t DimEnc)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
bool isGFX12(const MCSubtargetInfo &STI)
unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Expcnt)
bool hasMAIInsts(const MCSubtargetInfo &STI)
constexpr bool isSISrcOperand(const MCOperandInfo &OpInfo)
Is this an AMDGPU specific source operand?
bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCSubtargetInfo &ST)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix)
bool hasMIMG_R128(const MCSubtargetInfo &STI)
bool hasG16(const MCSubtargetInfo &STI)
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI)
bool isGFX11Plus(const MCSubtargetInfo &STI)
bool isInlineValue(unsigned Reg)
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this a floating-point operand?
bool isGFX10Plus(const MCSubtargetInfo &STI)
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
Definition SIDefines.h:231
@ OPERAND_REG_IMM_INT64
Definition SIDefines.h:202
@ OPERAND_REG_IMM_V2FP16
Definition SIDefines.h:209
@ OPERAND_REG_INLINE_C_FP64
Definition SIDefines.h:222
@ OPERAND_REG_INLINE_C_BF16
Definition SIDefines.h:219
@ OPERAND_REG_INLINE_C_V2BF16
Definition SIDefines.h:224
@ OPERAND_REG_IMM_V2INT16
Definition SIDefines.h:210
@ OPERAND_REG_IMM_BF16
Definition SIDefines.h:206
@ OPERAND_REG_IMM_INT32
Operands with register, 32-bit, or 64-bit immediate.
Definition SIDefines.h:201
@ OPERAND_REG_IMM_V2BF16
Definition SIDefines.h:208
@ OPERAND_REG_IMM_FP16
Definition SIDefines.h:207
@ OPERAND_REG_INLINE_C_INT64
Definition SIDefines.h:218
@ OPERAND_REG_INLINE_C_INT16
Operands with register or inline constant.
Definition SIDefines.h:216
@ OPERAND_REG_IMM_NOINLINE_V2FP16
Definition SIDefines.h:211
@ OPERAND_REG_IMM_FP64
Definition SIDefines.h:205
@ OPERAND_REG_INLINE_C_V2FP16
Definition SIDefines.h:225
@ OPERAND_REG_INLINE_AC_INT32
Operands with an AccVGPR register or inline constant.
Definition SIDefines.h:236
@ OPERAND_REG_INLINE_AC_FP32
Definition SIDefines.h:237
@ OPERAND_REG_IMM_V2INT32
Definition SIDefines.h:212
@ OPERAND_REG_IMM_FP32
Definition SIDefines.h:204
@ OPERAND_REG_INLINE_C_FP32
Definition SIDefines.h:221
@ OPERAND_REG_INLINE_C_INT32
Definition SIDefines.h:217
@ OPERAND_REG_INLINE_C_V2INT16
Definition SIDefines.h:223
@ OPERAND_REG_IMM_V2FP32
Definition SIDefines.h:213
@ OPERAND_REG_INLINE_AC_FP64
Definition SIDefines.h:238
@ OPERAND_REG_INLINE_C_FP16
Definition SIDefines.h:220
@ OPERAND_REG_IMM_INT16
Definition SIDefines.h:203
@ OPERAND_INLINE_SPLIT_BARRIER_INT32
Definition SIDefines.h:228
bool hasGDS(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset)
bool isGFX9Plus(const MCSubtargetInfo &STI)
bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI)
const int OPR_ID_DUPLICATE
bool isVOPD(unsigned Opc)
VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY)
unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Vmcnt)
unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt)
bool isGFX1250(const MCSubtargetInfo &STI)
const MIMGBaseOpcodeInfo * getMIMGBaseOpcode(unsigned Opc)
bool isVI(const MCSubtargetInfo &STI)
int64_t encode32BitLiteral(int64_t Imm, OperandType Type)
bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode)
MCRegister mc2PseudoReg(MCRegister Reg)
Convert hardware register Reg to a pseudo register.
unsigned hasKernargPreload(const MCSubtargetInfo &STI)
bool supportsWGP(const MCSubtargetInfo &STI)
bool isMAC(unsigned Opc)
LLVM_READNONE unsigned getOperandSize(const MCOperandInfo &OpInfo)
bool isCI(const MCSubtargetInfo &STI)
unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Lgkmcnt)
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
const int OPR_ID_UNKNOWN
unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt)
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
bool hasVOPD(const MCSubtargetInfo &STI)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
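A minimal sketch of the 64-bit inline-constant check with illustrative values.
bool A = isInlinableLiteral64(1, /*HasInv2Pi=*/true);                    // true: small integer
bool B = isInlinableLiteral64(0x3FE0000000000000, /*HasInv2Pi=*/true);   // true: double 0.5
bool C = isInlinableLiteral64(0x123456789ABCDEF0, /*HasInv2Pi=*/true);   // false: needs a literal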
bool isPermlane16(unsigned Opc)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ STT_AMDGPU_HSA_KERNEL
Definition ELF.h:1425
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:228
@ OPERAND_IMMEDIATE
Definition MCInstrDesc.h:62
Predicate getPredicate(unsigned Condition, unsigned Hint)
Return predicate consisting of specified condition and hint bits.
void validate(const Triple &TT, const FeatureBitset &FeatureBits)
Context & getContext() const
Definition BasicBlock.h:99
bool isNull(StringRef S)
Definition YAMLTraits.h:570
This is an optimization pass for GlobalISel generic memory operations.
bool errorToBool(Error Err)
Helper for converting an Error to a bool.
Definition Error.h:1113
@ Offset
Definition DWP.cpp:477
StringMapEntry< Value * > ValueName
Definition Value.h:56
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1705
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
unsigned encode(MaybeAlign A)
Returns a representation of the alignment that encodes undefined as 0.
Definition Alignment.h:217
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:174
static bool isMem(const MachineInstr &MI, unsigned Op)
LLVM_ABI std::pair< StringRef, StringRef > getToken(StringRef Source, StringRef Delimiters=" \t\n\v\f\r")
getToken - This function extracts one token from source, ignoring any leading characters that appear ...
static StringRef getCPU(StringRef CPU)
Processes a CPU name.
testing::Matcher< const detail::ErrorHolder & > Failed()
Definition Error.h:198
void PrintError(const Twine &Msg)
Definition Error.cpp:104
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition MathExtras.h:252
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64-bit edition).
Definition MathExtras.h:293
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Definition bit.h:314
Op::Description Desc
Target & getTheR600Target()
The target for R600 GPUs.
SmallVectorImpl< std::unique_ptr< MCParsedAsmOperand > > OperandVector
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
Definition MathExtras.h:159
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:198
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference sizeof(SmallVector<T, 0>).
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
Definition MathExtras.h:164
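A few of these integer helpers at work; the values are illustrative.
uint64_t V = 0x123456789ABCDEF0ULL;
uint32_t Hi = Hi_32(V);               // 0x12345678
uint32_t Lo = Lo_32(V);               // 0x9ABCDEF0
bool FitsU20 = isUIntN(20, 0xFFFFF);  // true: largest unsigned 20-bit value
bool FitsS16 = isIntN(16, -40000);    // false: below the signed 16-bit range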
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments.
Definition Casting.h:548
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
MutableArrayRef(T &OneElt) -> MutableArrayRef< T >
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition MathExtras.h:405
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:71
Target & getTheGCNTarget()
The target for GCN GPUs.
@ Sub
Subtraction of integers.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtual registers.
Definition MCRegister.h:21
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:155
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
unsigned M0(unsigned Val)
Definition VE.h:376
ArrayRef(const T &OneElt) -> ArrayRef< T >
std::string toString(const APInt &I, unsigned Radix, bool Signed, bool formatAsCLiteral=false, bool UpperCase=true, bool InsertSeparators=false)
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
constexpr bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
Definition MathExtras.h:257
@ Default
The result values are uniform if and only if all operands are uniform.
Definition Uniformity.h:20
int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition bit.h:154
#define N
RegisterKind Kind
StringLiteral Name
void validate(const MCSubtargetInfo *STI, MCContext &Ctx)
void initDefault(const MCSubtargetInfo *STI, MCContext &Ctx, bool InitMCExpr=true)
Instruction set architecture version.
static void bits_set(const MCExpr *&Dst, const MCExpr *Value, uint32_t Shift, uint32_t Mask, MCContext &Ctx)
static MCKernelDescriptor getDefaultAmdhsaKernelDescriptor(const MCSubtargetInfo *STI, MCContext &Ctx)
static LLVM_ABI const fltSemantics & IEEEsingle() LLVM_READNONE
Definition APFloat.cpp:266
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:304
static LLVM_ABI const fltSemantics & IEEEdouble() LLVM_READNONE
Definition APFloat.cpp:267
static LLVM_ABI const fltSemantics & IEEEhalf() LLVM_READNONE
Definition APFloat.cpp:264
static LLVM_ABI const fltSemantics & BFloat() LLVM_READNONE
Definition APFloat.cpp:265
opStatus
IEEE-754R 7: Default exception handling.
Definition APFloat.h:320
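A minimal sketch of the fltSemantics/convert usage pattern, as when a parsed literal must be narrowed to a 16-bit operand; APFloat's float constructor and bitcastToAPInt are assumed in addition to the members listed above.
APFloat Val(1.5f);
bool LosesInfo = false;
Val.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &LosesInfo);
uint64_t Bits = Val.bitcastToAPInt().getZExtValue();  // 0x3E00, IEEE half encoding of 1.5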
RegisterMCAsmParser - Helper template for registering a target specific assembly parser, for use in the target machine initialization function.