diff --git a/src/coreclr/jit/codegenarm64test.cpp b/src/coreclr/jit/codegenarm64test.cpp index e9dc508ee6fcc9..8f4c5260fa2692 100644 --- a/src/coreclr/jit/codegenarm64test.cpp +++ b/src/coreclr/jit/codegenarm64test.cpp @@ -5962,6 +5962,148 @@ void CodeGen::genArm64EmitterUnitTestsSve() theEmitter->emitIns_R_R_R_I(INS_sve_mul, EA_SCALABLE, REG_V22, REG_V23, REG_V15, 1, INS_OPTS_SCALABLE_D); // MUL .D, .D, .D[] + // IF_SVE_FE_3A + theEmitter->emitIns_R_R_R_I(INS_sve_smullb, EA_SCALABLE, REG_V0, REG_V1, REG_V0, 0, + INS_OPTS_SCALABLE_H); // SMULLB .S, .H, .H[] + theEmitter->emitIns_R_R_R_I(INS_sve_smullb, EA_SCALABLE, REG_V2, REG_V3, REG_V1, 1, + INS_OPTS_SCALABLE_H); // SMULLB .S, .H, .H[] + theEmitter->emitIns_R_R_R_I(INS_sve_smullt, EA_SCALABLE, REG_V4, REG_V5, REG_V2, 2, + INS_OPTS_SCALABLE_H); // SMULLT .S, .H, .H[] + theEmitter->emitIns_R_R_R_I(INS_sve_smullt, EA_SCALABLE, REG_V6, REG_V7, REG_V3, 3, + INS_OPTS_SCALABLE_H); // SMULLT .S, .H, .H[] + theEmitter->emitIns_R_R_R_I(INS_sve_umullb, EA_SCALABLE, REG_V8, REG_V9, REG_V4, 4, + INS_OPTS_SCALABLE_H); // UMULLB .S, .H, .H[] + theEmitter->emitIns_R_R_R_I(INS_sve_umullb, EA_SCALABLE, REG_V10, REG_V11, REG_V5, 5, + INS_OPTS_SCALABLE_H); // UMULLB .S, .H, .H[] + theEmitter->emitIns_R_R_R_I(INS_sve_umullt, EA_SCALABLE, REG_V12, REG_V13, REG_V6, 6, + INS_OPTS_SCALABLE_H); // UMULLT .S, .H, .H[] + theEmitter->emitIns_R_R_R_I(INS_sve_umullt, EA_SCALABLE, REG_V14, REG_V15, REG_V7, 7, + INS_OPTS_SCALABLE_H); // UMULLT .S, .H, .H[] + + // IF_SVE_FE_3B + theEmitter->emitIns_R_R_R_I(INS_sve_smullb, EA_SCALABLE, REG_V0, REG_V1, REG_V0, 0, + INS_OPTS_SCALABLE_S); // SMULLB .D, .S, .S[] + theEmitter->emitIns_R_R_R_I(INS_sve_smullb, EA_SCALABLE, REG_V2, REG_V3, REG_V2, 1, + INS_OPTS_SCALABLE_S); // SMULLB .D, .S, .S[] + theEmitter->emitIns_R_R_R_I(INS_sve_smullt, EA_SCALABLE, REG_V4, REG_V5, REG_V4, 2, + INS_OPTS_SCALABLE_S); // SMULLT .D, .S, .S[] + theEmitter->emitIns_R_R_R_I(INS_sve_smullt, EA_SCALABLE, REG_V6, REG_V7, REG_V6, 3, + INS_OPTS_SCALABLE_S); // SMULLT .D, .S, .S[] + theEmitter->emitIns_R_R_R_I(INS_sve_umullb, EA_SCALABLE, REG_V8, REG_V9, REG_V8, 0, + INS_OPTS_SCALABLE_S); // UMULLB .D, .S, .S[] + theEmitter->emitIns_R_R_R_I(INS_sve_umullb, EA_SCALABLE, REG_V10, REG_V11, REG_V10, 1, + INS_OPTS_SCALABLE_S); // UMULLB .D, .S, .S[] + theEmitter->emitIns_R_R_R_I(INS_sve_umullt, EA_SCALABLE, REG_V12, REG_V13, REG_V12, 2, + INS_OPTS_SCALABLE_S); // UMULLT .D, .S, .S[] + theEmitter->emitIns_R_R_R_I(INS_sve_umullt, EA_SCALABLE, REG_V14, REG_V15, REG_V14, 3, + INS_OPTS_SCALABLE_S); // UMULLT .D, .S, .S[] + + // IF_SVE_FG_3A + theEmitter->emitIns_R_R_R_I(INS_sve_smlalb, EA_SCALABLE, REG_V0, REG_V1, REG_V0, 0, + INS_OPTS_SCALABLE_H); // SMLALB .S, .H, .H[] + theEmitter->emitIns_R_R_R_I(INS_sve_smlalt, EA_SCALABLE, REG_V2, REG_V3, REG_V1, 1, + INS_OPTS_SCALABLE_H); // SMLALT .S, .H, .H[] + theEmitter->emitIns_R_R_R_I(INS_sve_smlslb, EA_SCALABLE, REG_V4, REG_V5, REG_V2, 2, + INS_OPTS_SCALABLE_H); // SMLSLB .S, .H, .H[] + theEmitter->emitIns_R_R_R_I(INS_sve_smlslt, EA_SCALABLE, REG_V6, REG_V7, REG_V3, 3, + INS_OPTS_SCALABLE_H); // SMLSLT .S, .H, .H[] + theEmitter->emitIns_R_R_R_I(INS_sve_umlalb, EA_SCALABLE, REG_V8, REG_V9, REG_V4, 4, + INS_OPTS_SCALABLE_H); // UMLALB .S, .H, .H[] + theEmitter->emitIns_R_R_R_I(INS_sve_umlalt, EA_SCALABLE, REG_V10, REG_V11, REG_V5, 5, + INS_OPTS_SCALABLE_H); // UMLALT .S, .H, .H[] + theEmitter->emitIns_R_R_R_I(INS_sve_umlslb, EA_SCALABLE, REG_V12, REG_V13, REG_V6, 6, + INS_OPTS_SCALABLE_H); // UMLSLB .S, .H, .H[] + theEmitter->emitIns_R_R_R_I(INS_sve_umlslt, EA_SCALABLE, REG_V14, REG_V15, REG_V7, 7, + INS_OPTS_SCALABLE_H); // UMLSLT .S, .H, .H[] + + // IF_SVE_FG_3B + theEmitter->emitIns_R_R_R_I(INS_sve_smlalb, EA_SCALABLE, REG_V0, REG_V1, REG_V0, 0, + INS_OPTS_SCALABLE_S); // SMLALB .D, .S, .S[] + theEmitter->emitIns_R_R_R_I(INS_sve_smlalt, EA_SCALABLE, REG_V2, REG_V3, REG_V2, 1, + INS_OPTS_SCALABLE_S); // SMLALT .D, .S, .S[] + theEmitter->emitIns_R_R_R_I(INS_sve_smlslb, EA_SCALABLE, REG_V4, REG_V5, REG_V4, 2, + INS_OPTS_SCALABLE_S); // SMLSLB .D, .S, .S[] + theEmitter->emitIns_R_R_R_I(INS_sve_smlslt, EA_SCALABLE, REG_V6, REG_V7, REG_V6, 3, + INS_OPTS_SCALABLE_S); // SMLSLT .D, .S, .S[] + theEmitter->emitIns_R_R_R_I(INS_sve_umlalb, EA_SCALABLE, REG_V8, REG_V9, REG_V8, 0, + INS_OPTS_SCALABLE_S); // UMLALB .D, .S, .S[] + theEmitter->emitIns_R_R_R_I(INS_sve_umlalt, EA_SCALABLE, REG_V10, REG_V11, REG_V10, 1, + INS_OPTS_SCALABLE_S); // UMLALT .D, .S, .S[] + theEmitter->emitIns_R_R_R_I(INS_sve_umlslb, EA_SCALABLE, REG_V12, REG_V13, REG_V12, 2, + INS_OPTS_SCALABLE_S); // UMLSLB .D, .S, .S[] + theEmitter->emitIns_R_R_R_I(INS_sve_umlslt, EA_SCALABLE, REG_V14, REG_V15, REG_V14, 3, + INS_OPTS_SCALABLE_S); // UMLSLT .D, .S, .S[] + + // IF_SVE_FH_3A + theEmitter->emitIns_R_R_R_I(INS_sve_sqdmullb, EA_SCALABLE, REG_V0, REG_V2, REG_V1, 1, + INS_OPTS_SCALABLE_H); // SQDMULLB .S, .H, .H[] + theEmitter->emitIns_R_R_R_I(INS_sve_sqdmullb, EA_SCALABLE, REG_V4, REG_V6, REG_V3, 3, + INS_OPTS_SCALABLE_H); // SQDMULLB .S, .H, .H[] + theEmitter->emitIns_R_R_R_I(INS_sve_sqdmullt, EA_SCALABLE, REG_V8, REG_V10, REG_V5, 5, + INS_OPTS_SCALABLE_H); // SQDMULLT .S, .H, .H[] + theEmitter->emitIns_R_R_R_I(INS_sve_sqdmullt, EA_SCALABLE, REG_V12, REG_V14, REG_V7, 7, + INS_OPTS_SCALABLE_H); // SQDMULLT .S, .H, .H[] + + // IF_SVE_FH_3B + theEmitter->emitIns_R_R_R_I(INS_sve_sqdmullb, EA_SCALABLE, REG_V0, REG_V2, REG_V0, 0, + INS_OPTS_SCALABLE_S); // SQDMULLB .D, .S, .S[] + theEmitter->emitIns_R_R_R_I(INS_sve_sqdmullb, EA_SCALABLE, REG_V4, REG_V6, REG_V5, 1, + INS_OPTS_SCALABLE_S); // SQDMULLB .D, .S, .S[] + theEmitter->emitIns_R_R_R_I(INS_sve_sqdmullt, EA_SCALABLE, REG_V8, REG_V10, REG_V10, 2, + INS_OPTS_SCALABLE_S); // SQDMULLT .D, .S, .S[] + theEmitter->emitIns_R_R_R_I(INS_sve_sqdmullt, EA_SCALABLE, REG_V12, REG_V14, REG_V15, 3, + INS_OPTS_SCALABLE_S); // SQDMULLT .D, .S, .S[] + + // IF_SVE_FI_3A + theEmitter->emitIns_R_R_R_I(INS_sve_sqdmulh, EA_SCALABLE, REG_V0, REG_V1, REG_V1, 1, + INS_OPTS_SCALABLE_H); // SQDMULH .H, .H, .H[] + theEmitter->emitIns_R_R_R_I(INS_sve_sqdmulh, EA_SCALABLE, REG_V2, REG_V3, REG_V3, 3, + INS_OPTS_SCALABLE_H); // SQDMULH .H, .H, .H[] + theEmitter->emitIns_R_R_R_I(INS_sve_sqrdmulh, EA_SCALABLE, REG_V4, REG_V5, REG_V5, 5, + INS_OPTS_SCALABLE_H); // SQRDMULH .H, .H, .H[] + theEmitter->emitIns_R_R_R_I(INS_sve_sqrdmulh, EA_SCALABLE, REG_V6, REG_V7, REG_V7, 7, + INS_OPTS_SCALABLE_H); // SQRDMULH .H, .H, .H[] + + // IF_SVE_FI_3B + theEmitter->emitIns_R_R_R_I(INS_sve_sqdmulh, EA_SCALABLE, REG_V8, REG_V9, REG_V0, 0, + INS_OPTS_SCALABLE_S); // SQDMULH .S, .S, .S[] + theEmitter->emitIns_R_R_R_I(INS_sve_sqdmulh, EA_SCALABLE, REG_V10, REG_V11, REG_V2, 1, + INS_OPTS_SCALABLE_S); // SQDMULH .S, .S, .S[] + theEmitter->emitIns_R_R_R_I(INS_sve_sqrdmulh, EA_SCALABLE, REG_V12, REG_V13, REG_V4, 2, + INS_OPTS_SCALABLE_S); // SQRDMULH .S, .S, .S[] + theEmitter->emitIns_R_R_R_I(INS_sve_sqrdmulh, EA_SCALABLE, REG_V14, REG_V15, REG_V6, 3, + INS_OPTS_SCALABLE_S); // SQRDMULH .S, .S, .S[] + + // IF_SVE_FI_3C + theEmitter->emitIns_R_R_R_I(INS_sve_sqdmulh, EA_SCALABLE, REG_V16, REG_V17, REG_V0, 0, + INS_OPTS_SCALABLE_D); // SQDMULH .D, .D, .D[] + theEmitter->emitIns_R_R_R_I(INS_sve_sqdmulh, EA_SCALABLE, REG_V18, REG_V19, REG_V5, 1, + INS_OPTS_SCALABLE_D); // SQDMULH .D, .D, .D[] + theEmitter->emitIns_R_R_R_I(INS_sve_sqrdmulh, EA_SCALABLE, REG_V20, REG_V21, REG_V10, 0, + INS_OPTS_SCALABLE_D); // SQRDMULH .D, .D, .D[] + theEmitter->emitIns_R_R_R_I(INS_sve_sqrdmulh, EA_SCALABLE, REG_V22, REG_V23, REG_V15, 1, + INS_OPTS_SCALABLE_D); // SQRDMULH .D, .D, .D[] + + // IF_SVE_FJ_3A + theEmitter->emitIns_R_R_R_I(INS_sve_sqdmlalb, EA_SCALABLE, REG_V0, REG_V1, REG_V1, 1, + INS_OPTS_SCALABLE_H); // SQDMLALB .S, .H, .H[] + theEmitter->emitIns_R_R_R_I(INS_sve_sqdmlalt, EA_SCALABLE, REG_V2, REG_V3, REG_V3, 3, + INS_OPTS_SCALABLE_H); // SQDMLALT .S, .H, .H[] + theEmitter->emitIns_R_R_R_I(INS_sve_sqdmlslb, EA_SCALABLE, REG_V4, REG_V5, REG_V5, 5, + INS_OPTS_SCALABLE_H); // SQDMLSLB .S, .H, .H[] + theEmitter->emitIns_R_R_R_I(INS_sve_sqdmlslt, EA_SCALABLE, REG_V6, REG_V0, REG_V7, 7, + INS_OPTS_SCALABLE_H); // SQDMLSLT .S, .H, .H[] + + // IF_SVE_FJ_3B + theEmitter->emitIns_R_R_R_I(INS_sve_sqdmlalb, EA_SCALABLE, REG_V8, REG_V9, REG_V0, 0, + INS_OPTS_SCALABLE_S); // SQDMLALB .D, .S, .S[] + theEmitter->emitIns_R_R_R_I(INS_sve_sqdmlalt, EA_SCALABLE, REG_V10, REG_V11, REG_V5, 1, + INS_OPTS_SCALABLE_S); // SQDMLALT .D, .S, .S[] + theEmitter->emitIns_R_R_R_I(INS_sve_sqdmlslb, EA_SCALABLE, REG_V12, REG_V13, REG_V10, 2, + INS_OPTS_SCALABLE_S); // SQDMLSLB .D, .S, .S[] + theEmitter->emitIns_R_R_R_I(INS_sve_sqdmlslt, EA_SCALABLE, REG_V14, REG_V15, REG_V15, 3, + INS_OPTS_SCALABLE_S); // SQDMLSLT .D, .S, .S[] + // IF_SVE_FF_3A theEmitter->emitIns_R_R_R_I(INS_sve_mla, EA_SCALABLE, REG_V0, REG_V1, REG_V1, 1, INS_OPTS_SCALABLE_H); // MLA .H, .H, .H[] diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 8d75e37c945e44..2fe4ae43110c86 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -1155,10 +1155,11 @@ void emitter::emitInsSanityCheck(instrDesc* id) case IF_SVE_EY_3A: // ...........iimmm ......nnnnnddddd -- SVE integer dot product (indexed) case IF_SVE_EZ_3A: // ...........iimmm ......nnnnnddddd -- SVE mixed sign dot product (indexed) case IF_SVE_FD_3B: // ...........iimmm ......nnnnnddddd -- SVE2 integer multiply (indexed) + case IF_SVE_FF_3B: // ...........iimmm ......nnnnnddddd -- SVE2 integer multiply-add (indexed) + case IF_SVE_FI_3B: // ...........iimmm ......nnnnnddddd -- SVE2 saturating multiply high (indexed) case IF_SVE_GU_3A: // ...........iimmm ......nnnnnddddd -- SVE floating-point multiply-add (indexed) case IF_SVE_GX_3A: // ...........iimmm ......nnnnnddddd -- SVE floating-point multiply (indexed) case IF_SVE_GY_3B: // ...........iimmm ......nnnnnddddd -- SVE BFloat16 floating-point dot product (indexed) - case IF_SVE_FF_3B: // ...........iimmm ......nnnnnddddd -- SVE2 integer multiply-add (indexed) assert(insOptsScalableStandard(id->idInsOpt())); assert(isVectorRegister(id->idReg1())); // ddddd assert(isVectorRegister(id->idReg2())); // nnnnn @@ -1168,9 +1169,14 @@ void emitter::emitInsSanityCheck(instrDesc* id) break; case IF_SVE_FD_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 integer multiply (indexed) + case IF_SVE_FE_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 integer multiply long (indexed) + case IF_SVE_FF_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 integer multiply-add (indexed) + case IF_SVE_FG_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 integer multiply-add long (indexed) + case IF_SVE_FH_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 saturating multiply (indexed) + case IF_SVE_FI_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 saturating multiply high (indexed) + case IF_SVE_FJ_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 saturating multiply-add (indexed) case IF_SVE_GU_3C: // .........i.iimmm ......nnnnnddddd -- SVE floating-point multiply-add (indexed) case IF_SVE_GX_3C: // .........i.iimmm ......nnnnnddddd -- SVE floating-point multiply (indexed) - case IF_SVE_FF_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 integer multiply-add (indexed) assert(insOptsScalableStandard(id->idInsOpt())); assert(isVectorRegister(id->idReg1())); // ddddd assert(isVectorRegister(id->idReg2())); // nnnnn @@ -1179,10 +1185,23 @@ void emitter::emitInsSanityCheck(instrDesc* id) assert(isValidUimm3(emitGetInsSC(id))); // iii break; + case IF_SVE_FE_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 integer multiply long (indexed) + case IF_SVE_FG_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 integer multiply-add long (indexed) + case IF_SVE_FH_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 saturating multiply (indexed) + case IF_SVE_FJ_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 saturating multiply-add (indexed) + assert(id->idInsOpt() == INS_OPTS_SCALABLE_S); + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isVectorRegister(id->idReg2())); // nnnnn + assert(isVectorRegister(id->idReg3())); // mmmm + assert((REG_V0 <= id->idReg3()) && (id->idReg3() <= REG_V15)); + assert(isValidUimm2(emitGetInsSC(id))); // ii + break; + case IF_SVE_FD_3C: // ...........immmm ......nnnnnddddd -- SVE2 integer multiply (indexed) + case IF_SVE_FF_3C: // ...........immmm ......nnnnnddddd -- SVE2 integer multiply-add (indexed) + case IF_SVE_FI_3C: // ...........immmm ......nnnnnddddd -- SVE2 saturating multiply high (indexed) case IF_SVE_GU_3B: // ...........immmm ......nnnnnddddd -- SVE floating-point multiply-add (indexed) case IF_SVE_GX_3B: // ...........immmm ......nnnnnddddd -- SVE floating-point multiply (indexed) - case IF_SVE_FF_3C: // ...........immmm ......nnnnnddddd -- SVE2 integer multiply-add (indexed) assert(insOptsScalableStandard(id->idInsOpt())); assert(isVectorRegister(id->idReg1())); // ddddd assert(isVectorRegister(id->idReg2())); // nnnnn @@ -11697,6 +11716,127 @@ void emitter::emitIns_R_R_R_I(instruction ins, } break; + case INS_sve_smullb: + case INS_sve_smullt: + case INS_sve_umullb: + case INS_sve_umullt: + assert(isVectorRegister(reg1)); // ddddd + assert(isVectorRegister(reg2)); // nnnnn + assert(isVectorRegister(reg3)); + + if (opt == INS_OPTS_SCALABLE_H) + { + assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); // mmm + assert(isValidUimm3(imm)); // ii i + fmt = IF_SVE_FE_3A; + } + else + { + assert(opt == INS_OPTS_SCALABLE_S); + assert((REG_V0 <= reg3) && (reg3 <= REG_V15)); // mmmm + assert(isValidUimm2(imm)); // i i + fmt = IF_SVE_FE_3B; + } + break; + + case INS_sve_smlalb: + case INS_sve_smlalt: + case INS_sve_umlalb: + case INS_sve_umlalt: + case INS_sve_smlslb: + case INS_sve_smlslt: + case INS_sve_umlslb: + case INS_sve_umlslt: + assert(isVectorRegister(reg1)); // ddddd + assert(isVectorRegister(reg2)); // nnnnn + assert(isVectorRegister(reg3)); + + if (opt == INS_OPTS_SCALABLE_H) + { + assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); // mmm + assert(isValidUimm3(imm)); // ii i + fmt = IF_SVE_FG_3A; + } + else + { + assert(opt == INS_OPTS_SCALABLE_S); + assert((REG_V0 <= reg3) && (reg3 <= REG_V15)); // mmmm + assert(isValidUimm2(imm)); // i i + fmt = IF_SVE_FG_3B; + } + break; + + case INS_sve_sqdmullb: + case INS_sve_sqdmullt: + assert(isVectorRegister(reg1)); // ddddd + assert(isVectorRegister(reg2)); // nnnnn + assert(isVectorRegister(reg3)); + + if (opt == INS_OPTS_SCALABLE_H) + { + assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); // mmm + assert(isValidUimm3(imm)); // ii i + fmt = IF_SVE_FH_3A; + } + else + { + assert(opt == INS_OPTS_SCALABLE_S); + assert((REG_V0 <= reg3) && (reg3 <= REG_V15)); // mmmm + assert(isValidUimm2(imm)); // i i + fmt = IF_SVE_FH_3B; + } + break; + + case INS_sve_sqdmulh: + case INS_sve_sqrdmulh: + assert(isVectorRegister(reg1)); // ddddd + assert(isVectorRegister(reg2)); // nnnnn + assert(isVectorRegister(reg3)); + + if (opt == INS_OPTS_SCALABLE_H) + { + assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); // mmm + assert(isValidUimm3(imm)); // ii i + fmt = IF_SVE_FI_3A; + } + else if (opt == INS_OPTS_SCALABLE_S) + { + assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); // mmm + assert(isValidUimm2(imm)); // ii + fmt = IF_SVE_FI_3B; + } + else + { + assert(opt == INS_OPTS_SCALABLE_D); + assert((REG_V0 <= reg3) && (reg3 <= REG_V15)); // mmmm + assert(isValidImm1(imm)); // i + fmt = IF_SVE_FI_3C; + } + break; + + case INS_sve_sqdmlalb: + case INS_sve_sqdmlalt: + case INS_sve_sqdmlslb: + case INS_sve_sqdmlslt: + assert(isVectorRegister(reg1)); // ddddd + assert(isVectorRegister(reg2)); // nnnnn + assert(isVectorRegister(reg3)); + + if (opt == INS_OPTS_SCALABLE_H) + { + assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); // mmm + assert(isValidUimm3(imm)); // ii i + fmt = IF_SVE_FJ_3A; + } + else + { + assert(opt == INS_OPTS_SCALABLE_S); + assert((REG_V0 <= reg3) && (reg3 <= REG_V15)); // mmmm + assert(isValidUimm2(imm)); // ii + fmt = IF_SVE_FJ_3B; + } + break; + default: unreached(); break; @@ -17877,6 +18017,17 @@ void emitter::emitIns_Call(EmitCallType callType, return (code_t)imm << 19; } +/***************************************************************************** + * + * Returns the encoding for the immediate value as 1 bit at bit location '11'. + */ + +/*static*/ emitter::code_t emitter::insEncodeImm1_11(ssize_t imm) +{ + assert(isValidImm1(imm)); + return (code_t)imm << 11; +} + /***************************************************************************** * * Returns the encoding for the immediate value as 1 bit at bit location '22'. @@ -20320,10 +20471,11 @@ BYTE* emitter::emitOutput_InstrSve(BYTE* dst, instrDesc* id) case IF_SVE_EY_3A: // ...........iimmm ......nnnnnddddd -- SVE integer dot product (indexed) case IF_SVE_EZ_3A: // ...........iimmm ......nnnnnddddd -- SVE mixed sign dot product (indexed) case IF_SVE_FD_3B: // ...........iimmm ......nnnnnddddd -- SVE2 integer multiply (indexed) + case IF_SVE_FF_3B: // ...........iimmm ......nnnnnddddd -- SVE2 integer multiply-add (indexed) + case IF_SVE_FI_3B: // ...........iimmm ......nnnnnddddd -- SVE2 saturating multiply high (indexed) case IF_SVE_GU_3A: // ...........iimmm ......nnnnnddddd -- SVE floating-point multiply-add (indexed) case IF_SVE_GX_3A: // ...........iimmm ......nnnnnddddd -- SVE floating-point multiply (indexed) case IF_SVE_GY_3B: // ...........iimmm ......nnnnnddddd -- SVE BFloat16 floating-point dot product (indexed) - case IF_SVE_FF_3B: // ...........iimmm ......nnnnnddddd -- SVE2 integer multiply-add (indexed) code = emitInsCodeSve(ins, fmt); code |= insEncodeReg_V_4_to_0(id->idReg1()); // ddddd code |= insEncodeReg_V_9_to_5(id->idReg2()); // nnnnn @@ -20333,23 +20485,53 @@ BYTE* emitter::emitOutput_InstrSve(BYTE* dst, instrDesc* id) break; case IF_SVE_FD_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 integer multiply (indexed) + case IF_SVE_FF_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 integer multiply-add (indexed) + case IF_SVE_FI_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 saturating multiply high (indexed) case IF_SVE_GU_3C: // .........i.iimmm ......nnnnnddddd -- SVE floating-point multiply-add (indexed) case IF_SVE_GX_3C: // .........i.iimmm ......nnnnnddddd -- SVE floating-point multiply (indexed) - case IF_SVE_FF_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 integer multiply-add (indexed) imm = emitGetInsSC(id); code = emitInsCodeSve(ins, fmt); code |= insEncodeReg_V_4_to_0(id->idReg1()); // ddddd code |= insEncodeReg_V_9_to_5(id->idReg2()); // nnnnn code |= insEncodeReg_V_18_to_16(id->idReg3()); // mmm code |= insEncodeUimm2_20_to_19(imm & 0b11); // ii - code |= insEncodeImm1_22((imm & 0b100) >> 2); // i + code |= insEncodeImm1_22(imm >> 2); // i + dst += emitOutput_Instr(dst, code); + break; + + case IF_SVE_FE_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 integer multiply long (indexed) + case IF_SVE_FG_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 integer multiply-add long (indexed) + case IF_SVE_FH_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 saturating multiply (indexed) + case IF_SVE_FJ_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 saturating multiply-add (indexed) + imm = emitGetInsSC(id); + code = emitInsCodeSve(ins, fmt); + code |= insEncodeReg_V_4_to_0(id->idReg1()); // ddddd + code |= insEncodeReg_V_9_to_5(id->idReg2()); // nnnnn + code |= insEncodeImm1_11(imm & 1); // i + code |= insEncodeReg_V_18_to_16(id->idReg3()); // mmm + code |= insEncodeUimm2_20_to_19(imm >> 1); // ii + dst += emitOutput_Instr(dst, code); + break; + + case IF_SVE_FE_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 integer multiply long (indexed) + case IF_SVE_FG_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 integer multiply-add long (indexed) + case IF_SVE_FH_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 saturating multiply (indexed) + case IF_SVE_FJ_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 saturating multiply-add (indexed) + imm = emitGetInsSC(id); + code = emitInsCodeSve(ins, fmt); + code |= insEncodeReg_V_4_to_0(id->idReg1()); // ddddd + code |= insEncodeReg_V_9_to_5(id->idReg2()); // nnnnn + code |= insEncodeImm1_11(imm & 1); // i + code |= insEncodeReg_V_19_to_16(id->idReg3()); // mmmm + code |= insEncodeUimm2_20_to_19(imm & 0b10); // i dst += emitOutput_Instr(dst, code); break; case IF_SVE_FD_3C: // ...........immmm ......nnnnnddddd -- SVE2 integer multiply (indexed) + case IF_SVE_FF_3C: // ...........immmm ......nnnnnddddd -- SVE2 integer multiply-add (indexed) + case IF_SVE_FI_3C: // ...........immmm ......nnnnnddddd -- SVE2 saturating multiply high (indexed) case IF_SVE_GU_3B: // ...........immmm ......nnnnnddddd -- SVE floating-point multiply-add (indexed) case IF_SVE_GX_3B: // ...........immmm ......nnnnnddddd -- SVE floating-point multiply (indexed) - case IF_SVE_FF_3C: // ...........immmm ......nnnnnddddd -- SVE2 integer multiply-add (indexed) code = emitInsCodeSve(ins, fmt); code |= insEncodeReg_V_4_to_0(id->idReg1()); // ddddd code |= insEncodeReg_V_9_to_5(id->idReg2()); // nnnnn @@ -23556,10 +23738,15 @@ void emitter::emitDispInsHelp( // .S, .H, .H[] case IF_SVE_EG_3A: // ...........iimmm ......nnnnnddddd -- SVE two-way dot product (indexed) - case IF_SVE_GY_3B: // ...........iimmm ......nnnnnddddd -- SVE BFloat16 floating-point dot product (indexed) + case IF_SVE_FG_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 integer multiply-add long (indexed) + case IF_SVE_FJ_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 saturating multiply-add (indexed) // .S, .B, .B[] case IF_SVE_EY_3A: // ...........iimmm ......nnnnnddddd -- SVE integer dot product (indexed) case IF_SVE_EZ_3A: // ...........iimmm ......nnnnnddddd -- SVE mixed sign dot product (indexed) + // .S, .H, .H[] + case IF_SVE_FE_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 integer multiply long (indexed) + case IF_SVE_FH_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 saturating multiply (indexed) + case IF_SVE_GY_3B: // ...........iimmm ......nnnnnddddd -- SVE BFloat16 floating-point dot product (indexed) // .S, .S, .S[] case IF_SVE_GU_3A: // ...........iimmm ......nnnnnddddd -- SVE floating-point multiply-add (indexed) case IF_SVE_GX_3A: // ...........iimmm ......nnnnnddddd -- SVE floating-point multiply (indexed) @@ -23567,15 +23754,30 @@ void emitter::emitDispInsHelp( emitDispSveReg(id->idReg1(), INS_OPTS_SCALABLE_S, true); // ddddd emitDispSveReg(id->idReg2(), id->idInsOpt(), true); // nnnnn emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmm + emitDispElementIndex(emitGetInsSC(id), false); // ii/iii + break; + + // .D, .S, .S[] + case IF_SVE_FE_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 integer multiply long (indexed) + case IF_SVE_FH_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 saturating multiply (indexed) + // .D, .S, .S[] + case IF_SVE_FG_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 integer multiply-add long (indexed) + case IF_SVE_FJ_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 saturating multiply-add (indexed) + emitDispSveReg(id->idReg1(), INS_OPTS_SCALABLE_D, true); // ddddd + emitDispSveReg(id->idReg2(), id->idInsOpt(), true); // nnnnn + emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmmm emitDispElementIndex(emitGetInsSC(id), false); // ii break; // .H, .H, .H[] case IF_SVE_FD_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 integer multiply (indexed) + case IF_SVE_FI_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 saturating multiply high (indexed) // .S, .S, .S[] case IF_SVE_FD_3B: // ...........iimmm ......nnnnnddddd -- SVE2 integer multiply (indexed) + case IF_SVE_FI_3B: // ...........iimmm ......nnnnnddddd -- SVE2 saturating multiply high (indexed) // .D, .D, .D[] case IF_SVE_FD_3C: // ...........immmm ......nnnnnddddd -- SVE2 integer multiply (indexed) + case IF_SVE_FI_3C: // ...........immmm ......nnnnnddddd -- SVE2 saturating multiply high (indexed) // .D, .D, .D[] case IF_SVE_GU_3B: // ...........immmm ......nnnnnddddd -- SVE floating-point multiply-add (indexed) case IF_SVE_GX_3B: // ...........immmm ......nnnnnddddd -- SVE floating-point multiply (indexed) @@ -26612,6 +26814,9 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins case IF_SVE_FF_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 integer multiply-add (indexed) case IF_SVE_FF_3B: // ...........iimmm ......nnnnnddddd -- SVE2 integer multiply-add (indexed) case IF_SVE_FF_3C: // ...........immmm ......nnnnnddddd -- SVE2 integer multiply-add (indexed) + case IF_SVE_FI_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 saturating multiply high (indexed) + case IF_SVE_FI_3B: // ...........iimmm ......nnnnnddddd -- SVE2 saturating multiply high (indexed) + case IF_SVE_FI_3C: // ...........immmm ......nnnnnddddd -- SVE2 saturating multiply high (indexed) result.insThroughput = PERFSCORE_THROUGHPUT_2X; result.insLatency = PERFSCORE_LATENCY_5C; break; @@ -26720,14 +26925,18 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins case IF_SVE_CX_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE integer compare vectors case IF_SVE_CX_4A_A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE integer compare vectors - result.insLatency = PERFSCORE_LATENCY_4C; - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - break; - - case IF_SVE_CY_3A: // ........xx.iiiii ...gggnnnnn.DDDD -- SVE integer compare with signed immediate - case IF_SVE_CY_3B: // ........xx.iiiii ii.gggnnnnn.DDDD -- SVE integer compare with unsigned immediate - case IF_SVE_EG_3A: // ...........iimmm ......nnnnnddddd -- SVE two-way dot product (indexed) - case IF_SVE_EY_3A: // ...........iimmm ......nnnnnddddd -- SVE integer dot product (indexed) + case IF_SVE_CY_3A: // ........xx.iiiii ...gggnnnnn.DDDD -- SVE integer compare with signed immediate + case IF_SVE_CY_3B: // ........xx.iiiii ii.gggnnnnn.DDDD -- SVE integer compare with unsigned immediate + case IF_SVE_EG_3A: // ...........iimmm ......nnnnnddddd -- SVE two-way dot product (indexed) + case IF_SVE_EY_3A: // ...........iimmm ......nnnnnddddd -- SVE integer dot product (indexed) + case IF_SVE_FE_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 integer multiply long (indexed) + case IF_SVE_FE_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 integer multiply long (indexed) + case IF_SVE_FG_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 integer multiply-add long (indexed) + case IF_SVE_FG_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 integer multiply-add long (indexed) + case IF_SVE_FH_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 saturating multiply (indexed) + case IF_SVE_FH_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 saturating multiply (indexed) + case IF_SVE_FJ_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 saturating multiply-add (indexed) + case IF_SVE_FJ_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 saturating multiply-add (indexed) result.insLatency = PERFSCORE_LATENCY_4C; result.insThroughput = PERFSCORE_THROUGHPUT_1C; break; diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index 68cd25723af5bb..6fd90d29549bc7 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -574,6 +574,9 @@ static code_t insEncodeUimm2_11_to_10(ssize_t imm); // Returns the encoding for the immediate value as 2-bits at bit locations '20-19'. static code_t insEncodeUimm2_20_to_19(ssize_t imm); +// Returns the encoding for the immediate value as 1 bit at bit location '11'. +static code_t insEncodeImm1_11(ssize_t imm); + // Returns the encoding for the immediate value as 1 bit at bit location '22'. static code_t insEncodeImm1_22(ssize_t imm);