From cdda170b01b608951847d19bc97eeb900fcf989c Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Wed, 7 Feb 2024 21:51:28 -0500 Subject: [PATCH 1/7] Add IF_SVE_FE_3{A,B} --- src/coreclr/jit/codegenarm64test.cpp | 28 +++++++++ src/coreclr/jit/emitarm64.cpp | 92 +++++++++++++++++++++++++--- src/coreclr/jit/emitarm64.h | 3 + 3 files changed, 114 insertions(+), 9 deletions(-) diff --git a/src/coreclr/jit/codegenarm64test.cpp b/src/coreclr/jit/codegenarm64test.cpp index ce46e75992096f..f76890c7355769 100644 --- a/src/coreclr/jit/codegenarm64test.cpp +++ b/src/coreclr/jit/codegenarm64test.cpp @@ -5882,6 +5882,34 @@ void CodeGen::genArm64EmitterUnitTestsSve() theEmitter->emitIns_R_R_R_I(INS_sve_mul, EA_SCALABLE, REG_V22, REG_V23, REG_V15, 1, INS_OPTS_SCALABLE_D); // MUL .D, .D, .D[] + // IF_SVE_FE_3A + theEmitter->emitIns_R_R_R_I(INS_sve_smullb, EA_SCALABLE, REG_V0, REG_V1, REG_V0, 0, + INS_OPTS_SCALABLE_H); // SMULLB .S, .H, .H[] + theEmitter->emitIns_R_R_R_I(INS_sve_smullb, EA_SCALABLE, REG_V2, REG_V3, REG_V1, 1, + INS_OPTS_SCALABLE_H); // SMULLB .S, .H, .H[] + theEmitter->emitIns_R_R_R_I(INS_sve_smullt, EA_SCALABLE, REG_V4, REG_V5, REG_V2, 2, + INS_OPTS_SCALABLE_H); // SMULLT .S, .H, .H[] + theEmitter->emitIns_R_R_R_I(INS_sve_smullt, EA_SCALABLE, REG_V6, REG_V7, REG_V3, 3, + INS_OPTS_SCALABLE_H); // SMULLT .S, .H, .H[] + theEmitter->emitIns_R_R_R_I(INS_sve_umullb, EA_SCALABLE, REG_V8, REG_V9, REG_V4, 4, + INS_OPTS_SCALABLE_H); // UMULLB .S, .H, .H[] + theEmitter->emitIns_R_R_R_I(INS_sve_umullb, EA_SCALABLE, REG_V10, REG_V11, REG_V5, 5, + INS_OPTS_SCALABLE_H); // UMULLB .S, .H, .H[] + theEmitter->emitIns_R_R_R_I(INS_sve_umullt, EA_SCALABLE, REG_V12, REG_V13, REG_V6, 6, + INS_OPTS_SCALABLE_H); // UMULLT .S, .H, .H[] + theEmitter->emitIns_R_R_R_I(INS_sve_umullt, EA_SCALABLE, REG_V14, REG_V15, REG_V7, 7, + INS_OPTS_SCALABLE_H); // UMULLT .S, .H, .H[] + + // IF_SVE_FE_3B + theEmitter->emitIns_R_R_R_I(INS_sve_smullb, EA_SCALABLE, REG_V0, REG_V1, REG_V0, 0, + 
INS_OPTS_SCALABLE_S); // SMULLB .D, .S, .S[] + theEmitter->emitIns_R_R_R_I(INS_sve_smullt, EA_SCALABLE, REG_V2, REG_V3, REG_V5, 1, + INS_OPTS_SCALABLE_S); // SMULLT .D, .S, .S[] + theEmitter->emitIns_R_R_R_I(INS_sve_umullb, EA_SCALABLE, REG_V4, REG_V5, REG_V10, 2, + INS_OPTS_SCALABLE_S); // UMULLB .D, .S, .S[] + theEmitter->emitIns_R_R_R_I(INS_sve_umullt, EA_SCALABLE, REG_V6, REG_V7, REG_V15, 3, + INS_OPTS_SCALABLE_S); // UMULLT .D, .S, .S[] + // IF_SVE_ED_1A theEmitter->emitIns_R_I(INS_sve_smax, EA_SCALABLE, REG_V0, -128, INS_OPTS_SCALABLE_B); // SMAX ., ., # diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 2391e52e5b2004..c2519320e33704 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -1156,6 +1156,7 @@ void emitter::emitInsSanityCheck(instrDesc* id) break; case IF_SVE_FD_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 integer multiply (indexed) + case IF_SVE_FE_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 integer multiply long (indexed) assert(insOptsScalableStandard(id->idInsOpt())); assert(isVectorRegister(id->idReg1())); // ddddd assert(isVectorRegister(id->idReg2())); // nnnnn @@ -1164,6 +1165,15 @@ void emitter::emitInsSanityCheck(instrDesc* id) assert(isValidUimm3(emitGetInsSC(id))); // iii break; + case IF_SVE_FE_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 integer multiply long (indexed) + assert(id->idInsOpt() == INS_OPTS_SCALABLE_S); + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isVectorRegister(id->idReg2())); // nnnnn + assert(isVectorRegister(id->idReg3())); // mmmm + assert((REG_V0 <= id->idReg3()) && (id->idReg3() <= REG_V15)); + assert(isValidUimm2(emitGetInsSC(id))); // ii + break; + case IF_SVE_FD_3C: // ...........immmm ......nnnnnddddd -- SVE2 integer multiply (indexed) assert(insOptsScalableStandard(id->idInsOpt())); assert(isVectorRegister(id->idReg1())); // ddddd @@ -11510,6 +11520,29 @@ void emitter::emitIns_R_R_R_I(instruction ins, fmt = IF_SVE_JN_3A; 
break; + case INS_sve_smullb: + case INS_sve_smullt: + case INS_sve_umullb: + case INS_sve_umullt: + assert(isVectorRegister(reg1)); // ddddd + assert(isVectorRegister(reg2)); // nnnnn + assert(isVectorRegister(reg3)); + + if (opt == INS_OPTS_SCALABLE_H) + { + assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); // mmm + assert(isValidUimm3(imm)); // ii i + fmt = IF_SVE_FE_3A; + } + else + { + assert(opt == INS_OPTS_SCALABLE_S); + assert((REG_V0 <= reg3) && (reg3 <= REG_V15)); // mmmm + assert(isValidUimm2(imm)); // i i + fmt = IF_SVE_FE_3B; + } + break; + default: unreached(); break; @@ -17665,6 +17698,17 @@ void emitter::emitIns_Call(EmitCallType callType, return (code_t)imm << 19; } +/***************************************************************************** + * + * Returns the encoding for the immediate value as 1 bit at bit location '11'. + */ + +/*static*/ emitter::code_t emitter::insEncodeImm1_11(ssize_t imm) +{ + assert(isValidImm1(imm)); + return (code_t)imm << 11; +} + /***************************************************************************** * * Returns the encoding for the immediate value as 1 bit at bit location '22'. 
@@ -20115,7 +20159,29 @@ BYTE* emitter::emitOutput_InstrSve(BYTE* dst, instrDesc* id) code |= insEncodeReg_V_9_to_5(id->idReg2()); // nnnnn code |= insEncodeReg_V_18_to_16(id->idReg3()); // mmm code |= insEncodeUimm2_20_to_19(imm & 0b11); // ii - code |= insEncodeImm1_22((imm & 0b100) >> 2); // i + code |= insEncodeImm1_22(imm >> 2); // i + dst += emitOutput_Instr(dst, code); + break; + + case IF_SVE_FE_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 integer multiply long (indexed) + imm = emitGetInsSC(id); + code = emitInsCodeSve(ins, fmt); + code |= insEncodeReg_V_4_to_0(id->idReg1()); // ddddd + code |= insEncodeReg_V_9_to_5(id->idReg2()); // nnnnn + code |= insEncodeImm1_11(imm & 1); // i + code |= insEncodeReg_V_18_to_16(id->idReg3()); // mmm + code |= insEncodeUimm2_20_to_19(imm >> 1); // ii + dst += emitOutput_Instr(dst, code); + break; + + case IF_SVE_FE_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 integer multiply long (indexed) + imm = emitGetInsSC(id); + code = emitInsCodeSve(ins, fmt); + code |= insEncodeReg_V_4_to_0(id->idReg1()); // ddddd + code |= insEncodeReg_V_9_to_5(id->idReg2()); // nnnnn + code |= insEncodeImm1_11(imm & 1); // i + code |= insEncodeReg_V_19_to_16(id->idReg3()); // mmmm + code |= insEncodeUimm2_20_to_19(imm & 0b10); // i dst += emitOutput_Instr(dst, code); break; @@ -23291,9 +23357,19 @@ void emitter::emitDispInsHelp( // .S, .B, .B[] case IF_SVE_EY_3A: // ...........iimmm ......nnnnnddddd -- SVE integer dot product (indexed) case IF_SVE_EZ_3A: // ...........iimmm ......nnnnnddddd -- SVE mixed sign dot product (indexed) + // .S, .H, .H[] + case IF_SVE_FE_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 integer multiply long (indexed) emitDispSveReg(id->idReg1(), INS_OPTS_SCALABLE_S, true); // ddddd emitDispSveReg(id->idReg2(), id->idInsOpt(), true); // nnnnn emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmm + emitDispElementIndex(emitGetInsSC(id), false); // ii/iii + break; + + // .D, .S, .S[] + case IF_SVE_FE_3B: // 
...........immmm ....i.nnnnnddddd -- SVE2 integer multiply long (indexed) + emitDispSveReg(id->idReg1(), INS_OPTS_SCALABLE_D, true); // ddddd + emitDispSveReg(id->idReg2(), id->idInsOpt(), true); // nnnnn + emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmmm emitDispElementIndex(emitGetInsSC(id), false); // ii break; @@ -26393,14 +26469,12 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins case IF_SVE_CX_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE integer compare vectors case IF_SVE_CX_4A_A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE integer compare vectors - result.insLatency = PERFSCORE_LATENCY_4C; - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - break; - - case IF_SVE_CY_3A: // ........xx.iiiii ...gggnnnnn.DDDD -- SVE integer compare with signed immediate - case IF_SVE_CY_3B: // ........xx.iiiii ii.gggnnnnn.DDDD -- SVE integer compare with unsigned immediate - case IF_SVE_EG_3A: // ...........iimmm ......nnnnnddddd -- SVE two-way dot product (indexed) - case IF_SVE_EY_3A: // ...........iimmm ......nnnnnddddd -- SVE integer dot product (indexed) + case IF_SVE_CY_3A: // ........xx.iiiii ...gggnnnnn.DDDD -- SVE integer compare with signed immediate + case IF_SVE_CY_3B: // ........xx.iiiii ii.gggnnnnn.DDDD -- SVE integer compare with unsigned immediate + case IF_SVE_EG_3A: // ...........iimmm ......nnnnnddddd -- SVE two-way dot product (indexed) + case IF_SVE_EY_3A: // ...........iimmm ......nnnnnddddd -- SVE integer dot product (indexed) + case IF_SVE_FE_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 integer multiply long (indexed) + case IF_SVE_FE_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 integer multiply long (indexed) result.insLatency = PERFSCORE_LATENCY_4C; result.insThroughput = PERFSCORE_THROUGHPUT_1C; break; diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index 6d40ce8116fb3e..f1ef3073cb960c 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -570,6 
+570,9 @@ static code_t insEncodeUimm2_11_to_10(ssize_t imm); // Returns the encoding for the immediate value as 2-bits at bit locations '20-19'. static code_t insEncodeUimm2_20_to_19(ssize_t imm); +// Returns the encoding for the immediate value as 1 bit at bit location '11'. +static code_t insEncodeImm1_11(ssize_t imm); + // Returns the encoding for the immediate value as 1 bit at bit location '22'. static code_t insEncodeImm1_22(ssize_t imm); From beddbf815757bdd831801820a4885761423d3984 Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Wed, 7 Feb 2024 22:08:29 -0500 Subject: [PATCH 2/7] Implement IF_SVE_FG_3{A,B} --- src/coreclr/jit/codegenarm64test.cpp | 36 ++++++++++++++++++++++++++++ src/coreclr/jit/emitarm64.cpp | 36 ++++++++++++++++++++++++++++ 2 files changed, 72 insertions(+) diff --git a/src/coreclr/jit/codegenarm64test.cpp b/src/coreclr/jit/codegenarm64test.cpp index f76890c7355769..ff3a414cdc3ff9 100644 --- a/src/coreclr/jit/codegenarm64test.cpp +++ b/src/coreclr/jit/codegenarm64test.cpp @@ -5910,6 +5910,42 @@ void CodeGen::genArm64EmitterUnitTestsSve() theEmitter->emitIns_R_R_R_I(INS_sve_umullt, EA_SCALABLE, REG_V6, REG_V7, REG_V15, 3, INS_OPTS_SCALABLE_S); // UMULLT .D, .S, .S[] + // IF_SVE_FG_3A + theEmitter->emitIns_R_R_R_I(INS_sve_smlalb, EA_SCALABLE, REG_V0, REG_V1, REG_V0, 0, + INS_OPTS_SCALABLE_H); // SMLALB .S, .H, .H[] + theEmitter->emitIns_R_R_R_I(INS_sve_smlalt, EA_SCALABLE, REG_V2, REG_V3, REG_V1, 1, + INS_OPTS_SCALABLE_H); // SMLALT .S, .H, .H[] + theEmitter->emitIns_R_R_R_I(INS_sve_smlslb, EA_SCALABLE, REG_V4, REG_V5, REG_V2, 2, + INS_OPTS_SCALABLE_H); // SMLSLB .S, .H, .H[] + theEmitter->emitIns_R_R_R_I(INS_sve_smlslt, EA_SCALABLE, REG_V6, REG_V7, REG_V3, 3, + INS_OPTS_SCALABLE_H); // SMLSLT .S, .H, .H[] + theEmitter->emitIns_R_R_R_I(INS_sve_umlalb, EA_SCALABLE, REG_V8, REG_V9, REG_V4, 4, + INS_OPTS_SCALABLE_H); // UMLALB .S, .H, .H[] + theEmitter->emitIns_R_R_R_I(INS_sve_umlalt, EA_SCALABLE, REG_V10, REG_V11, REG_V5, 
5, + INS_OPTS_SCALABLE_H); // UMLALT <Zda>.S, <Zn>.H, <Zm>.H[<imm>] + theEmitter->emitIns_R_R_R_I(INS_sve_umlslb, EA_SCALABLE, REG_V12, REG_V13, REG_V6, 6, + INS_OPTS_SCALABLE_H); // UMLSLB <Zda>.S, <Zn>.H, <Zm>.H[<imm>] + theEmitter->emitIns_R_R_R_I(INS_sve_umlslt, EA_SCALABLE, REG_V14, REG_V15, REG_V7, 7, + INS_OPTS_SCALABLE_H); // UMLSLT <Zda>.S, <Zn>.H, <Zm>.H[<imm>] + + // IF_SVE_FG_3B + theEmitter->emitIns_R_R_R_I(INS_sve_smlalb, EA_SCALABLE, REG_V0, REG_V1, REG_V0, 0, + INS_OPTS_SCALABLE_S); // SMLALB <Zda>.D, <Zn>.S, <Zm>.S[<imm>] + theEmitter->emitIns_R_R_R_I(INS_sve_smlalt, EA_SCALABLE, REG_V2, REG_V3, REG_V2, 1, + INS_OPTS_SCALABLE_S); // SMLALT <Zda>.D, <Zn>.S, <Zm>.S[<imm>] + theEmitter->emitIns_R_R_R_I(INS_sve_smlslb, EA_SCALABLE, REG_V4, REG_V5, REG_V4, 2, + INS_OPTS_SCALABLE_S); // SMLSLB <Zda>.D, <Zn>.S, <Zm>.S[<imm>] + theEmitter->emitIns_R_R_R_I(INS_sve_smlslt, EA_SCALABLE, REG_V6, REG_V7, REG_V6, 3, + INS_OPTS_SCALABLE_S); // SMLSLT <Zda>.D, <Zn>.S, <Zm>.S[<imm>] + theEmitter->emitIns_R_R_R_I(INS_sve_umlalb, EA_SCALABLE, REG_V8, REG_V9, REG_V8, 0, + INS_OPTS_SCALABLE_S); // UMLALB <Zda>.D, <Zn>.S, <Zm>.S[<imm>] + theEmitter->emitIns_R_R_R_I(INS_sve_umlalt, EA_SCALABLE, REG_V10, REG_V11, REG_V10, 1, + INS_OPTS_SCALABLE_S); // UMLALT <Zda>.D, <Zn>.S, <Zm>.S[<imm>] + theEmitter->emitIns_R_R_R_I(INS_sve_umlslb, EA_SCALABLE, REG_V12, REG_V13, REG_V12, 2, + INS_OPTS_SCALABLE_S); // UMLSLB <Zda>.D, <Zn>.S, <Zm>.S[<imm>] + theEmitter->emitIns_R_R_R_I(INS_sve_umlslt, EA_SCALABLE, REG_V14, REG_V15, REG_V14, 3, + INS_OPTS_SCALABLE_S); // UMLSLT <Zda>.D, <Zn>.S, <Zm>.S[<imm>] + // IF_SVE_ED_1A theEmitter->emitIns_R_I(INS_sve_smax, EA_SCALABLE, REG_V0, -128, INS_OPTS_SCALABLE_B); // SMAX <Zdn>.<T>, <Zdn>.<T>, #<imm> diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index c2519320e33704..842a6910b64abd 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -1157,6 +1157,7 @@ void emitter::emitInsSanityCheck(instrDesc* id) case IF_SVE_FD_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 integer multiply (indexed) case IF_SVE_FE_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 integer multiply long (indexed) + case 
IF_SVE_FG_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 integer multiply-add long (indexed) assert(insOptsScalableStandard(id->idInsOpt())); assert(isVectorRegister(id->idReg1())); // ddddd assert(isVectorRegister(id->idReg2())); // nnnnn @@ -1166,6 +1167,7 @@ void emitter::emitInsSanityCheck(instrDesc* id) break; case IF_SVE_FE_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 integer multiply long (indexed) + case IF_SVE_FG_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 integer multiply-add long (indexed) assert(id->idInsOpt() == INS_OPTS_SCALABLE_S); assert(isVectorRegister(id->idReg1())); // ddddd assert(isVectorRegister(id->idReg2())); // nnnnn @@ -11543,6 +11545,33 @@ void emitter::emitIns_R_R_R_I(instruction ins, } break; + case INS_sve_smlalb: + case INS_sve_smlalt: + case INS_sve_umlalb: + case INS_sve_umlalt: + case INS_sve_smlslb: + case INS_sve_smlslt: + case INS_sve_umlslb: + case INS_sve_umlslt: + assert(isVectorRegister(reg1)); // ddddd + assert(isVectorRegister(reg2)); // nnnnn + assert(isVectorRegister(reg3)); + + if (opt == INS_OPTS_SCALABLE_H) + { + assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); // mmm + assert(isValidUimm3(imm)); // ii i + fmt = IF_SVE_FG_3A; + } + else + { + assert(opt == INS_OPTS_SCALABLE_S); + assert((REG_V0 <= reg3) && (reg3 <= REG_V15)); // mmmm + assert(isValidUimm2(imm)); // i i + fmt = IF_SVE_FG_3B; + } + break; + default: unreached(); break; @@ -20164,6 +20193,7 @@ BYTE* emitter::emitOutput_InstrSve(BYTE* dst, instrDesc* id) break; case IF_SVE_FE_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 integer multiply long (indexed) + case IF_SVE_FG_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 integer multiply-add long (indexed) imm = emitGetInsSC(id); code = emitInsCodeSve(ins, fmt); code |= insEncodeReg_V_4_to_0(id->idReg1()); // ddddd @@ -20175,6 +20205,7 @@ BYTE* emitter::emitOutput_InstrSve(BYTE* dst, instrDesc* id) break; case IF_SVE_FE_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 integer multiply long 
(indexed) + case IF_SVE_FG_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 integer multiply-add long (indexed) imm = emitGetInsSC(id); code = emitInsCodeSve(ins, fmt); code |= insEncodeReg_V_4_to_0(id->idReg1()); // ddddd @@ -23354,6 +23385,7 @@ void emitter::emitDispInsHelp( // .S, .H, .H[] case IF_SVE_EG_3A: // ...........iimmm ......nnnnnddddd -- SVE two-way dot product (indexed) + case IF_SVE_FG_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 integer multiply-add long (indexed) // .S, .B, .B[] case IF_SVE_EY_3A: // ...........iimmm ......nnnnnddddd -- SVE integer dot product (indexed) case IF_SVE_EZ_3A: // ...........iimmm ......nnnnnddddd -- SVE mixed sign dot product (indexed) @@ -23367,6 +23399,8 @@ void emitter::emitDispInsHelp( // .D, .S, .S[] case IF_SVE_FE_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 integer multiply long (indexed) + // .D, .S, .S[] + case IF_SVE_FG_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 integer multiply-add long (indexed) emitDispSveReg(id->idReg1(), INS_OPTS_SCALABLE_D, true); // ddddd emitDispSveReg(id->idReg2(), id->idInsOpt(), true); // nnnnn emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmmm @@ -26475,6 +26509,8 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins case IF_SVE_EY_3A: // ...........iimmm ......nnnnnddddd -- SVE integer dot product (indexed) case IF_SVE_FE_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 integer multiply long (indexed) case IF_SVE_FE_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 integer multiply long (indexed) + case IF_SVE_FG_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 integer multiply-add long (indexed) + case IF_SVE_FG_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 integer multiply-add long (indexed) result.insLatency = PERFSCORE_LATENCY_4C; result.insThroughput = PERFSCORE_THROUGHPUT_1C; break; From e891973ff08da68c81f198ae0c7dd1d14482e8df Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Wed, 7 Feb 2024 22:27:39 
-0500 Subject: [PATCH 3/7] Implement IF_SVE_FH_3{A,B} --- src/coreclr/jit/codegenarm64test.cpp | 34 +++++++++++++++++++++++++--- src/coreclr/jit/emitarm64.cpp | 29 ++++++++++++++++++++++++ 2 files changed, 60 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/codegenarm64test.cpp b/src/coreclr/jit/codegenarm64test.cpp index ff3a414cdc3ff9..20bdaf19bcb6fa 100644 --- a/src/coreclr/jit/codegenarm64test.cpp +++ b/src/coreclr/jit/codegenarm64test.cpp @@ -5903,11 +5903,19 @@ void CodeGen::genArm64EmitterUnitTestsSve() // IF_SVE_FE_3B theEmitter->emitIns_R_R_R_I(INS_sve_smullb, EA_SCALABLE, REG_V0, REG_V1, REG_V0, 0, INS_OPTS_SCALABLE_S); // SMULLB .D, .S, .S[] - theEmitter->emitIns_R_R_R_I(INS_sve_smullt, EA_SCALABLE, REG_V2, REG_V3, REG_V5, 1, + theEmitter->emitIns_R_R_R_I(INS_sve_smullb, EA_SCALABLE, REG_V2, REG_V3, REG_V2, 1, + INS_OPTS_SCALABLE_S); // SMULLB .D, .S, .S[] + theEmitter->emitIns_R_R_R_I(INS_sve_smullt, EA_SCALABLE, REG_V4, REG_V5, REG_V4, 2, + INS_OPTS_SCALABLE_S); // SMULLT .D, .S, .S[] + theEmitter->emitIns_R_R_R_I(INS_sve_smullt, EA_SCALABLE, REG_V6, REG_V7, REG_V6, 3, INS_OPTS_SCALABLE_S); // SMULLT .D, .S, .S[] - theEmitter->emitIns_R_R_R_I(INS_sve_umullb, EA_SCALABLE, REG_V4, REG_V5, REG_V10, 2, + theEmitter->emitIns_R_R_R_I(INS_sve_umullb, EA_SCALABLE, REG_V8, REG_V9, REG_V8, 0, + INS_OPTS_SCALABLE_S); // UMULLB .D, .S, .S[] + theEmitter->emitIns_R_R_R_I(INS_sve_umullb, EA_SCALABLE, REG_V10, REG_V11, REG_V10, 1, INS_OPTS_SCALABLE_S); // UMULLB .D, .S, .S[] - theEmitter->emitIns_R_R_R_I(INS_sve_umullt, EA_SCALABLE, REG_V6, REG_V7, REG_V15, 3, + theEmitter->emitIns_R_R_R_I(INS_sve_umullt, EA_SCALABLE, REG_V12, REG_V13, REG_V12, 2, + INS_OPTS_SCALABLE_S); // UMULLT .D, .S, .S[] + theEmitter->emitIns_R_R_R_I(INS_sve_umullt, EA_SCALABLE, REG_V14, REG_V15, REG_V14, 3, INS_OPTS_SCALABLE_S); // UMULLT .D, .S, .S[] // IF_SVE_FG_3A @@ -5946,6 +5954,26 @@ void CodeGen::genArm64EmitterUnitTestsSve() theEmitter->emitIns_R_R_R_I(INS_sve_umlslt, 
EA_SCALABLE, REG_V14, REG_V15, REG_V14, 3, INS_OPTS_SCALABLE_S); // UMLSLT <Zda>.D, <Zn>.S, <Zm>.S[<imm>] + // IF_SVE_FH_3A + theEmitter->emitIns_R_R_R_I(INS_sve_sqdmullb, EA_SCALABLE, REG_V0, REG_V2, REG_V1, 1, + INS_OPTS_SCALABLE_H); // SQDMULLB <Zd>.S, <Zn>.H, <Zm>.H[<imm>] + theEmitter->emitIns_R_R_R_I(INS_sve_sqdmullb, EA_SCALABLE, REG_V4, REG_V6, REG_V3, 3, + INS_OPTS_SCALABLE_H); // SQDMULLB <Zd>.S, <Zn>.H, <Zm>.H[<imm>] + theEmitter->emitIns_R_R_R_I(INS_sve_sqdmullt, EA_SCALABLE, REG_V8, REG_V10, REG_V5, 5, + INS_OPTS_SCALABLE_H); // SQDMULLT <Zd>.S, <Zn>.H, <Zm>.H[<imm>] + theEmitter->emitIns_R_R_R_I(INS_sve_sqdmullt, EA_SCALABLE, REG_V12, REG_V14, REG_V7, 7, + INS_OPTS_SCALABLE_H); // SQDMULLT <Zd>.S, <Zn>.H, <Zm>.H[<imm>] + + // IF_SVE_FH_3B + theEmitter->emitIns_R_R_R_I(INS_sve_sqdmullb, EA_SCALABLE, REG_V0, REG_V2, REG_V0, 0, + INS_OPTS_SCALABLE_S); // SQDMULLB <Zd>.D, <Zn>.S, <Zm>.S[<imm>] + theEmitter->emitIns_R_R_R_I(INS_sve_sqdmullb, EA_SCALABLE, REG_V4, REG_V6, REG_V5, 1, + INS_OPTS_SCALABLE_S); // SQDMULLB <Zd>.D, <Zn>.S, <Zm>.S[<imm>] + theEmitter->emitIns_R_R_R_I(INS_sve_sqdmullt, EA_SCALABLE, REG_V8, REG_V10, REG_V10, 2, + INS_OPTS_SCALABLE_S); // SQDMULLT <Zd>.D, <Zn>.S, <Zm>.S[<imm>] + theEmitter->emitIns_R_R_R_I(INS_sve_sqdmullt, EA_SCALABLE, REG_V12, REG_V14, REG_V15, 3, + INS_OPTS_SCALABLE_S); // SQDMULLT <Zd>.D, <Zn>.S, <Zm>.S[<imm>] + // IF_SVE_ED_1A theEmitter->emitIns_R_I(INS_sve_smax, EA_SCALABLE, REG_V0, -128, INS_OPTS_SCALABLE_B); // SMAX <Zdn>.<T>, <Zdn>.<T>, #<imm> diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 842a6910b64abd..07172cdf39ed39 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -1158,6 +1158,7 @@ void emitter::emitInsSanityCheck(instrDesc* id) case IF_SVE_FD_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 integer multiply (indexed) case IF_SVE_FE_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 integer multiply long (indexed) case IF_SVE_FG_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 integer multiply-add long (indexed) + case IF_SVE_FH_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 saturating multiply (indexed) 
assert(insOptsScalableStandard(id->idInsOpt())); assert(isVectorRegister(id->idReg1())); // ddddd assert(isVectorRegister(id->idReg2())); // nnnnn @@ -1168,6 +1169,7 @@ void emitter::emitInsSanityCheck(instrDesc* id) case IF_SVE_FE_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 integer multiply long (indexed) case IF_SVE_FG_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 integer multiply-add long (indexed) + case IF_SVE_FH_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 saturating multiply (indexed) assert(id->idInsOpt() == INS_OPTS_SCALABLE_S); assert(isVectorRegister(id->idReg1())); // ddddd assert(isVectorRegister(id->idReg2())); // nnnnn @@ -11572,6 +11574,27 @@ void emitter::emitIns_R_R_R_I(instruction ins, } break; + case INS_sve_sqdmullb: + case INS_sve_sqdmullt: + assert(isVectorRegister(reg1)); // ddddd + assert(isVectorRegister(reg2)); // nnnnn + assert(isVectorRegister(reg3)); + + if (opt == INS_OPTS_SCALABLE_H) + { + assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); // mmm + assert(isValidUimm3(imm)); // ii i + fmt = IF_SVE_FH_3A; + } + else + { + assert(opt == INS_OPTS_SCALABLE_S); + assert((REG_V0 <= reg3) && (reg3 <= REG_V15)); // mmmm + assert(isValidUimm2(imm)); // i i + fmt = IF_SVE_FH_3B; + } + break; + default: unreached(); break; @@ -20194,6 +20217,7 @@ BYTE* emitter::emitOutput_InstrSve(BYTE* dst, instrDesc* id) case IF_SVE_FE_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 integer multiply long (indexed) case IF_SVE_FG_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 integer multiply-add long (indexed) + case IF_SVE_FH_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 saturating multiply (indexed) imm = emitGetInsSC(id); code = emitInsCodeSve(ins, fmt); code |= insEncodeReg_V_4_to_0(id->idReg1()); // ddddd @@ -20206,6 +20230,7 @@ BYTE* emitter::emitOutput_InstrSve(BYTE* dst, instrDesc* id) case IF_SVE_FE_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 integer multiply long (indexed) case IF_SVE_FG_3B: // ...........immmm ....i.nnnnnddddd -- 
SVE2 integer multiply-add long (indexed) + case IF_SVE_FH_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 saturating multiply (indexed) imm = emitGetInsSC(id); code = emitInsCodeSve(ins, fmt); code |= insEncodeReg_V_4_to_0(id->idReg1()); // ddddd @@ -23391,6 +23416,7 @@ void emitter::emitDispInsHelp( case IF_SVE_EZ_3A: // ...........iimmm ......nnnnnddddd -- SVE mixed sign dot product (indexed) // .S, .H, .H[] case IF_SVE_FE_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 integer multiply long (indexed) + case IF_SVE_FH_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 saturating multiply (indexed) emitDispSveReg(id->idReg1(), INS_OPTS_SCALABLE_S, true); // ddddd emitDispSveReg(id->idReg2(), id->idInsOpt(), true); // nnnnn emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmm @@ -23399,6 +23425,7 @@ void emitter::emitDispInsHelp( // .D, .S, .S[] case IF_SVE_FE_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 integer multiply long (indexed) + case IF_SVE_FH_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 saturating multiply (indexed) // .D, .S, .S[] case IF_SVE_FG_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 integer multiply-add long (indexed) emitDispSveReg(id->idReg1(), INS_OPTS_SCALABLE_D, true); // ddddd @@ -26511,6 +26538,8 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins case IF_SVE_FE_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 integer multiply long (indexed) case IF_SVE_FG_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 integer multiply-add long (indexed) case IF_SVE_FG_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 integer multiply-add long (indexed) + case IF_SVE_FH_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 saturating multiply (indexed) + case IF_SVE_FH_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 saturating multiply (indexed) result.insLatency = PERFSCORE_LATENCY_4C; result.insThroughput = PERFSCORE_THROUGHPUT_1C; break; From 1eeb871daf693424e76d3ec1d3dabfc1199e742b Mon Sep 17 00:00:00 2001 
From: "Aman Khalid (from Dev Box)" Date: Wed, 7 Feb 2024 22:48:24 -0500 Subject: [PATCH 4/7] Implement IF_SVE_FI_3{A,B,C} --- src/coreclr/jit/codegenarm64test.cpp | 30 +++++++++++++++++++++ src/coreclr/jit/emitarm64.cpp | 39 ++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+) diff --git a/src/coreclr/jit/codegenarm64test.cpp b/src/coreclr/jit/codegenarm64test.cpp index 20bdaf19bcb6fa..4d5ea2a423cbb5 100644 --- a/src/coreclr/jit/codegenarm64test.cpp +++ b/src/coreclr/jit/codegenarm64test.cpp @@ -5974,6 +5974,36 @@ void CodeGen::genArm64EmitterUnitTestsSve() theEmitter->emitIns_R_R_R_I(INS_sve_sqdmullt, EA_SCALABLE, REG_V12, REG_V14, REG_V15, 3, INS_OPTS_SCALABLE_S); // SQDMULLT .D, .S, .S[] + // IF_SVE_FI_3A + theEmitter->emitIns_R_R_R_I(INS_sve_sqdmulh, EA_SCALABLE, REG_V0, REG_V1, REG_V1, 1, + INS_OPTS_SCALABLE_H); // SQDMULH .H, .H, .H[] + theEmitter->emitIns_R_R_R_I(INS_sve_sqdmulh, EA_SCALABLE, REG_V2, REG_V3, REG_V3, 3, + INS_OPTS_SCALABLE_H); // SQDMULH .H, .H, .H[] + theEmitter->emitIns_R_R_R_I(INS_sve_sqrdmulh, EA_SCALABLE, REG_V4, REG_V5, REG_V5, 5, + INS_OPTS_SCALABLE_H); // SQRDMULH .H, .H, .H[] + theEmitter->emitIns_R_R_R_I(INS_sve_sqrdmulh, EA_SCALABLE, REG_V6, REG_V7, REG_V7, 7, + INS_OPTS_SCALABLE_H); // SQRDMULH .H, .H, .H[] + + // IF_SVE_FI_3B + theEmitter->emitIns_R_R_R_I(INS_sve_sqdmulh, EA_SCALABLE, REG_V8, REG_V9, REG_V0, 0, + INS_OPTS_SCALABLE_S); // SQDMULH .S, .S, .S[] + theEmitter->emitIns_R_R_R_I(INS_sve_sqdmulh, EA_SCALABLE, REG_V10, REG_V11, REG_V2, 1, + INS_OPTS_SCALABLE_S); // SQDMULH .S, .S, .S[] + theEmitter->emitIns_R_R_R_I(INS_sve_sqrdmulh, EA_SCALABLE, REG_V12, REG_V13, REG_V4, 2, + INS_OPTS_SCALABLE_S); // SQRDMULH .S, .S, .S[] + theEmitter->emitIns_R_R_R_I(INS_sve_sqrdmulh, EA_SCALABLE, REG_V14, REG_V15, REG_V6, 3, + INS_OPTS_SCALABLE_S); // SQRDMULH .S, .S, .S[] + + // IF_SVE_FI_3C + theEmitter->emitIns_R_R_R_I(INS_sve_sqdmulh, EA_SCALABLE, REG_V16, REG_V17, REG_V0, 0, + INS_OPTS_SCALABLE_D); // SQDMULH .D, .D, .D[] 
+ theEmitter->emitIns_R_R_R_I(INS_sve_sqdmulh, EA_SCALABLE, REG_V18, REG_V19, REG_V5, 1, + INS_OPTS_SCALABLE_D); // SQDMULH <Zd>.D, <Zn>.D, <Zm>.D[<imm>] + theEmitter->emitIns_R_R_R_I(INS_sve_sqrdmulh, EA_SCALABLE, REG_V20, REG_V21, REG_V10, 0, + INS_OPTS_SCALABLE_D); // SQRDMULH <Zd>.D, <Zn>.D, <Zm>.D[<imm>] + theEmitter->emitIns_R_R_R_I(INS_sve_sqrdmulh, EA_SCALABLE, REG_V22, REG_V23, REG_V15, 1, + INS_OPTS_SCALABLE_D); // SQRDMULH <Zd>.D, <Zn>.D, <Zm>.D[<imm>] + // IF_SVE_ED_1A theEmitter->emitIns_R_I(INS_sve_smax, EA_SCALABLE, REG_V0, -128, INS_OPTS_SCALABLE_B); // SMAX <Zdn>.<T>, <Zdn>.<T>, #<imm> diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 07172cdf39ed39..6ffe3843dcdd6c 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -1147,6 +1147,7 @@ void emitter::emitInsSanityCheck(instrDesc* id) case IF_SVE_EY_3A: // ...........iimmm ......nnnnnddddd -- SVE integer dot product (indexed) case IF_SVE_EZ_3A: // ...........iimmm ......nnnnnddddd -- SVE mixed sign dot product (indexed) case IF_SVE_FD_3B: // ...........iimmm ......nnnnnddddd -- SVE2 integer multiply (indexed) + case IF_SVE_FI_3B: // ...........iimmm ......nnnnnddddd -- SVE2 saturating multiply high (indexed) assert(insOptsScalableStandard(id->idInsOpt())); assert(isVectorRegister(id->idReg1())); // ddddd assert(isVectorRegister(id->idReg2())); // nnnnn @@ -1159,6 +1160,7 @@ void emitter::emitInsSanityCheck(instrDesc* id) case IF_SVE_FE_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 integer multiply long (indexed) case IF_SVE_FG_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 integer multiply-add long (indexed) case IF_SVE_FH_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 saturating multiply (indexed) + case IF_SVE_FI_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 saturating multiply high (indexed) assert(insOptsScalableStandard(id->idInsOpt())); assert(isVectorRegister(id->idReg1())); // ddddd assert(isVectorRegister(id->idReg2())); // nnnnn @@ -1179,6 +1181,7 @@ void 
emitter::emitInsSanityCheck(instrDesc* id) break; case IF_SVE_FD_3C: // ...........immmm ......nnnnnddddd -- SVE2 integer multiply (indexed) + case IF_SVE_FI_3C: // ...........immmm ......nnnnnddddd -- SVE2 saturating multiply high (indexed) assert(insOptsScalableStandard(id->idInsOpt())); assert(isVectorRegister(id->idReg1())); // ddddd assert(isVectorRegister(id->idReg2())); // nnnnn @@ -11595,6 +11598,33 @@ void emitter::emitIns_R_R_R_I(instruction ins, } break; + case INS_sve_sqdmulh: + case INS_sve_sqrdmulh: + assert(isVectorRegister(reg1)); // ddddd + assert(isVectorRegister(reg2)); // nnnnn + assert(isVectorRegister(reg3)); + + if (opt == INS_OPTS_SCALABLE_H) + { + assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); // mmm + assert(isValidUimm3(imm)); // ii i + fmt = IF_SVE_FI_3A; + } + else if (opt == INS_OPTS_SCALABLE_S) + { + assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); // mmm + assert(isValidUimm2(imm)); // ii + fmt = IF_SVE_FI_3B; + } + else + { + assert(opt == INS_OPTS_SCALABLE_D); + assert((REG_V0 <= reg3) && (reg3 <= REG_V15)); // mmmm + assert(isValidImm1(imm)); // i + fmt = IF_SVE_FI_3C; + } + break; + default: unreached(); break; @@ -20196,6 +20226,7 @@ BYTE* emitter::emitOutput_InstrSve(BYTE* dst, instrDesc* id) case IF_SVE_EY_3A: // ...........iimmm ......nnnnnddddd -- SVE integer dot product (indexed) case IF_SVE_EZ_3A: // ...........iimmm ......nnnnnddddd -- SVE mixed sign dot product (indexed) case IF_SVE_FD_3B: // ...........iimmm ......nnnnnddddd -- SVE2 integer multiply (indexed) + case IF_SVE_FI_3B: // ...........iimmm ......nnnnnddddd -- SVE2 saturating multiply high (indexed) code = emitInsCodeSve(ins, fmt); code |= insEncodeReg_V_4_to_0(id->idReg1()); // ddddd code |= insEncodeReg_V_9_to_5(id->idReg2()); // nnnnn @@ -20205,6 +20236,7 @@ BYTE* emitter::emitOutput_InstrSve(BYTE* dst, instrDesc* id) break; case IF_SVE_FD_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 integer multiply (indexed) + case IF_SVE_FI_3A: // .........i.iimmm 
......nnnnnddddd -- SVE2 saturating multiply high (indexed) imm = emitGetInsSC(id); code = emitInsCodeSve(ins, fmt); code |= insEncodeReg_V_4_to_0(id->idReg1()); // ddddd @@ -20242,6 +20274,7 @@ BYTE* emitter::emitOutput_InstrSve(BYTE* dst, instrDesc* id) break; case IF_SVE_FD_3C: // ...........immmm ......nnnnnddddd -- SVE2 integer multiply (indexed) + case IF_SVE_FI_3C: // ...........immmm ......nnnnnddddd -- SVE2 saturating multiply high (indexed) code = emitInsCodeSve(ins, fmt); code |= insEncodeReg_V_4_to_0(id->idReg1()); // ddddd code |= insEncodeReg_V_9_to_5(id->idReg2()); // nnnnn @@ -23436,10 +23469,13 @@ void emitter::emitDispInsHelp( // .H, .H, .H[] case IF_SVE_FD_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 integer multiply (indexed) + case IF_SVE_FI_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 saturating multiply high (indexed) // .S, .S, .S[] case IF_SVE_FD_3B: // ...........iimmm ......nnnnnddddd -- SVE2 integer multiply (indexed) + case IF_SVE_FI_3B: // ...........iimmm ......nnnnnddddd -- SVE2 saturating multiply high (indexed) // .D, .D, .D[] case IF_SVE_FD_3C: // ...........immmm ......nnnnnddddd -- SVE2 integer multiply (indexed) + case IF_SVE_FI_3C: // ...........immmm ......nnnnnddddd -- SVE2 saturating multiply high (indexed) emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd emitDispSveReg(id->idReg2(), id->idInsOpt(), true); // nnnnn emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmm @@ -26455,6 +26491,9 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins case IF_SVE_FD_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 integer multiply (indexed) case IF_SVE_FD_3B: // ...........iimmm ......nnnnnddddd -- SVE2 integer multiply (indexed) case IF_SVE_FD_3C: // ...........immmm ......nnnnnddddd -- SVE2 integer multiply (indexed) + case IF_SVE_FI_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 saturating multiply high (indexed) + case IF_SVE_FI_3B: // ...........iimmm ......nnnnnddddd -- 
SVE2 saturating multiply high (indexed) + case IF_SVE_FI_3C: // ...........immmm ......nnnnnddddd -- SVE2 saturating multiply high (indexed) result.insThroughput = PERFSCORE_THROUGHPUT_2X; result.insLatency = PERFSCORE_LATENCY_5C; break; From 0996a0624f241ad426cd324a5b53f0324a96ba4f Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Wed, 7 Feb 2024 23:03:37 -0500 Subject: [PATCH 5/7] Implement IF_SVE_FJ_3{A,B} --- src/coreclr/jit/codegenarm64test.cpp | 20 ++++++++++++++++++ src/coreclr/jit/emitarm64.cpp | 31 ++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+) diff --git a/src/coreclr/jit/codegenarm64test.cpp b/src/coreclr/jit/codegenarm64test.cpp index 4d5ea2a423cbb5..3cd30c559a6b89 100644 --- a/src/coreclr/jit/codegenarm64test.cpp +++ b/src/coreclr/jit/codegenarm64test.cpp @@ -6004,6 +6004,26 @@ void CodeGen::genArm64EmitterUnitTestsSve() theEmitter->emitIns_R_R_R_I(INS_sve_sqrdmulh, EA_SCALABLE, REG_V22, REG_V23, REG_V15, 1, INS_OPTS_SCALABLE_D); // SQRDMULH .D, .D, .D[] + // IF_SVE_FJ_3A + theEmitter->emitIns_R_R_R_I(INS_sve_sqdmlalb, EA_SCALABLE, REG_V0, REG_V1, REG_V1, 1, + INS_OPTS_SCALABLE_H); // SQDMLALB .S, .H, .H[] + theEmitter->emitIns_R_R_R_I(INS_sve_sqdmlalt, EA_SCALABLE, REG_V2, REG_V3, REG_V3, 3, + INS_OPTS_SCALABLE_H); // SQDMLALT .S, .H, .H[] + theEmitter->emitIns_R_R_R_I(INS_sve_sqdmlslb, EA_SCALABLE, REG_V4, REG_V5, REG_V5, 5, + INS_OPTS_SCALABLE_H); // SQDMLSLB .S, .H, .H[] + theEmitter->emitIns_R_R_R_I(INS_sve_sqdmlslt, EA_SCALABLE, REG_V6, REG_V0, REG_V7, 7, + INS_OPTS_SCALABLE_H); // SQDMLSLT .S, .H, .H[] + + // IF_SVE_FJ_3B + theEmitter->emitIns_R_R_R_I(INS_sve_sqdmlalb, EA_SCALABLE, REG_V8, REG_V9, REG_V0, 0, + INS_OPTS_SCALABLE_S); // SQDMLALB .D, .S, .S[] + theEmitter->emitIns_R_R_R_I(INS_sve_sqdmlalt, EA_SCALABLE, REG_V10, REG_V11, REG_V5, 1, + INS_OPTS_SCALABLE_S); // SQDMLALT .D, .S, .S[] + theEmitter->emitIns_R_R_R_I(INS_sve_sqdmlslb, EA_SCALABLE, REG_V12, REG_V13, REG_V10, 2, + INS_OPTS_SCALABLE_S); // 
SQDMLSLB .D, .S, .S[] + theEmitter->emitIns_R_R_R_I(INS_sve_sqdmlslt, EA_SCALABLE, REG_V14, REG_V15, REG_V15, 3, + INS_OPTS_SCALABLE_S); // SQDMLSLT .D, .S, .S[] + // IF_SVE_ED_1A theEmitter->emitIns_R_I(INS_sve_smax, EA_SCALABLE, REG_V0, -128, INS_OPTS_SCALABLE_B); // SMAX ., ., # diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 6ffe3843dcdd6c..71b86ccc308b14 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -1161,6 +1161,7 @@ void emitter::emitInsSanityCheck(instrDesc* id) case IF_SVE_FG_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 integer multiply-add long (indexed) case IF_SVE_FH_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 saturating multiply (indexed) case IF_SVE_FI_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 saturating multiply high (indexed) + case IF_SVE_FJ_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 saturating multiply-add (indexed) assert(insOptsScalableStandard(id->idInsOpt())); assert(isVectorRegister(id->idReg1())); // ddddd assert(isVectorRegister(id->idReg2())); // nnnnn @@ -1172,6 +1173,7 @@ void emitter::emitInsSanityCheck(instrDesc* id) case IF_SVE_FE_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 integer multiply long (indexed) case IF_SVE_FG_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 integer multiply-add long (indexed) case IF_SVE_FH_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 saturating multiply (indexed) + case IF_SVE_FJ_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 saturating multiply-add (indexed) assert(id->idInsOpt() == INS_OPTS_SCALABLE_S); assert(isVectorRegister(id->idReg1())); // ddddd assert(isVectorRegister(id->idReg2())); // nnnnn @@ -11625,6 +11627,29 @@ void emitter::emitIns_R_R_R_I(instruction ins, } break; + case INS_sve_sqdmlalb: + case INS_sve_sqdmlalt: + case INS_sve_sqdmlslb: + case INS_sve_sqdmlslt: + assert(isVectorRegister(reg1)); // ddddd + assert(isVectorRegister(reg2)); // nnnnn + assert(isVectorRegister(reg3)); + + if 
(opt == INS_OPTS_SCALABLE_H) + { + assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); // mmm + assert(isValidUimm3(imm)); // ii i + fmt = IF_SVE_FJ_3A; + } + else + { + assert(opt == INS_OPTS_SCALABLE_S); + assert((REG_V0 <= reg3) && (reg3 <= REG_V15)); // mmmm + assert(isValidUimm2(imm)); // ii + fmt = IF_SVE_FJ_3B; + } + break; + default: unreached(); break; @@ -20250,6 +20275,7 @@ BYTE* emitter::emitOutput_InstrSve(BYTE* dst, instrDesc* id) case IF_SVE_FE_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 integer multiply long (indexed) case IF_SVE_FG_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 integer multiply-add long (indexed) case IF_SVE_FH_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 saturating multiply (indexed) + case IF_SVE_FJ_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 saturating multiply-add (indexed) imm = emitGetInsSC(id); code = emitInsCodeSve(ins, fmt); code |= insEncodeReg_V_4_to_0(id->idReg1()); // ddddd @@ -20263,6 +20289,7 @@ BYTE* emitter::emitOutput_InstrSve(BYTE* dst, instrDesc* id) case IF_SVE_FE_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 integer multiply long (indexed) case IF_SVE_FG_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 integer multiply-add long (indexed) case IF_SVE_FH_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 saturating multiply (indexed) + case IF_SVE_FJ_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 saturating multiply-add (indexed) imm = emitGetInsSC(id); code = emitInsCodeSve(ins, fmt); code |= insEncodeReg_V_4_to_0(id->idReg1()); // ddddd @@ -23444,6 +23471,7 @@ void emitter::emitDispInsHelp( // .S, .H, .H[] case IF_SVE_EG_3A: // ...........iimmm ......nnnnnddddd -- SVE two-way dot product (indexed) case IF_SVE_FG_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 integer multiply-add long (indexed) + case IF_SVE_FJ_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 saturating multiply-add (indexed) // .S, .B, .B[] case IF_SVE_EY_3A: // ...........iimmm ......nnnnnddddd -- SVE integer dot product 
(indexed) case IF_SVE_EZ_3A: // ...........iimmm ......nnnnnddddd -- SVE mixed sign dot product (indexed) @@ -23461,6 +23489,7 @@ void emitter::emitDispInsHelp( case IF_SVE_FH_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 saturating multiply (indexed) // .D, .S, .S[] case IF_SVE_FG_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 integer multiply-add long (indexed) + case IF_SVE_FJ_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 saturating multiply-add (indexed) emitDispSveReg(id->idReg1(), INS_OPTS_SCALABLE_D, true); // ddddd emitDispSveReg(id->idReg2(), id->idInsOpt(), true); // nnnnn emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmmm @@ -26579,6 +26608,8 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins case IF_SVE_FG_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 integer multiply-add long (indexed) case IF_SVE_FH_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 saturating multiply (indexed) case IF_SVE_FH_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 saturating multiply (indexed) + case IF_SVE_FJ_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 saturating multiply-add (indexed) + case IF_SVE_FJ_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 saturating multiply-add (indexed) result.insLatency = PERFSCORE_LATENCY_4C; result.insThroughput = PERFSCORE_THROUGHPUT_1C; break; From d9875a0a1f09bd64f72f8f36926606df25b22aae Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Thu, 8 Feb 2024 11:03:56 -0500 Subject: [PATCH 6/7] temp --- src/coreclr/jit/emitarm64.cpp | 121 ---------------------------------- 1 file changed, 121 deletions(-) diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 71b86ccc308b14..c9ffafe27dec3f 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -11529,127 +11529,6 @@ void emitter::emitIns_R_R_R_I(instruction ins, fmt = IF_SVE_JN_3A; break; - case INS_sve_smullb: - case INS_sve_smullt: - case INS_sve_umullb: - case 
INS_sve_umullt: - assert(isVectorRegister(reg1)); // ddddd - assert(isVectorRegister(reg2)); // nnnnn - assert(isVectorRegister(reg3)); - - if (opt == INS_OPTS_SCALABLE_H) - { - assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); // mmm - assert(isValidUimm3(imm)); // ii i - fmt = IF_SVE_FE_3A; - } - else - { - assert(opt == INS_OPTS_SCALABLE_S); - assert((REG_V0 <= reg3) && (reg3 <= REG_V15)); // mmmm - assert(isValidUimm2(imm)); // i i - fmt = IF_SVE_FE_3B; - } - break; - - case INS_sve_smlalb: - case INS_sve_smlalt: - case INS_sve_umlalb: - case INS_sve_umlalt: - case INS_sve_smlslb: - case INS_sve_smlslt: - case INS_sve_umlslb: - case INS_sve_umlslt: - assert(isVectorRegister(reg1)); // ddddd - assert(isVectorRegister(reg2)); // nnnnn - assert(isVectorRegister(reg3)); - - if (opt == INS_OPTS_SCALABLE_H) - { - assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); // mmm - assert(isValidUimm3(imm)); // ii i - fmt = IF_SVE_FG_3A; - } - else - { - assert(opt == INS_OPTS_SCALABLE_S); - assert((REG_V0 <= reg3) && (reg3 <= REG_V15)); // mmmm - assert(isValidUimm2(imm)); // i i - fmt = IF_SVE_FG_3B; - } - break; - - case INS_sve_sqdmullb: - case INS_sve_sqdmullt: - assert(isVectorRegister(reg1)); // ddddd - assert(isVectorRegister(reg2)); // nnnnn - assert(isVectorRegister(reg3)); - - if (opt == INS_OPTS_SCALABLE_H) - { - assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); // mmm - assert(isValidUimm3(imm)); // ii i - fmt = IF_SVE_FH_3A; - } - else - { - assert(opt == INS_OPTS_SCALABLE_S); - assert((REG_V0 <= reg3) && (reg3 <= REG_V15)); // mmmm - assert(isValidUimm2(imm)); // i i - fmt = IF_SVE_FH_3B; - } - break; - - case INS_sve_sqdmulh: - case INS_sve_sqrdmulh: - assert(isVectorRegister(reg1)); // ddddd - assert(isVectorRegister(reg2)); // nnnnn - assert(isVectorRegister(reg3)); - - if (opt == INS_OPTS_SCALABLE_H) - { - assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); // mmm - assert(isValidUimm3(imm)); // ii i - fmt = IF_SVE_FI_3A; - } - else if (opt == INS_OPTS_SCALABLE_S) - { - 
assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); // mmm - assert(isValidUimm2(imm)); // ii - fmt = IF_SVE_FI_3B; - } - else - { - assert(opt == INS_OPTS_SCALABLE_D); - assert((REG_V0 <= reg3) && (reg3 <= REG_V15)); // mmmm - assert(isValidImm1(imm)); // i - fmt = IF_SVE_FI_3C; - } - break; - - case INS_sve_sqdmlalb: - case INS_sve_sqdmlalt: - case INS_sve_sqdmlslb: - case INS_sve_sqdmlslt: - assert(isVectorRegister(reg1)); // ddddd - assert(isVectorRegister(reg2)); // nnnnn - assert(isVectorRegister(reg3)); - - if (opt == INS_OPTS_SCALABLE_H) - { - assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); // mmm - assert(isValidUimm3(imm)); // ii i - fmt = IF_SVE_FJ_3A; - } - else - { - assert(opt == INS_OPTS_SCALABLE_S); - assert((REG_V0 <= reg3) && (reg3 <= REG_V15)); // mmmm - assert(isValidUimm2(imm)); // ii - fmt = IF_SVE_FJ_3B; - } - break; - default: unreached(); break; From 1fb0823cda386e89570b4eaafd2c88beb76eb7ab Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Thu, 8 Feb 2024 11:15:08 -0500 Subject: [PATCH 7/7] Fix --- src/coreclr/jit/emitarm64.cpp | 121 ++++++++++++++++++++++++++++++++++ 1 file changed, 121 insertions(+) diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 9bc18f1d164a1a..2fe4ae43110c86 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -11716,6 +11716,127 @@ void emitter::emitIns_R_R_R_I(instruction ins, } break; + case INS_sve_smullb: + case INS_sve_smullt: + case INS_sve_umullb: + case INS_sve_umullt: + assert(isVectorRegister(reg1)); // ddddd + assert(isVectorRegister(reg2)); // nnnnn + assert(isVectorRegister(reg3)); + + if (opt == INS_OPTS_SCALABLE_H) + { + assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); // mmm + assert(isValidUimm3(imm)); // ii i + fmt = IF_SVE_FE_3A; + } + else + { + assert(opt == INS_OPTS_SCALABLE_S); + assert((REG_V0 <= reg3) && (reg3 <= REG_V15)); // mmmm + assert(isValidUimm2(imm)); // i i + fmt = IF_SVE_FE_3B; + } + break; + + case INS_sve_smlalb: + 
case INS_sve_smlalt: + case INS_sve_umlalb: + case INS_sve_umlalt: + case INS_sve_smlslb: + case INS_sve_smlslt: + case INS_sve_umlslb: + case INS_sve_umlslt: + assert(isVectorRegister(reg1)); // ddddd + assert(isVectorRegister(reg2)); // nnnnn + assert(isVectorRegister(reg3)); + + if (opt == INS_OPTS_SCALABLE_H) + { + assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); // mmm + assert(isValidUimm3(imm)); // ii i + fmt = IF_SVE_FG_3A; + } + else + { + assert(opt == INS_OPTS_SCALABLE_S); + assert((REG_V0 <= reg3) && (reg3 <= REG_V15)); // mmmm + assert(isValidUimm2(imm)); // i i + fmt = IF_SVE_FG_3B; + } + break; + + case INS_sve_sqdmullb: + case INS_sve_sqdmullt: + assert(isVectorRegister(reg1)); // ddddd + assert(isVectorRegister(reg2)); // nnnnn + assert(isVectorRegister(reg3)); + + if (opt == INS_OPTS_SCALABLE_H) + { + assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); // mmm + assert(isValidUimm3(imm)); // ii i + fmt = IF_SVE_FH_3A; + } + else + { + assert(opt == INS_OPTS_SCALABLE_S); + assert((REG_V0 <= reg3) && (reg3 <= REG_V15)); // mmmm + assert(isValidUimm2(imm)); // i i + fmt = IF_SVE_FH_3B; + } + break; + + case INS_sve_sqdmulh: + case INS_sve_sqrdmulh: + assert(isVectorRegister(reg1)); // ddddd + assert(isVectorRegister(reg2)); // nnnnn + assert(isVectorRegister(reg3)); + + if (opt == INS_OPTS_SCALABLE_H) + { + assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); // mmm + assert(isValidUimm3(imm)); // ii i + fmt = IF_SVE_FI_3A; + } + else if (opt == INS_OPTS_SCALABLE_S) + { + assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); // mmm + assert(isValidUimm2(imm)); // ii + fmt = IF_SVE_FI_3B; + } + else + { + assert(opt == INS_OPTS_SCALABLE_D); + assert((REG_V0 <= reg3) && (reg3 <= REG_V15)); // mmmm + assert(isValidImm1(imm)); // i + fmt = IF_SVE_FI_3C; + } + break; + + case INS_sve_sqdmlalb: + case INS_sve_sqdmlalt: + case INS_sve_sqdmlslb: + case INS_sve_sqdmlslt: + assert(isVectorRegister(reg1)); // ddddd + assert(isVectorRegister(reg2)); // nnnnn + 
assert(isVectorRegister(reg3)); + + if (opt == INS_OPTS_SCALABLE_H) + { + assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); // mmm + assert(isValidUimm3(imm)); // ii i + fmt = IF_SVE_FJ_3A; + } + else + { + assert(opt == INS_OPTS_SCALABLE_S); + assert((REG_V0 <= reg3) && (reg3 <= REG_V15)); // mmmm + assert(isValidUimm2(imm)); // ii + fmt = IF_SVE_FJ_3B; + } + break; + default: unreached(); break;