diff --git a/src/coreclr/src/jit/lsra.cpp b/src/coreclr/src/jit/lsra.cpp index bd94968c20376e..1e9c136a99c21e 100644 --- a/src/coreclr/src/jit/lsra.cpp +++ b/src/coreclr/src/jit/lsra.cpp @@ -2781,9 +2781,6 @@ bool LinearScan::isMatchingConstant(RegRecord* physRegRecord, RefPosition* refPo // Return Value: // The regNumber, if any, allocated to the RefPositon. Returns REG_NA if no free register is found. // -// Notes: -// TODO-CQ: Consider whether we need to use a different order for tree temps than for vars, as -// reg predict does static const regNumber lsraRegOrder[] = {REG_VAR_ORDER}; const unsigned lsraRegOrderSize = ArrLen(lsraRegOrder); @@ -3075,6 +3072,10 @@ regNumber LinearScan::tryAllocateFreeReg(Interval* currentInterval, RefPosition* // we'll need to avoid the short-circuit if we've got a stress option to reverse // the selection. int bestPossibleScore = COVERS + UNASSIGNED + OWN_PREFERENCE + CALLER_CALLEE; + if (currentInterval->isConstant) + { + bestPossibleScore |= VALUE_AVAILABLE; + } if (relatedPreferences != RBM_NONE) { bestPossibleScore |= RELATED_PREFERENCE + COVERS_RELATED; @@ -3244,7 +3245,7 @@ regNumber LinearScan::tryAllocateFreeReg(Interval* currentInterval, RefPosition* } // there is no way we can get a better score so break out - if (!reverseSelect && score == bestPossibleScore && bestLocation == rangeEndLocation + 1) + if (!reverseSelect && score == bestPossibleScore && bestLocation == lastLocation + 1) { break; } @@ -4017,7 +4018,8 @@ bool LinearScan::isAssigned(RegRecord* regRec, LsraLocation lastLocation ARM_ARG { Interval* assignedInterval = regRec->assignedInterval; - if ((assignedInterval == nullptr) || assignedInterval->getNextRefLocation() > lastLocation) + if ((assignedInterval == nullptr) || (assignedInterval->physReg != regRec->regNum) || + (assignedInterval->getNextRefLocation() > lastLocation)) { #ifdef TARGET_ARM if (newRegType == TYP_DOUBLE) @@ -5822,6 +5824,7 @@ void LinearScan::allocateRegisters() if (keepAssignment == false) { currentRefPosition->registerAssignment = allRegs(currentInterval->registerType); + currentRefPosition->isFixedRegRef = false; unassignPhysRegNoSpill(physRegRecord); // If the preferences are currently set to just this register, reset them to allRegs @@ -10019,6 +10022,12 @@ void LinearScan::dumpLsraAllocationEvent(LsraDumpEvent event, if ((interval != nullptr) && (reg != REG_NA) && (reg != REG_STK)) { registersToDump |= genRegMask(reg); +#ifdef TARGET_ARM + if (interval->registerType == TYP_DOUBLE) + { + registersToDump |= genRegMask((regNumber)(reg + 1)); + } +#endif dumpRegRecordTitleIfNeeded(); } @@ -10168,7 +10177,7 @@ void LinearScan::dumpLsraAllocationEvent(LsraDumpEvent event, case LSRA_EVENT_ALLOC_SPILLED_REG: dumpRefPositionShort(activeRefPosition, currentBlock); - printf("Steal %-4s ", getRegName(reg)); + printf("Alloc %-4s ", getRegName(reg)); break; case LSRA_EVENT_NO_ENTRY_REG_ALLOCATED: diff --git a/src/coreclr/src/jit/lsra.h b/src/coreclr/src/jit/lsra.h index 0d443f900c6cd8..30571ff7104422 100644 --- a/src/coreclr/src/jit/lsra.h +++ b/src/coreclr/src/jit/lsra.h @@ -1551,7 +1551,7 @@ class LinearScan : public LinearScanInterface int BuildSimple(GenTree* tree); int BuildOperandUses(GenTree* node, regMaskTP candidates = RBM_NONE); - int BuildDelayFreeUses(GenTree* node, regMaskTP candidates = RBM_NONE); + int BuildDelayFreeUses(GenTree* node, GenTree* rmwNode = nullptr, regMaskTP candidates = RBM_NONE); int BuildIndirUses(GenTreeIndir* indirTree, regMaskTP candidates = RBM_NONE); int BuildAddrUses(GenTree* addr, regMaskTP candidates = RBM_NONE); void HandleFloatVarArgs(GenTreeCall* call, GenTree* argNode, bool* callHasFloatRegArgs); diff --git a/src/coreclr/src/jit/lsraarm64.cpp b/src/coreclr/src/jit/lsraarm64.cpp index 9c67a05cc55657..9463b1f4820870 100644 --- a/src/coreclr/src/jit/lsraarm64.cpp +++ b/src/coreclr/src/jit/lsraarm64.cpp @@ -1098,8 +1098,8 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) { if (isRMW) { - srcCount += BuildDelayFreeUses(intrin.op2); - srcCount += BuildDelayFreeUses(intrin.op3, RBM_ASIMD_INDEXED_H_ELEMENT_ALLOWED_REGS); + srcCount += BuildDelayFreeUses(intrin.op2, nullptr); + srcCount += BuildDelayFreeUses(intrin.op3, nullptr, RBM_ASIMD_INDEXED_H_ELEMENT_ALLOWED_REGS); } else { diff --git a/src/coreclr/src/jit/lsrabuild.cpp b/src/coreclr/src/jit/lsrabuild.cpp index bdcd8dcd3999cb..4aff7d1a369b05 100644 --- a/src/coreclr/src/jit/lsrabuild.cpp +++ b/src/coreclr/src/jit/lsrabuild.cpp @@ -3032,56 +3032,81 @@ void LinearScan::setDelayFree(RefPosition* use) // and which need to be marked delayRegFree // // Arguments: -// node - The node of interest +// node - The node of interest +// rmwNode - The node that has RMW semantics (if applicable) +// candidates - The set of candidates for the uses // // Return Value: // The number of source registers used by the *parent* of this node. // -int LinearScan::BuildDelayFreeUses(GenTree* node, regMaskTP candidates) +int LinearScan::BuildDelayFreeUses(GenTree* node, GenTree* rmwNode, regMaskTP candidates) { - RefPosition* use; + RefPosition* use = nullptr; + Interval* rmwInterval = nullptr; + bool rmwIsLastUse = false; + GenTree* addr = nullptr; + if ((rmwNode != nullptr) && isCandidateLocalRef(rmwNode)) + { + rmwInterval = getIntervalForLocalVarNode(rmwNode->AsLclVar()); + // Note: we don't handle multi-reg vars here. It's not clear that there are any cases + // where we'd encounter a multi-reg var in an RMW context. + rmwIsLastUse = rmwNode->AsLclVar()->IsLastUse(0); + } if (!node->isContained()) { use = BuildUse(node, candidates); - setDelayFree(use); - return 1; } - if (node->OperIsHWIntrinsic()) + else if (node->OperIsHWIntrinsic()) { use = BuildUse(node->gtGetOp1(), candidates); - setDelayFree(use); - return 1; } - if (!node->OperIsIndir()) + else if (!node->OperIsIndir()) { return 0; } - GenTreeIndir* indirTree = node->AsIndir(); - GenTree* addr = indirTree->gtOp1; - if (!addr->isContained()) + else { - use = BuildUse(addr, candidates); - setDelayFree(use); - return 1; + GenTreeIndir* indirTree = node->AsIndir(); + addr = indirTree->gtOp1; + if (!addr->isContained()) + { + use = BuildUse(addr, candidates); + } + else if (!addr->OperIs(GT_LEA)) + { + return 0; + } } - if (!addr->OperIs(GT_LEA)) + if (use != nullptr) { - return 0; + if ((use->getInterval() != rmwInterval) || (!rmwIsLastUse && !use->lastUse)) + { + setDelayFree(use); + } + return 1; } + // If we reach here we have a contained LEA in 'addr'. + GenTreeAddrMode* const addrMode = addr->AsAddrMode(); unsigned srcCount = 0; if ((addrMode->Base() != nullptr) && !addrMode->Base()->isContained()) { use = BuildUse(addrMode->Base(), candidates); - setDelayFree(use); + if ((use->getInterval() != rmwInterval) || (!rmwIsLastUse && !use->lastUse)) + { + setDelayFree(use); + } srcCount++; } if ((addrMode->Index() != nullptr) && !addrMode->Index()->isContained()) { use = BuildUse(addrMode->Index(), candidates); - setDelayFree(use); + if ((use->getInterval() != rmwInterval) || (!rmwIsLastUse && !use->lastUse)) + { + setDelayFree(use); + } srcCount++; } return srcCount; diff --git a/src/coreclr/src/jit/lsraxarch.cpp b/src/coreclr/src/jit/lsraxarch.cpp index db787c55fdf282..133bcf28acf5a5 100644 --- a/src/coreclr/src/jit/lsraxarch.cpp +++ b/src/coreclr/src/jit/lsraxarch.cpp @@ -876,7 +876,7 @@ int LinearScan::BuildRMWUses(GenTreeOp* node, regMaskTP candidates) } else if (delayUseOperand == op1) { - srcCount += BuildDelayFreeUses(op1, op1Candidates); + srcCount += BuildDelayFreeUses(op1, op2, op1Candidates); } else { @@ -893,7 +893,7 @@ int LinearScan::BuildRMWUses(GenTreeOp* node, regMaskTP candidates) } else if (delayUseOperand == op2) { - srcCount += BuildDelayFreeUses(op2, op2Candidates); + srcCount += BuildDelayFreeUses(op2, op1, op2Candidates); } else { @@ -987,7 +987,7 @@ int LinearScan::BuildShiftRotate(GenTree* tree) { if (!shiftBy->isContained()) { - srcCount += BuildDelayFreeUses(shiftBy, RBM_RCX); + srcCount += BuildDelayFreeUses(shiftBy, source, RBM_RCX); buildKillPositionsForNode(tree, currentLoc + 1, RBM_RCX); } BuildDef(tree, dstCandidates); @@ -1778,7 +1778,7 @@ int LinearScan::BuildModDiv(GenTree* tree) srcCount = 1; } - srcCount += BuildDelayFreeUses(op2, allRegs(TYP_INT) & ~(RBM_RAX | RBM_RDX)); + srcCount += BuildDelayFreeUses(op2, op1, allRegs(TYP_INT) & ~(RBM_RAX | RBM_RDX)); buildInternalRegisterUses(); @@ -2341,8 +2341,8 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) tgtPrefUse = BuildUse(op1); srcCount += 1; - srcCount += op2->isContained() ? BuildOperandUses(op2) : BuildDelayFreeUses(op2); - srcCount += BuildDelayFreeUses(op3, RBM_XMM0); + srcCount += op2->isContained() ? BuildOperandUses(op2) : BuildDelayFreeUses(op2, op1); + srcCount += BuildDelayFreeUses(op3, op1, RBM_XMM0); buildUses = false; } @@ -2378,7 +2378,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) tgtPrefUse = BuildUse(op1); srcCount += 1; - srcCount += BuildDelayFreeUses(op2, varTypeIsByte(baseType) ? allByteRegs() : RBM_NONE); + srcCount += BuildDelayFreeUses(op2, op1, varTypeIsByte(baseType) ? allByteRegs() : RBM_NONE); buildUses = false; break; @@ -2395,7 +2395,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) { // op3 reg should be different from target reg to // store the lower half result after executing the instruction - srcCount += BuildDelayFreeUses(op3); + srcCount += BuildDelayFreeUses(op3, op1); // Need a internal register different from the dst to take the lower half result buildInternalIntRegisterDefForNode(intrinsicTree); setInternalRegsDelayFree = true; @@ -2431,7 +2431,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) srcCount += 1; srcCount += BuildOperandUses(op2); - srcCount += BuildDelayFreeUses(op3); + srcCount += BuildDelayFreeUses(op3, op1); } else if (op1->isContained()) { @@ -2440,7 +2440,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) tgtPrefUse = BuildUse(op3); srcCount += BuildOperandUses(op1); - srcCount += BuildDelayFreeUses(op2); + srcCount += BuildDelayFreeUses(op2, op1); srcCount += 1; } else @@ -2452,7 +2452,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) if (copiesUpperBits) { - srcCount += BuildDelayFreeUses(op2); + srcCount += BuildDelayFreeUses(op2, op1); } else { @@ -2460,7 +2460,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) srcCount += 1; } - srcCount += op3->isContained() ? BuildOperandUses(op3) : BuildDelayFreeUses(op3); + srcCount += op3->isContained() ? BuildOperandUses(op3) : BuildDelayFreeUses(op3, op1); } buildUses = false; @@ -2475,7 +2475,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) // Any pair of the index, mask, or destination registers should be different srcCount += BuildOperandUses(op1); - srcCount += BuildDelayFreeUses(op2); + srcCount += BuildDelayFreeUses(op2, op1); // op3 should always be contained assert(op3->isContained()); diff --git a/src/coreclr/src/jit/rationalize.cpp b/src/coreclr/src/jit/rationalize.cpp index ebdac7725ab7f6..e0f37a026dc6ca 100644 --- a/src/coreclr/src/jit/rationalize.cpp +++ b/src/coreclr/src/jit/rationalize.cpp @@ -943,9 +943,9 @@ PhaseStatus Rationalizer::DoPhase() for (Statement* statement : StatementList(firstStatement)) { assert(statement->GetTreeList() != nullptr); - assert(statement->GetTreeList()->gtPrev == nullptr); + noway_assert(statement->GetTreeList()->gtPrev == nullptr); assert(statement->GetRootNode() != nullptr); - assert(statement->GetRootNode()->gtNext == nullptr); + noway_assert(statement->GetRootNode()->gtNext == nullptr); BlockRange().InsertAtEnd(LIR::Range(statement->GetTreeList(), statement->GetRootNode())); diff --git a/src/coreclr/src/jit/target.h b/src/coreclr/src/jit/target.h index a2b5f93387662e..5d3e7ad96d14c3 100644 --- a/src/coreclr/src/jit/target.h +++ b/src/coreclr/src/jit/target.h @@ -1288,7 +1288,7 @@ typedef unsigned char regNumberSmall; REG_R6, REG_R7, REG_R8, REG_R9, REG_R10, \ REG_R11, REG_R13, REG_R14, \ REG_R12, REG_R15, REG_IP0, REG_IP1, \ - REG_CALLEE_SAVED_ORDER + REG_CALLEE_SAVED_ORDER, REG_LR #define REG_VAR_ORDER_FLT REG_V16, REG_V17, REG_V18, REG_V19, \ REG_V20, REG_V21, REG_V22, REG_V23, \