Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 15 additions & 6 deletions src/coreclr/src/jit/lsra.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2781,9 +2781,6 @@ bool LinearScan::isMatchingConstant(RegRecord* physRegRecord, RefPosition* refPo
// Return Value:
// The regNumber, if any, allocated to the RefPosition. Returns REG_NA if no free register is found.
//
// Notes:
// TODO-CQ: Consider whether we need to use a different order for tree temps than for vars, as
// reg predict does

// Table of register numbers, initialized from the target-specific REG_VAR_ORDER macro.
// NOTE(review): presumably this is the order in which LSRA considers registers - confirm at the use sites.
static const regNumber lsraRegOrder[] = {REG_VAR_ORDER};
// Size of lsraRegOrder as computed by ArrLen (presumably the element count - confirm against ArrLen's definition).
const unsigned lsraRegOrderSize = ArrLen(lsraRegOrder);
Expand Down Expand Up @@ -3075,6 +3072,10 @@ regNumber LinearScan::tryAllocateFreeReg(Interval* currentInterval, RefPosition*
// we'll need to avoid the short-circuit if we've got a stress option to reverse
// the selection.
int bestPossibleScore = COVERS + UNASSIGNED + OWN_PREFERENCE + CALLER_CALLEE;
if (currentInterval->isConstant)
{
bestPossibleScore |= VALUE_AVAILABLE;
}
if (relatedPreferences != RBM_NONE)
{
bestPossibleScore |= RELATED_PREFERENCE + COVERS_RELATED;
Expand Down Expand Up @@ -3244,7 +3245,7 @@ regNumber LinearScan::tryAllocateFreeReg(Interval* currentInterval, RefPosition*
}

// there is no way we can get a better score so break out
if (!reverseSelect && score == bestPossibleScore && bestLocation == rangeEndLocation + 1)
if (!reverseSelect && score == bestPossibleScore && bestLocation == lastLocation + 1)
{
break;
}
Expand Down Expand Up @@ -4017,7 +4018,8 @@ bool LinearScan::isAssigned(RegRecord* regRec, LsraLocation lastLocation ARM_ARG
{
Interval* assignedInterval = regRec->assignedInterval;

if ((assignedInterval == nullptr) || assignedInterval->getNextRefLocation() > lastLocation)
if ((assignedInterval == nullptr) || (assignedInterval->physReg != regRec->regNum) ||
(assignedInterval->getNextRefLocation() > lastLocation))
{
#ifdef TARGET_ARM
if (newRegType == TYP_DOUBLE)
Expand Down Expand Up @@ -5822,6 +5824,7 @@ void LinearScan::allocateRegisters()
if (keepAssignment == false)
{
currentRefPosition->registerAssignment = allRegs(currentInterval->registerType);
currentRefPosition->isFixedRegRef = false;
unassignPhysRegNoSpill(physRegRecord);

// If the preferences are currently set to just this register, reset them to allRegs
Expand Down Expand Up @@ -10019,6 +10022,12 @@ void LinearScan::dumpLsraAllocationEvent(LsraDumpEvent event,
if ((interval != nullptr) && (reg != REG_NA) && (reg != REG_STK))
{
registersToDump |= genRegMask(reg);
#ifdef TARGET_ARM
if (interval->registerType == TYP_DOUBLE)
{
registersToDump |= genRegMask((regNumber)(reg + 1));
}
#endif
dumpRegRecordTitleIfNeeded();
}

Expand Down Expand Up @@ -10168,7 +10177,7 @@ void LinearScan::dumpLsraAllocationEvent(LsraDumpEvent event,

case LSRA_EVENT_ALLOC_SPILLED_REG:
dumpRefPositionShort(activeRefPosition, currentBlock);
printf("Steal %-4s ", getRegName(reg));
printf("Alloc %-4s ", getRegName(reg));
break;

case LSRA_EVENT_NO_ENTRY_REG_ALLOCATED:
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/src/jit/lsra.h
Original file line number Diff line number Diff line change
Expand Up @@ -1551,7 +1551,7 @@ class LinearScan : public LinearScanInterface

int BuildSimple(GenTree* tree);
int BuildOperandUses(GenTree* node, regMaskTP candidates = RBM_NONE);
int BuildDelayFreeUses(GenTree* node, regMaskTP candidates = RBM_NONE);
int BuildDelayFreeUses(GenTree* node, GenTree* rmwNode = nullptr, regMaskTP candidates = RBM_NONE);
int BuildIndirUses(GenTreeIndir* indirTree, regMaskTP candidates = RBM_NONE);
int BuildAddrUses(GenTree* addr, regMaskTP candidates = RBM_NONE);
void HandleFloatVarArgs(GenTreeCall* call, GenTree* argNode, bool* callHasFloatRegArgs);
Expand Down
4 changes: 2 additions & 2 deletions src/coreclr/src/jit/lsraarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1098,8 +1098,8 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree)
{
if (isRMW)
{
srcCount += BuildDelayFreeUses(intrin.op2);
srcCount += BuildDelayFreeUses(intrin.op3, RBM_ASIMD_INDEXED_H_ELEMENT_ALLOWED_REGS);
srcCount += BuildDelayFreeUses(intrin.op2, nullptr);
srcCount += BuildDelayFreeUses(intrin.op3, nullptr, RBM_ASIMD_INDEXED_H_ELEMENT_ALLOWED_REGS);
}
else
{
Expand Down
63 changes: 44 additions & 19 deletions src/coreclr/src/jit/lsrabuild.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3032,56 +3032,81 @@ void LinearScan::setDelayFree(RefPosition* use)
// and which need to be marked delayRegFree
//
// Arguments:
// node - The node of interest
// node - The node of interest
// rmwNode - The node that has RMW semantics (if applicable)
// candidates - The set of candidates for the uses
//
// Return Value:
// The number of source registers used by the *parent* of this node.
//
int LinearScan::BuildDelayFreeUses(GenTree* node, regMaskTP candidates)
int LinearScan::BuildDelayFreeUses(GenTree* node, GenTree* rmwNode, regMaskTP candidates)
{
RefPosition* use;
RefPosition* use = nullptr;
Interval* rmwInterval = nullptr;
bool rmwIsLastUse = false;
GenTree* addr = nullptr;
if ((rmwNode != nullptr) && isCandidateLocalRef(rmwNode))
{
rmwInterval = getIntervalForLocalVarNode(rmwNode->AsLclVar());
// Note: we don't handle multi-reg vars here. It's not clear that there are any cases
// where we'd encounter a multi-reg var in an RMW context.
rmwIsLastUse = rmwNode->AsLclVar()->IsLastUse(0);
}
if (!node->isContained())
{
use = BuildUse(node, candidates);
setDelayFree(use);
return 1;
}
if (node->OperIsHWIntrinsic())
else if (node->OperIsHWIntrinsic())
{
use = BuildUse(node->gtGetOp1(), candidates);
setDelayFree(use);
return 1;
}
if (!node->OperIsIndir())
else if (!node->OperIsIndir())
{
return 0;
}
GenTreeIndir* indirTree = node->AsIndir();
GenTree* addr = indirTree->gtOp1;
if (!addr->isContained())
else
{
use = BuildUse(addr, candidates);
setDelayFree(use);
return 1;
GenTreeIndir* indirTree = node->AsIndir();
addr = indirTree->gtOp1;
if (!addr->isContained())
{
use = BuildUse(addr, candidates);
}
else if (!addr->OperIs(GT_LEA))
{
return 0;
}
}
if (!addr->OperIs(GT_LEA))
if (use != nullptr)
{
return 0;
if ((use->getInterval() != rmwInterval) || (!rmwIsLastUse && !use->lastUse))
{
setDelayFree(use);
}
return 1;
}

// If we reach here we have a contained LEA in 'addr'.

GenTreeAddrMode* const addrMode = addr->AsAddrMode();

unsigned srcCount = 0;
if ((addrMode->Base() != nullptr) && !addrMode->Base()->isContained())
{
use = BuildUse(addrMode->Base(), candidates);
setDelayFree(use);
if ((use->getInterval() != rmwInterval) || (!rmwIsLastUse && !use->lastUse))
{
setDelayFree(use);
}
srcCount++;
}
if ((addrMode->Index() != nullptr) && !addrMode->Index()->isContained())
{
use = BuildUse(addrMode->Index(), candidates);
setDelayFree(use);
if ((use->getInterval() != rmwInterval) || (!rmwIsLastUse && !use->lastUse))
{
setDelayFree(use);
}
srcCount++;
}
return srcCount;
Expand Down
26 changes: 13 additions & 13 deletions src/coreclr/src/jit/lsraxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -876,7 +876,7 @@ int LinearScan::BuildRMWUses(GenTreeOp* node, regMaskTP candidates)
}
else if (delayUseOperand == op1)
{
srcCount += BuildDelayFreeUses(op1, op1Candidates);
srcCount += BuildDelayFreeUses(op1, op2, op1Candidates);
}
else
{
Expand All @@ -893,7 +893,7 @@ int LinearScan::BuildRMWUses(GenTreeOp* node, regMaskTP candidates)
}
else if (delayUseOperand == op2)
{
srcCount += BuildDelayFreeUses(op2, op2Candidates);
srcCount += BuildDelayFreeUses(op2, op1, op2Candidates);
}
else
{
Expand Down Expand Up @@ -987,7 +987,7 @@ int LinearScan::BuildShiftRotate(GenTree* tree)
{
if (!shiftBy->isContained())
{
srcCount += BuildDelayFreeUses(shiftBy, RBM_RCX);
srcCount += BuildDelayFreeUses(shiftBy, source, RBM_RCX);
buildKillPositionsForNode(tree, currentLoc + 1, RBM_RCX);
}
BuildDef(tree, dstCandidates);
Expand Down Expand Up @@ -1778,7 +1778,7 @@ int LinearScan::BuildModDiv(GenTree* tree)
srcCount = 1;
}

srcCount += BuildDelayFreeUses(op2, allRegs(TYP_INT) & ~(RBM_RAX | RBM_RDX));
srcCount += BuildDelayFreeUses(op2, op1, allRegs(TYP_INT) & ~(RBM_RAX | RBM_RDX));

buildInternalRegisterUses();

Expand Down Expand Up @@ -2341,8 +2341,8 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree)
tgtPrefUse = BuildUse(op1);

srcCount += 1;
srcCount += op2->isContained() ? BuildOperandUses(op2) : BuildDelayFreeUses(op2);
srcCount += BuildDelayFreeUses(op3, RBM_XMM0);
srcCount += op2->isContained() ? BuildOperandUses(op2) : BuildDelayFreeUses(op2, op1);
srcCount += BuildDelayFreeUses(op3, op1, RBM_XMM0);

buildUses = false;
}
Expand Down Expand Up @@ -2378,7 +2378,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree)
tgtPrefUse = BuildUse(op1);

srcCount += 1;
srcCount += BuildDelayFreeUses(op2, varTypeIsByte(baseType) ? allByteRegs() : RBM_NONE);
srcCount += BuildDelayFreeUses(op2, op1, varTypeIsByte(baseType) ? allByteRegs() : RBM_NONE);

buildUses = false;
break;
Expand All @@ -2395,7 +2395,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree)
{
// op3 reg should be different from target reg to
// store the lower half result after executing the instruction
srcCount += BuildDelayFreeUses(op3);
srcCount += BuildDelayFreeUses(op3, op1);
// Need an internal register different from the dst to take the lower half result
buildInternalIntRegisterDefForNode(intrinsicTree);
setInternalRegsDelayFree = true;
Expand Down Expand Up @@ -2431,7 +2431,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree)

srcCount += 1;
srcCount += BuildOperandUses(op2);
srcCount += BuildDelayFreeUses(op3);
srcCount += BuildDelayFreeUses(op3, op1);
}
else if (op1->isContained())
{
Expand All @@ -2440,7 +2440,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree)
tgtPrefUse = BuildUse(op3);

srcCount += BuildOperandUses(op1);
srcCount += BuildDelayFreeUses(op2);
srcCount += BuildDelayFreeUses(op2, op1);
srcCount += 1;
}
else
Expand All @@ -2452,15 +2452,15 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree)

if (copiesUpperBits)
{
srcCount += BuildDelayFreeUses(op2);
srcCount += BuildDelayFreeUses(op2, op1);
}
else
{
tgtPrefUse2 = BuildUse(op2);
srcCount += 1;
}

srcCount += op3->isContained() ? BuildOperandUses(op3) : BuildDelayFreeUses(op3);
srcCount += op3->isContained() ? BuildOperandUses(op3) : BuildDelayFreeUses(op3, op1);
}

buildUses = false;
Expand All @@ -2475,7 +2475,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree)

// Any pair of the index, mask, or destination registers should be different
srcCount += BuildOperandUses(op1);
srcCount += BuildDelayFreeUses(op2);
srcCount += BuildDelayFreeUses(op2, op1);

// op3 should always be contained
assert(op3->isContained());
Expand Down
4 changes: 2 additions & 2 deletions src/coreclr/src/jit/rationalize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -943,9 +943,9 @@ PhaseStatus Rationalizer::DoPhase()
for (Statement* statement : StatementList(firstStatement))
{
assert(statement->GetTreeList() != nullptr);
assert(statement->GetTreeList()->gtPrev == nullptr);
noway_assert(statement->GetTreeList()->gtPrev == nullptr);
assert(statement->GetRootNode() != nullptr);
assert(statement->GetRootNode()->gtNext == nullptr);
noway_assert(statement->GetRootNode()->gtNext == nullptr);

BlockRange().InsertAtEnd(LIR::Range(statement->GetTreeList(), statement->GetRootNode()));

Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/src/jit/target.h
Original file line number Diff line number Diff line change
Expand Up @@ -1288,7 +1288,7 @@ typedef unsigned char regNumberSmall;
REG_R6, REG_R7, REG_R8, REG_R9, REG_R10, \
REG_R11, REG_R13, REG_R14, \
REG_R12, REG_R15, REG_IP0, REG_IP1, \
REG_CALLEE_SAVED_ORDER
REG_CALLEE_SAVED_ORDER, REG_LR

#define REG_VAR_ORDER_FLT REG_V16, REG_V17, REG_V18, REG_V19, \
REG_V20, REG_V21, REG_V22, REG_V23, \
Expand Down