-
Notifications
You must be signed in to change notification settings - Fork 5.3k
Arm64: Implement VectorTableLookup/VectorTableLookupExtension intrinsic + Consecutive registers support #80297
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
c92fbdf
bee0f8c
426f68a
83eb54c
d06c4a2
99adc17
7b10969
87f66f7
98e7bd2
786e350
e0a82b3
47848b0
25a738d
e492829
385abf1
2f4a4e3
8d3744b
8d66d45
85d90f5
0a0faed
036a273
791563a
db6036b
fdc94ed
cf84fda
06a78d4
2afe249
a086ab7
450a08d
5bb9302
8027c5a
6b1ba8a
723477b
c6e77e4
5696a6e
a9e1a7a
2617b77
2d4fd5c
6a21205
c6d338f
db4f846
bce8c5a
3d15fcb
75f142b
e4cbad9
96de024
11b345a
4526b41
46f0abd
6ef7c68
b0b6a5e
0197b73
2734023
1cb22d0
6e30b3a
721823b
5b9fac5
cb29aee
53b07b5
302d3ba
5e828f1
fc93cc2
78e87cd
60d383e
05f9fc6
b52059e
0721ad4
8a5c696
e64527b
22270c5
f3884fd
f2a1f19
985fe25
ab043fd
13601eb
7bf9105
40aa7c7
1f95637
e7bb069
6ebb12a
2d75291
68cd4d7
7b83053
903c3de
a8ec819
961e9c2
b9d0f15
cbe999f
6665536
2b9f49e
5fec6e1
5371c30
4875925
597e6de
4a1171d
1f124a4
9686773
55071f6
757c682
79e0bd5
a44cf60
5fefae6
2a5e52c
a17b44f
3c390d8
a9995e6
ae2e633
02f8ad2
984c6ee
8fe130a
7f8e77f
090bf26
c91bc77
1591deb
0c4d71f
0c56514
35a7550
ff587ac
dab2121
e94cfcf
ab007d0
5d6cc2d
24e6158
4026aa6
53c91f0
7d168b2
7cffe7a
dd10bbe
b4ea77e
0dc4ea6
6d9e136
f247b3c
e8d3ee5
524d983
289110d
d778833
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
…UpperVectorSave
- Loading branch information
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -69,80 +69,49 @@ bool LinearScan::setNextConsecutiveRegisterAssignment(RefPosition* firstRefPosit | |
| // are part of the range. | ||
|
|
||
| if (!areNextConsecutiveRegistersFree(firstRegAssigned, firstRefPosition->regCount, | ||
| firstRefPosition->getInterval()->registerType)) | ||
| firstRefPosition->getInterval()->registerType)) | ||
| { | ||
| return false; | ||
| } | ||
|
|
||
| // RefPosition* consecutiveRefPosition = firstRefPosition; | ||
| // regNumber regToAssign = firstRegAssigned; | ||
| // while (consecutiveRefPosition != nullptr) | ||
| // { | ||
| // if (isRegInUse(regToAssign, consecutiveRefPosition->getInterval()->registerType)) | ||
| // { | ||
| // return false; | ||
| // } | ||
| // | ||
| //#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE | ||
| // if (consecutiveRefPosition->refType == RefTypeUpperVectorRestore) | ||
| // { | ||
| // consecutiveRefPosition = getNextConsecutiveRefPosition(consecutiveRefPosition); | ||
| // | ||
| // assert(consecutiveRefPosition->refType == RefTypeUse); | ||
| // assert(!isRegInUse(regToAssign, consecutiveRefPosition->getInterval()->registerType)); | ||
| // } | ||
| //#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE | ||
| // | ||
| // consecutiveRefPosition = getNextConsecutiveRefPosition(consecutiveRefPosition); | ||
| // regToAssign = regToAssign == REG_FP_LAST ? REG_FP_FIRST : REG_NEXT(regToAssign); | ||
| // } | ||
|
|
||
| RefPosition* consecutiveRefPosition = getNextConsecutiveRefPosition(firstRefPosition); | ||
| regNumber regToAssign = firstRegAssigned == REG_FP_LAST ? REG_FP_FIRST : REG_NEXT(firstRegAssigned); | ||
| INDEBUG(int refPosCount = 0); | ||
|
|
||
| // First refposition should always start with RefTypeUse | ||
| assert(firstRefPosition->refType != RefTypeUpperVectorRestore); | ||
|
|
||
| INDEBUG(int refPosCount = 1); | ||
| regMaskTP busyConsecutiveRegMask = ~(((1ULL << firstRefPosition->regCount) - 1) << firstRegAssigned); | ||
|
|
||
| while (consecutiveRefPosition != nullptr) | ||
| { | ||
| assert(consecutiveRefPosition->regCount == 0); | ||
| #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE | ||
| if ((consecutiveRefPosition->refType == RefTypeUpperVectorRestore)) | ||
| { | ||
| if (consecutiveRefPosition->getInterval()->isPartiallySpilled) | ||
| { | ||
| // Make sure that restore doesn't get one of the registers that are part of series we are trying to set | ||
| // currently. | ||
| // TODO-CQ: We could technically assign RefTypeUpperVectorRestore and its RefTypeUse same register, but | ||
| // during register selection, it might get tricky to know which of the busy registers are assigned to | ||
| // RefTypeUpperVectorRestore positions of corresponding variables for which (another criteria) | ||
| // we are trying to find consecutive registers. | ||
|
|
||
| consecutiveRefPosition->registerAssignment &= ~busyConsecutiveRegMask; | ||
| } | ||
| consecutiveRefPosition = getNextConsecutiveRefPosition(consecutiveRefPosition); | ||
kunalspathak marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| } | ||
| #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE | ||
| INDEBUG(refPosCount++); | ||
| assert(consecutiveRefPosition->refType == RefTypeUse); | ||
| consecutiveRefPosition->registerAssignment = genRegMask(regToAssign); | ||
| consecutiveRefPosition = getNextConsecutiveRefPosition(consecutiveRefPosition); | ||
| regToAssign = regToAssign == REG_FP_LAST ? REG_FP_FIRST : REG_NEXT(regToAssign); | ||
|
|
||
| INDEBUG(refPosCount++); | ||
| } | ||
|
|
||
| // while (consecutiveRefPosition != nullptr) | ||
| // { | ||
| // consecutiveRefPosition->registerAssignment = genRegMask(regToAssign); | ||
| //#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE | ||
| // if (consecutiveRefPosition->refType == RefTypeUpperVectorRestore) | ||
| // { | ||
| // // For restore refPosition, make sure to have same assignment for it and the next one | ||
| // // which is the use of the variable. | ||
| // consecutiveRefPosition = getNextConsecutiveRefPosition(consecutiveRefPosition); | ||
| // consecutiveRefPosition->registerAssignment = genRegMask(regToAssign); | ||
| // } | ||
| //#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE | ||
| // consecutiveRefPosition = getNextConsecutiveRefPosition(consecutiveRefPosition); | ||
| // regToAssign = regToAssign == REG_FP_LAST ? REG_FP_FIRST : REG_NEXT(regToAssign); | ||
| // | ||
| // INDEBUG(refPosCount++); | ||
| // } | ||
|
|
||
| assert(refPosCount == firstRefPosition->regCount); | ||
|
|
||
| return true; | ||
| } | ||
|
|
||
|
|
@@ -182,7 +151,7 @@ regMaskTP LinearScan::getFreeCandidates(regMaskTP candidates, RefPosition* refPo | |
| return result; | ||
| } | ||
|
|
||
| unsigned int registersNeeded = refPosition->regCount; | ||
| unsigned int registersNeeded = refPosition->regCount; | ||
| regMaskTP currAvailableRegs = result; | ||
| if (BitOperations::PopCount(currAvailableRegs) < registersNeeded) | ||
| { | ||
|
|
@@ -194,19 +163,19 @@ regMaskTP LinearScan::getFreeCandidates(regMaskTP candidates, RefPosition* refPo | |
| // At this point, for 'n' registers requirement, if Rm+1, Rm+2, Rm+3, ..., Rm+k are | ||
kunalspathak marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| // available, create the mask only for Rm+1, Rm+2, ..., Rm+(k-n+1) to convey that it | ||
| // is safe to assign any of those registers, but not beyond that. | ||
| #define AppendConsecutiveMask(startIndex, endIndex, availableRegistersMask) \ | ||
| regMaskTP selectionStartMask = (1ULL << regAvailableStartIndex) - 1; \ | ||
| regMaskTP selectionEndMask = (1ULL << (regAvailableEndIndex - registersNeeded + 1)) - 1; \ | ||
| consecutiveResult |= availableRegistersMask & (selectionEndMask & ~selectionStartMask); \ | ||
| #define AppendConsecutiveMask(startIndex, endIndex, availableRegistersMask) \ | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Maybe this could be a lambda instead?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. No preferences honestly. I thought having |
||
| regMaskTP selectionStartMask = (1ULL << regAvailableStartIndex) - 1; \ | ||
| regMaskTP selectionEndMask = (1ULL << (regAvailableEndIndex - registersNeeded + 1)) - 1; \ | ||
| consecutiveResult |= availableRegistersMask & (selectionEndMask & ~selectionStartMask); \ | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Isn't it the case here that
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. That's right, and we want to just extract the 1st of those bits to be set in |
||
| overallResult |= availableRegistersMask; | ||
|
|
||
| regMaskTP overallResult = RBM_NONE; | ||
| regMaskTP consecutiveResult = RBM_NONE; | ||
| uint32_t regAvailableStartIndex = 0, regAvailableEndIndex = 0; | ||
| uint32_t regAvailableStartIndex = 0, regAvailableEndIndex = 0; | ||
| do | ||
kunalspathak marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| { | ||
| // From LSB, find the first available register (bit `1`) | ||
| regAvailableStartIndex = BitOperations::_BitScanForward(currAvailableRegs); | ||
| regAvailableStartIndex = BitOperations::_BitScanForward(currAvailableRegs); | ||
| regMaskTP startMask = (1ULL << regAvailableStartIndex) - 1; | ||
|
|
||
| // Mask all the bits that are processed from LSB thru regAvailableStart until the last `1`. | ||
|
|
@@ -1446,7 +1415,10 @@ int LinearScan::BuildConsecutiveRegisters(GenTree* treeNode, GenTree* rmwNode) | |
| restoreRefPos->regCount = 0; | ||
| if (firstRefPos == nullptr) | ||
| { | ||
| firstRefPos = restoreRefPos; | ||
| // Always record the non-UpperVectorRestore RefPosition as the first one. The UpperVectorRestore | ||
| // RefPosition can be assigned a different, independent register. | ||
| // See TODO-CQ in setNextConsecutiveRegisterAssignment(). | ||
| firstRefPos = currRefPos; | ||
| } | ||
| refPositionMap->Set(lastRefPos, restoreRefPos, LinearScan::NextConsecutiveRefPositionsMap::Overwrite); | ||
| refPositionMap->Set(restoreRefPos, currRefPos, LinearScan::NextConsecutiveRefPositionsMap::Overwrite); | ||
|
|
@@ -1465,20 +1437,6 @@ int LinearScan::BuildConsecutiveRegisters(GenTree* treeNode, GenTree* rmwNode) | |
|
|
||
| lastRefPos = currRefPos; | ||
| regCount++; | ||
| if (rmwNode != nullptr) | ||
| { | ||
| // If we have rmwNode, determine if the currRefPos should be set to delay-free. | ||
| if ((currRefPos->getInterval() != rmwInterval) || (!rmwIsLastUse && !currRefPos->lastUse)) | ||
| { | ||
| setDelayFree(currRefPos); | ||
| #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE | ||
| if (restoreRefPos != nullptr) | ||
| { | ||
| setDelayFree(restoreRefPos); | ||
| } | ||
| #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE | ||
| } | ||
| } | ||
| } | ||
|
|
||
| // Set `regCount` to the actual register count for the first ref-position. | ||
kunalspathak marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.