-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-15962][SQL] Introduce implementation with a dense format for UnsafeArrayData #13680
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Closed
Closed
Changes from 1 commit
Commits
Show all changes
37 commits
Select commit
Hold shift + click to select a range
fb9a42d
add two implementations (sparse and dense) for UnsafeArrayData
kiszk d931428
fix failures of testsuite
kiszk 9777a2d
fix errors of unit tests
kiszk 000eda4
fix failures of unit tests
kiszk 804f081
make DenseID public
kiszk e6fb261
Use one implementation approach
kiszk a313084
fix test failures
kiszk 68d92f7
fix test failures
kiszk 7f2da14
update test suite
kiszk 2f26f6f
fix scala style error
kiszk ccef63c
revert changes
kiszk c4f1b5e
addressed comments
kiszk 34a5c6a
add benchmark
kiszk 7a77b20
fix scala style error
kiszk 7b0d4da
addressed comments
kiszk b4eac29
addressed comments
kiszk eecf6bd
fix parameters of Platform.OFFSET
kiszk d88a25a
update benchmark results
kiszk db15432
add test cases
kiszk 3fa7052
addressed comments
kiszk 4c094c2
addressed comments
kiszk 9887171
update test cases
kiszk 9fe7ad0
address comments
kiszk e4b4b52
address comments for test cases and benchmark
kiszk 585ca7b
addressed comments
kiszk 9933a06
addressed review comments
kiszk 919e832
fixed test failures
kiszk 0886e3a
update test suites
kiszk c385bf4
align each of variable length elements to 8 bytes
kiszk c8813db
fixed test failures
kiszk aa7cfdb
fixed test failures
kiszk 0b7867b
address review comments
kiszk ab9a16a
address review comments
kiszk 515701b
address review comments
kiszk 8169abd
change benchmark size
kiszk e356a79
addressed comments
kiszk 2ef6e3b
update performance results
kiszk File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
align each of variable length elements to 8 bytes
- Loading branch information
commit c385bf485af2ed33465aae906abd8246b512a5e2
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -55,22 +55,41 @@ public void initialize(BufferHolder holder, int numElements, int elementSize) { | |
| this.startingOffset = holder.cursor; | ||
|
|
||
| // Grows the global buffer ahead for header and fixed size data. | ||
| holder.grow(headerInBytes + elementSize * numElements); | ||
| int fixedPartLength = ((elementSize * numElements + 7) / 8) * 8; | ||
| holder.grow(headerInBytes + fixedPartLength); | ||
|
|
||
| // Write numElements and clear out null bits to header | ||
| Platform.putLong(holder.buffer, startingOffset, numElements); | ||
| for (int i = 8; i < headerInBytes; i += 8) { | ||
| Platform.putLong(holder.buffer, startingOffset + i, 0L); | ||
| } | ||
| holder.cursor += (headerInBytes + elementSize * numElements); | ||
| holder.cursor += (headerInBytes + fixedPartLength); | ||
| } | ||
|
|
||
| private long getElementOffset(int ordinal, int elementSize) { | ||
| return startingOffset + headerInBytes + ordinal * elementSize; | ||
| } | ||
|
|
||
| public void setOffset(int ordinal) { | ||
| write(ordinal, holder.cursor - startingOffset); | ||
| public void setOffsetAndSize(int ordinal, long currentCursor, long size) { | ||
| final long relativeOffset = currentCursor - startingOffset; | ||
| final long offsetAndSize = (relativeOffset << 32) | size; | ||
|
|
||
| write(ordinal, offsetAndSize); | ||
| } | ||
|
|
||
| // Do word alignment for this row and grow the row buffer if needed. | ||
| public void alignToEightBytes(int numBytes) { | ||
| final int remainder = numBytes & 0x07; | ||
|
|
||
| if (remainder > 0) { | ||
| final int paddingBytes = 8 - remainder; | ||
| holder.grow(paddingBytes); | ||
|
|
||
| for (int i = 0; i < paddingBytes; i++) { | ||
| Platform.putByte(holder.buffer, holder.cursor, (byte) 0); | ||
| holder.cursor++; | ||
| } | ||
| } | ||
| } | ||
|
|
||
| private void setNullBit(int ordinal) { | ||
|
|
@@ -182,10 +201,10 @@ public void write(int ordinal, Decimal input, int precision, int scale) { | |
| // Write the bytes to the variable length portion. | ||
| Platform.copyMemory( | ||
| bytes, Platform.BYTE_ARRAY_OFFSET, holder.buffer, holder.cursor, bytes.length); | ||
| setOffset(ordinal); | ||
| write(ordinal, ((long)(holder.cursor - startingOffset) << 32) | ((long) bytes.length)); | ||
|
|
||
| // move the cursor forward. | ||
| holder.cursor += bytes.length; | ||
| // move the cursor forward with 8-bytes boundary | ||
| holder.cursor += ((bytes.length + 7) / 8) * 8; | ||
| } | ||
| } else { | ||
| setNull(ordinal); | ||
|
|
@@ -194,31 +213,34 @@ public void write(int ordinal, Decimal input, int precision, int scale) { | |
|
|
||
| public void write(int ordinal, UTF8String input) { | ||
| final int numBytes = input.numBytes(); | ||
| final int bufferLength = ((numBytes + 7) / 8) * 8; // 8-bytes boundary | ||
|
|
||
| // grow the global buffer before writing data. | ||
| holder.grow(numBytes); | ||
| holder.grow(bufferLength); | ||
|
|
||
| // Write the bytes to the variable length portion. | ||
| input.writeToMemory(holder.buffer, holder.cursor); | ||
|
|
||
| setOffset(ordinal); | ||
| write(ordinal, ((long)(holder.cursor - startingOffset) << 32) | ((long) numBytes)); | ||
|
||
|
|
||
| // move the cursor forward. | ||
| holder.cursor += numBytes; | ||
| holder.cursor += bufferLength; | ||
| } | ||
|
|
||
| public void write(int ordinal, byte[] input) { | ||
| final int bufferLength = ((input.length + 7) / 8) * 8; // 8-bytes boundary | ||
|
|
||
| // grow the global buffer before writing data. | ||
| holder.grow(input.length); | ||
| holder.grow(bufferLength); | ||
|
|
||
| // Write the bytes to the variable length portion. | ||
| Platform.copyMemory( | ||
| input, Platform.BYTE_ARRAY_OFFSET, holder.buffer, holder.cursor, input.length); | ||
|
|
||
| setOffset(ordinal); | ||
| write(ordinal, ((long)(holder.cursor - startingOffset) << 32) | ((long) input.length)); | ||
|
|
||
| // move the cursor forward. | ||
| holder.cursor += input.length; | ||
| holder.cursor += bufferLength; | ||
| } | ||
|
|
||
| public void write(int ordinal, CalendarInterval input) { | ||
|
|
@@ -229,7 +251,7 @@ public void write(int ordinal, CalendarInterval input) { | |
| Platform.putLong(holder.buffer, holder.cursor, input.months); | ||
| Platform.putLong(holder.buffer, holder.cursor + 8, input.microseconds); | ||
|
|
||
| setOffset(ordinal); | ||
| write(ordinal, ((long)(holder.cursor - startingOffset) << 32) | ((long) 16)); | ||
|
|
||
| // move the cursor forward. | ||
| holder.cursor += 16; | ||
|
|
||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We should abstract it into a method, like
UnsafeRowWriter.setOffsetAndSize
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
yes, done