-
Notifications
You must be signed in to change notification settings - Fork 1.5k
PARQUET-787: Limit read allocation size #390
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
614a2bb
103ed3d
56b22a6
a4fa05a
b0b6147
be52b59
e7c6c5d
4abba3e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
This renames the existing implementation to SingleBufferInputStream, moves the new implementation to MultiBufferInputStream, and adds an interface that both implement to access slices of the backing arrays.
- Loading branch information
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,82 +1,50 @@ | ||
| /* | ||
| * Licensed to the Apache Software Foundation (ASF) under one | ||
| * or more contributor license agreements. See the NOTICE file | ||
| * distributed with this work for additional information | ||
| * regarding copyright ownership. The ASF licenses this file | ||
| * to you under the Apache License, Version 2.0 (the | ||
| * "License"); you may not use this file except in compliance | ||
| * with the License. You may obtain a copy of the License at | ||
| * Licensed to the Apache Software Foundation (ASF) under one | ||
| * or more contributor license agreements. See the NOTICE file | ||
| * distributed with this work for additional information | ||
| * regarding copyright ownership. The ASF licenses this file | ||
| * to you under the Apache License, Version 2.0 (the | ||
| * "License"); you may not use this file except in compliance | ||
| * with the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, | ||
| * software distributed under the License is distributed on an | ||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
| * KIND, either express or implied. See the License for the | ||
| * specific language governing permissions and limitations | ||
| * under the License. | ||
| * Unless required by applicable law or agreed to in writing, | ||
| * software distributed under the License is distributed on an | ||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
| * KIND, either express or implied. See the License for the | ||
| * specific language governing permissions and limitations | ||
| * under the License. | ||
| */ | ||
|
|
||
| package org.apache.parquet.bytes; | ||
|
|
||
| import java.io.IOException; | ||
| import java.io.EOFException; | ||
| import java.io.InputStream; | ||
| import java.nio.ByteBuffer; | ||
| import java.util.Arrays; | ||
| import java.util.List; | ||
|
|
||
| /** | ||
| * This ByteBufferInputStream does not consume the ByteBuffer being passed in, | ||
| * but will create a slice of the current buffer. | ||
| */ | ||
| public class ByteBufferInputStream extends InputStream { | ||
|
|
||
| protected ByteBuffer byteBuf; | ||
| protected int initPos; | ||
| protected int count; | ||
| public ByteBufferInputStream(ByteBuffer buffer) { | ||
| this(buffer, buffer.position(), buffer.remaining()); | ||
| } | ||
|
|
||
| public ByteBufferInputStream(ByteBuffer buffer, int offset, int count) { | ||
| ByteBuffer temp = buffer.duplicate(); | ||
| temp.position(offset); | ||
| byteBuf = temp.slice(); | ||
| byteBuf.limit(count); | ||
| this.initPos = offset; | ||
| this.count = count; | ||
| } | ||
|
|
||
| public ByteBuffer toByteBuffer() { | ||
| return byteBuf.slice(); | ||
| } | ||
|
|
||
| @Override | ||
| public int read() throws IOException { | ||
| if (!byteBuf.hasRemaining()) { | ||
| return -1; | ||
| public abstract class ByteBufferInputStream extends InputStream { | ||
|
|
||
| public static ByteBufferInputStream wrap(ByteBuffer... buffers) { | ||
| if (buffers.length == 1) { | ||
| return new SingleBufferInputStream(buffers[0]); | ||
| } else { | ||
| return new MultiBufferInputStream(Arrays.asList(buffers)); | ||
| } | ||
| //Workaround for unsigned byte | ||
| return byteBuf.get() & 0xFF; | ||
| } | ||
|
|
||
| @Override | ||
| public int read(byte[] bytes, int offset, int length) throws IOException { | ||
| int count = Math.min(byteBuf.remaining(), length); | ||
| if (count == 0) return -1; | ||
| byteBuf.get(bytes, offset, count); | ||
| return count; | ||
| } | ||
|
|
||
| @Override | ||
| public long skip(long n) { | ||
| if (n > byteBuf.remaining()) | ||
| n = byteBuf.remaining(); | ||
| int pos = byteBuf.position(); | ||
| byteBuf.position((int)(pos + n)); | ||
| return n; | ||
| public static ByteBufferInputStream wrap(List<ByteBuffer> buffers) { | ||
| if (buffers.size() == 1) { | ||
| return new SingleBufferInputStream(buffers.get(0)); | ||
| } else { | ||
| return new MultiBufferInputStream(buffers); | ||
| } | ||
| } | ||
|
|
||
| public abstract long position(); | ||
|
|
||
| public abstract List<ByteBuffer> sliceBuffers(long length) throws EOFException; | ||
|
|
||
| @Override | ||
| public int available() { | ||
| return byteBuf.remaining(); | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -17,12 +17,10 @@ | |
| * under the License. | ||
| */ | ||
|
|
||
| package org.apache.parquet.hadoop.util; | ||
| package org.apache.parquet.bytes; | ||
|
|
||
| import org.apache.parquet.Preconditions; | ||
| import java.io.EOFException; | ||
| import java.io.IOException; | ||
| import java.io.InputStream; | ||
| import java.nio.ByteBuffer; | ||
| import java.util.ArrayList; | ||
| import java.util.Collection; | ||
|
|
@@ -31,7 +29,7 @@ | |
| import java.util.List; | ||
| import java.util.NoSuchElementException; | ||
|
|
||
| public class ByteBufferInputStream extends InputStream { | ||
| class MultiBufferInputStream extends ByteBufferInputStream { | ||
| private static final ByteBuffer EMPTY = ByteBuffer.allocate(0); | ||
|
|
||
| private final Collection<ByteBuffer> buffers; | ||
|
||
|
|
@@ -45,7 +43,7 @@ public class ByteBufferInputStream extends InputStream { | |
| private long markLimit = 0; | ||
| private List<ByteBuffer> markBuffers = new ArrayList<>(); | ||
|
|
||
| public ByteBufferInputStream(Collection<ByteBuffer> buffers) { | ||
| MultiBufferInputStream(Collection<ByteBuffer> buffers) { | ||
| this.buffers = buffers; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think it is not good practice to accept a mutable collection as is. I would suggest copying it at some point.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Because all of the uses are internal, I think it is safe. I'd rather not create extra lists and copy because I don't think it is likely that the lists passed in here are going to be reused.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 👍 |
||
|
|
||
| long totalLen = 0; | ||
|
|
@@ -164,7 +162,7 @@ public int read() throws IOException { | |
| while (true) { | ||
| if (current.remaining() > 0) { | ||
| this.position += 1; | ||
| return current.get(); | ||
| return current.get() & 0xFF; // as unsigned | ||
| } else if (!nextBuffer()) { | ||
| // there are no more buffers | ||
| throw new EOFException(); | ||
|
|
@@ -204,7 +202,7 @@ public synchronized void reset() throws IOException { | |
| discardMark(); | ||
| nextBuffer(); // go back to the marked buffers | ||
| } else { | ||
| throw new RuntimeException("No mark defined"); | ||
| throw new IOException("No mark defined"); | ||
|
||
| } | ||
| } | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Was this whitespace change intentional?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No, I'll revert it.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed.