2727import org .apache .parquet .column .ParquetProperties ;
2828import org .apache .parquet .column .page .DictionaryPage ;
2929import org .apache .parquet .column .page .PageWriter ;
30+ import org .apache .parquet .column .statistics .SizeStatistics ;
3031import org .apache .parquet .column .statistics .Statistics ;
3132import org .apache .parquet .column .values .ValuesWriter ;
3233import org .apache .parquet .column .values .bloomfilter .BlockSplitBloomFilter ;
@@ -56,6 +57,7 @@ abstract class ColumnWriterBase implements ColumnWriter {
5657 private int valueCount ;
5758
5859 private Statistics <?> statistics ;
60+ private SizeStatistics .Builder sizeStatisticsBuilder ;
5961 private long rowsWrittenSoFar = 0 ;
6062 private int pageRowCount ;
6163
@@ -116,6 +118,8 @@ private void log(Object value, int r, int d) {
116118
117119 private void resetStatistics () {
118120 this .statistics = Statistics .createStats (path .getPrimitiveType ());
121+ this .sizeStatisticsBuilder = new SizeStatistics .Builder (
122+ path .getPrimitiveType (), path .getMaxRepetitionLevel (), path .getMaxDefinitionLevel ());
119123 }
120124
121125 private void definitionLevel (int definitionLevel ) {
@@ -143,6 +147,7 @@ public void writeNull(int repetitionLevel, int definitionLevel) {
143147 repetitionLevel (repetitionLevel );
144148 definitionLevel (definitionLevel );
145149 statistics .incrementNumNulls ();
150+ sizeStatisticsBuilder .add (repetitionLevel , definitionLevel );
146151 ++valueCount ;
147152 }
148153
@@ -207,6 +212,7 @@ public void write(double value, int repetitionLevel, int definitionLevel) {
207212 definitionLevel (definitionLevel );
208213 dataColumn .writeDouble (value );
209214 statistics .updateStats (value );
215+ sizeStatisticsBuilder .add (repetitionLevel , definitionLevel );
210216 updateBloomFilter (value );
211217 ++valueCount ;
212218 }
@@ -226,6 +232,7 @@ public void write(float value, int repetitionLevel, int definitionLevel) {
226232 definitionLevel (definitionLevel );
227233 dataColumn .writeFloat (value );
228234 statistics .updateStats (value );
235+ sizeStatisticsBuilder .add (repetitionLevel , definitionLevel );
229236 updateBloomFilter (value );
230237 ++valueCount ;
231238 }
@@ -245,6 +252,7 @@ public void write(Binary value, int repetitionLevel, int definitionLevel) {
245252 definitionLevel (definitionLevel );
246253 dataColumn .writeBytes (value );
247254 statistics .updateStats (value );
255+ sizeStatisticsBuilder .add (repetitionLevel , definitionLevel , value );
248256 updateBloomFilter (value );
249257 ++valueCount ;
250258 }
@@ -264,6 +272,7 @@ public void write(boolean value, int repetitionLevel, int definitionLevel) {
264272 definitionLevel (definitionLevel );
265273 dataColumn .writeBoolean (value );
266274 statistics .updateStats (value );
275+ sizeStatisticsBuilder .add (repetitionLevel , definitionLevel );
267276 ++valueCount ;
268277 }
269278
@@ -282,6 +291,7 @@ public void write(int value, int repetitionLevel, int definitionLevel) {
282291 definitionLevel (definitionLevel );
283292 dataColumn .writeInteger (value );
284293 statistics .updateStats (value );
294+ sizeStatisticsBuilder .add (repetitionLevel , definitionLevel );
285295 updateBloomFilter (value );
286296 ++valueCount ;
287297 }
@@ -301,6 +311,7 @@ public void write(long value, int repetitionLevel, int definitionLevel) {
301311 definitionLevel (definitionLevel );
302312 dataColumn .writeLong (value );
303313 statistics .updateStats (value );
314+ sizeStatisticsBuilder .add (repetitionLevel , definitionLevel );
304315 updateBloomFilter (value );
305316 ++valueCount ;
306317 }
@@ -395,7 +406,8 @@ void writePage() {
395406 if (DEBUG )
396407 LOG .debug ("write page" );
397408 try {
398- writePage (pageRowCount , valueCount , statistics , repetitionLevelColumn , definitionLevelColumn , dataColumn );
409+ writePage (pageRowCount , valueCount , statistics , sizeStatisticsBuilder .build (),
410+ repetitionLevelColumn , definitionLevelColumn , dataColumn );
399411 } catch (IOException e ) {
400412 throw new ParquetEncodingException ("could not write page for " + path , e );
401413 }
@@ -407,6 +419,10 @@ void writePage() {
407419 pageRowCount = 0 ;
408420 }
409421
422+ @ Deprecated
410423 abstract void writePage (int rowCount , int valueCount , Statistics <?> statistics , ValuesWriter repetitionLevels ,
411424 ValuesWriter definitionLevels , ValuesWriter values ) throws IOException ;
425+
426+ abstract void writePage (int rowCount , int valueCount , Statistics <?> statistics , SizeStatistics sizeStatistics ,
427+ ValuesWriter repetitionLevels , ValuesWriter definitionLevels , ValuesWriter values ) throws IOException ;
412428}
0 commit comments