Skip to content

Commit da467da

Browse files
authored
Regenerate Float16 files without compression (#42)
2 parents 89b685a + ee03e10 commit da467da

File tree

3 files changed

+14
-14
lines changed

3 files changed

+14
-14
lines changed

data/README.md

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -261,23 +261,23 @@ t2 = pa.Table.from_arrays(
 type=pa.float16())],
 names="x")
 
-pq.write_table(t1, "float16_zeros_and_nans.parquet")
-pq.write_table(t2, "float16_nonzeros_and_nans.parquet")
+pq.write_table(t1, "float16_zeros_and_nans.parquet", compression='none')
+pq.write_table(t2, "float16_nonzeros_and_nans.parquet", compression='none')
 
 m1 = pq.read_metadata("float16_zeros_and_nans.parquet")
 m2 = pq.read_metadata("float16_nonzeros_and_nans.parquet")
 
 print(m1.row_group(0).column(0))
 print(m2.row_group(0).column(0))
-# <pyarrow._parquet.ColumnChunkMetaData object at 0x7f24d48c4d60>
-# file_offset: 72
+# <pyarrow._parquet.ColumnChunkMetaData object at 0x7f79e9a3d850>
+# file_offset: 68
 # file_path:
 # physical_type: FIXED_LEN_BYTE_ARRAY
 # num_values: 3
 # path_in_schema: x
 # is_stats_set: True
 # statistics:
-# <pyarrow._parquet.Statistics object at 0x7f24d48c4ea0>
+# <pyarrow._parquet.Statistics object at 0x7f79e9a3d940>
 # has_min_max: True
 # min: b'\x00\x80'
 # max: b'\x00\x00'
@@ -287,22 +287,22 @@ print(m2.row_group(0).column(0))
 # physical_type: FIXED_LEN_BYTE_ARRAY
 # logical_type: Float16
 # converted_type (legacy): NONE
-# compression: SNAPPY
+# compression: UNCOMPRESSED
 # encodings: ('PLAIN', 'RLE', 'RLE_DICTIONARY')
 # has_dictionary_page: True
 # dictionary_page_offset: 4
-# data_page_offset: 24
-# total_compressed_size: 68
+# data_page_offset: 22
+# total_compressed_size: 64
 # total_uncompressed_size: 64
-# <pyarrow._parquet.ColumnChunkMetaData object at 0x7f24d48c4d60>
-# file_offset: 84
+# <pyarrow._parquet.ColumnChunkMetaData object at 0x7f79ea003c40>
+# file_offset: 80
 # file_path:
 # physical_type: FIXED_LEN_BYTE_ARRAY
 # num_values: 8
 # path_in_schema: x
 # is_stats_set: True
 # statistics:
-# <pyarrow._parquet.Statistics object at 0x7f24d48c4e50>
+# <pyarrow._parquet.Statistics object at 0x7f79e9a3d8a0>
 # has_min_max: True
 # min: b'\x00\xc0'
 # max: b'\x00@'
@@ -312,11 +312,11 @@ print(m2.row_group(0).column(0))
 # physical_type: FIXED_LEN_BYTE_ARRAY
 # logical_type: Float16
 # converted_type (legacy): NONE
-# compression: SNAPPY
+# compression: UNCOMPRESSED
 # encodings: ('PLAIN', 'RLE', 'RLE_DICTIONARY')
 # has_dictionary_page: True
 # dictionary_page_offset: 4
-# data_page_offset: 34
-# total_compressed_size: 80
+# data_page_offset: 32
+# total_compressed_size: 76
 # total_uncompressed_size: 76
 ```
-4 Bytes
Binary file not shown.
-4 Bytes
Binary file not shown.

0 commit comments

Comments
 (0)