@@ -629,47 +629,38 @@ impl From<ShreddedVariantFieldArray> for StructArray {
629629///
630630/// | value | typed_value | Meaning |
631631/// |----------|--------------|---------|
632- /// | null | null | The value is missing; only valid for shredded object fields |
633- /// | non-null | null | The value is present and may be any type, including `null` |
634- /// | null | non-null | The value is present and is the shredded type |
635- /// | non-null | non-null | The value is present and is a partially shredded object |
632+ /// | NULL | NULL | The value is missing; only valid for shredded object fields |
633+ /// | non-NULL | NULL | The value is present and may be any type, including [`Variant::Null`] |
634+ /// | NULL | non-NULL | The value is present and is the shredded type |
635+ /// | non-NULL | non-NULL | The value is present and is a partially shredded object |
636+ ///
637+ ///
638+ /// Applying the above rules to entire columns, we obtain the following:
639+ ///
640+ /// | value | typed_value | Meaning |
641+ /// |--------|-------------|---------|
642+ /// | -- | -- | **Missing**: The value is always missing; only valid for shredded object fields |
643+ /// | exists | -- | **Unshredded**: If present, the value may be any type, including [`Variant::Null`]
644+ /// | -- | exists | **Perfectly shredded**: If present, the value is always the shredded type |
645+ /// | exists | exists | **Imperfectly shredded**: The value might (not) be present and might (not) be the shredded type |
646+ ///
647+ /// NOTE: Partial shredding is a row-wise situation that can arise under imperfect shredding (a
648+ /// column-wise situation): When both columns exist (imperfect shredding) and the typed_value column
649+ /// is a struct, then both columns can be non-NULL for the same row if value is a variant object
650+ /// (partial shredding).
636651///
637652/// [Parquet Variant Shredding Spec]: https://github.com/apache/parquet-format/blob/master/VariantShredding.md#value-shredding
638653#[ derive( Clone , Debug ) ]
639- pub enum ShreddingState {
640- /// This variant has no typed_value field
641- Unshredded { value : BinaryViewArray } ,
642- /// This variant has a typed_value field and no value field
643- /// meaning it is the shredded type
644- Typed { typed_value : ArrayRef } ,
645- /// Imperfectly shredded: Shredded values reside in `typed_value` while those that failed to
646- /// shred reside in `value`. Missing field values are NULL in both columns, while NULL primitive
647- /// values have NULL `typed_value` and `Variant::Null` in `value`.
648- ///
649- /// NOTE: A partially shredded struct is a special kind of imperfect shredding, where
650- /// `typed_value` and `value` are both non-NULL. The `typed_value` is a struct containing the
651- /// subset of fields for which shredding was attempted (each field will then have its own value
652- /// and/or typed_value sub-fields that indicate how shredding actually turned out). Meanwhile,
653- /// the `value` is a variant object containing the subset of fields for which shredding was
654- /// not even attempted.
655- PartiallyShredded {
656- value : BinaryViewArray ,
657- typed_value : ArrayRef ,
658- } ,
659- /// All values are null, only metadata is present.
660- ///
661- /// This state occurs when neither `value` nor `typed_value` fields exist in the schema.
662- /// Note: By strict spec interpretation, this should only be valid for shredded object fields,
663- /// not top-level variants. However, we allow it and treat as Variant::Null for pragmatic
664- /// handling of missing data.
665- AllNull ,
654+ pub struct ShreddingState {
655+ value : Option < BinaryViewArray > ,
656+ typed_value : Option < ArrayRef > ,
666657}
667658
668659impl ShreddingState {
669- /// try to create a new `ShreddingState` from the given `value` and `typed_value` fields
660+ /// Create a new `ShreddingState` from the given `value` and `typed_value` fields
670661 ///
671662 /// Note you can create a `ShreddingState` from a &[`StructArray`] using
672- /// `ShreddingState::try_from (&struct_array)`, for example:
663+ /// `ShreddingState::from (&struct_array)`, for example:
673664 ///
674665 /// ```no_run
675666 /// # use arrow::array::StructArray;
@@ -678,53 +669,27 @@ impl ShreddingState {
678669 /// # unimplemented!()
679670 /// # }
680671 /// let struct_array: StructArray = get_struct_array();
681- /// let shredding_state = ShreddingState::try_from (&struct_array).unwrap( );
672+ /// let shredding_state = ShreddingState::from (&struct_array);
682673 /// ```
683674 pub fn new ( value : Option < BinaryViewArray > , typed_value : Option < ArrayRef > ) -> Self {
684- match ( value, typed_value) {
685- ( Some ( value) , Some ( typed_value) ) => Self :: PartiallyShredded { value, typed_value } ,
686- ( Some ( value) , None ) => Self :: Unshredded { value } ,
687- ( None , Some ( typed_value) ) => Self :: Typed { typed_value } ,
688- ( None , None ) => Self :: AllNull ,
689- }
675+ Self { value, typed_value }
690676 }
691677
692678 /// Return a reference to the value field, if present
693679 pub fn value_field ( & self ) -> Option < & BinaryViewArray > {
694- match self {
695- ShreddingState :: Unshredded { value, .. } => Some ( value) ,
696- ShreddingState :: Typed { .. } => None ,
697- ShreddingState :: PartiallyShredded { value, .. } => Some ( value) ,
698- ShreddingState :: AllNull => None ,
699- }
680+ self . value . as_ref ( )
700681 }
701682
702683 /// Return a reference to the typed_value field, if present
703684 pub fn typed_value_field ( & self ) -> Option < & ArrayRef > {
704- match self {
705- ShreddingState :: Unshredded { .. } => None ,
706- ShreddingState :: Typed { typed_value, .. } => Some ( typed_value) ,
707- ShreddingState :: PartiallyShredded { typed_value, .. } => Some ( typed_value) ,
708- ShreddingState :: AllNull => None ,
709- }
685+ self . typed_value . as_ref ( )
710686 }
711687
712688 /// Slice all the underlying arrays
713689 pub fn slice ( & self , offset : usize , length : usize ) -> Self {
714- match self {
715- ShreddingState :: Unshredded { value } => ShreddingState :: Unshredded {
716- value : value. slice ( offset, length) ,
717- } ,
718- ShreddingState :: Typed { typed_value } => ShreddingState :: Typed {
719- typed_value : typed_value. slice ( offset, length) ,
720- } ,
721- ShreddingState :: PartiallyShredded { value, typed_value } => {
722- ShreddingState :: PartiallyShredded {
723- value : value. slice ( offset, length) ,
724- typed_value : typed_value. slice ( offset, length) ,
725- }
726- }
727- ShreddingState :: AllNull => ShreddingState :: AllNull ,
690+ Self {
691+ value : self . value . as_ref ( ) . map ( |v| v. slice ( offset, length) ) ,
692+ typed_value : self . typed_value . as_ref ( ) . map ( |tv| tv. slice ( offset, length) ) ,
728693 }
729694 }
730695}
@@ -1060,7 +1025,10 @@ mod test {
10601025 // Verify the shredding state is AllNull
10611026 assert ! ( matches!(
10621027 variant_array. shredding_state( ) ,
1063- ShreddingState :: AllNull
1028+ ShreddingState {
1029+ value: None ,
1030+ typed_value: None
1031+ }
10641032 ) ) ;
10651033
10661034 // Verify that value() returns Variant::Null (compensating for spec violation)
@@ -1120,7 +1088,10 @@ mod test {
11201088 // Verify the shredding state is AllNull
11211089 assert ! ( matches!(
11221090 ShreddingState :: new( None , None ) ,
1123- ShreddingState :: AllNull
1091+ ShreddingState {
1092+ value: None ,
1093+ typed_value: None
1094+ }
11241095 ) ) ;
11251096 }
11261097
@@ -1137,7 +1108,10 @@ mod test {
11371108 // Verify the shredding state is AllNull
11381109 assert ! ( matches!(
11391110 variant_array. shredding_state( ) ,
1140- ShreddingState :: AllNull
1111+ ShreddingState {
1112+ value: None ,
1113+ typed_value: None
1114+ }
11411115 ) ) ;
11421116
11431117 // Verify all values are null
@@ -1187,7 +1161,10 @@ mod test {
11871161 // This should be Unshredded, not AllNull, because value field exists in schema
11881162 assert ! ( matches!(
11891163 variant_array. shredding_state( ) ,
1190- ShreddingState :: Unshredded { .. }
1164+ ShreddingState {
1165+ value: Some ( _) ,
1166+ typed_value: None
1167+ }
11911168 ) ) ;
11921169 }
11931170}
0 commit comments