diff --git a/arrow-array/src/array/primitive_array.rs b/arrow-array/src/array/primitive_array.rs index 917822d4248f..7ec9ff928e9a 100644 --- a/arrow-array/src/array/primitive_array.rs +++ b/arrow-array/src/array/primitive_array.rs @@ -351,19 +351,51 @@ pub type Time64MicrosecondArray = PrimitiveArray; /// hold values such as `00:02:00.123456789` pub type Time64NanosecondArray = PrimitiveArray; -/// A [`PrimitiveArray`] of “calendar” intervals in months +/// A [`PrimitiveArray`] of “calendar” intervals in whole months /// /// See [`IntervalYearMonthType`] for details on representation and caveats. +/// +/// # Example +/// ``` +/// # use arrow_array::IntervalYearMonthArray; +/// let array = IntervalYearMonthArray::from(vec![ +/// 2, // 2 months +/// 25, // 2 years and 1 month +/// -1 // -1 months +/// ]); +/// ``` pub type IntervalYearMonthArray = PrimitiveArray; /// A [`PrimitiveArray`] of “calendar” intervals in days and milliseconds /// -/// See [`IntervalDayTimeType`] for details on representation and caveats. +/// See [`IntervalDayTime`] for details on representation and caveats. +/// +/// # Example +/// ``` +/// # use arrow_array::IntervalDayTimeArray; +/// use arrow_array::types::IntervalDayTime; +/// let array = IntervalDayTimeArray::from(vec![ +/// IntervalDayTime::new(1, 1000), // 1 day, 1000 milliseconds +/// IntervalDayTime::new(33, 0), // 33 days, 0 milliseconds +/// IntervalDayTime::new(0, 12 * 60 * 60 * 1000), // 0 days, 12 hours +/// ]); +/// ``` pub type IntervalDayTimeArray = PrimitiveArray; /// A [`PrimitiveArray`] of “calendar” intervals in months, days, and nanoseconds. /// -/// See [`IntervalMonthDayNanoType`] for details on representation and caveats. +/// See [`IntervalMonthDayNano`] for details on representation and caveats. 
+/// +/// # Example +/// ``` +/// # use arrow_array::IntervalMonthDayNanoArray; +/// use arrow_array::types::IntervalMonthDayNano; +/// let array = IntervalMonthDayNanoArray::from(vec![ +/// IntervalMonthDayNano::new(1, 2, 1000), // 1 month, 2 days, 1000 nanoseconds +/// IntervalMonthDayNano::new(12, 1, 0), // 12 months, 1 day, 0 nanoseconds +/// IntervalMonthDayNano::new(0, 0, 12 * 1000 * 1000), // 0 days, 12 milliseconds +/// ]); +/// ``` pub type IntervalMonthDayNanoArray = PrimitiveArray; /// A [`PrimitiveArray`] of elapsed durations in seconds diff --git a/arrow-array/src/types.rs b/arrow-array/src/types.rs index ac77fd45d03c..86298cf3e729 100644 --- a/arrow-array/src/types.rs +++ b/arrow-array/src/types.rs @@ -23,7 +23,7 @@ use crate::delta::{ use crate::temporal_conversions::as_datetime_with_timezone; use crate::timezone::Tz; use crate::{ArrowNativeTypeOp, OffsetSizeTrait}; -use arrow_buffer::{i256, Buffer, IntervalDayTime, IntervalMonthDayNano, OffsetBuffer}; +use arrow_buffer::{i256, Buffer, OffsetBuffer}; use arrow_data::decimal::{validate_decimal256_precision, validate_decimal_precision}; use arrow_data::{validate_binary_view, validate_string_view}; use arrow_schema::{ @@ -36,6 +36,9 @@ use std::fmt::Debug; use std::marker::PhantomData; use std::ops::{Add, Sub}; +// re-export types so that they can be used without importing arrow_buffer explicitly +pub use arrow_buffer::{IntervalDayTime, IntervalMonthDayNano}; + // BooleanType is special: its bit-width is not the size of the primitive type, and its `index` // operation assumes bit-packing. /// A boolean datatype @@ -218,84 +221,19 @@ make_type!( IntervalYearMonthType, i32, DataType::Interval(IntervalUnit::YearMonth), - "A “calendar” interval stored as the number of whole months." + "A 32-bit “calendar” interval type representing the number of whole months."
); make_type!( IntervalDayTimeType, IntervalDayTime, DataType::Interval(IntervalUnit::DayTime), - r#"A “calendar” interval type in days and milliseconds. - -## Representation -This type is stored as a single 64 bit integer, interpreted as two i32 fields: -1. the number of elapsed days -2. The number of milliseconds (no leap seconds), - -```text - ┌──────────────┬──────────────┐ - │ Days │ Milliseconds │ - │ (32 bits) │ (32 bits) │ - └──────────────┴──────────────┘ - 0 31 63 bit offset -``` -Please see the [Arrow Spec](https://github.com/apache/arrow/blob/081b4022fe6f659d8765efc82b3f4787c5039e3c/format/Schema.fbs#L406-L408) for more details - -## Note on Comparing and Ordering for Calendar Types - -Values of `IntervalDayTimeType` are compared using their binary representation, -which can lead to surprising results. Please see the description of ordering on -[`IntervalMonthDayNanoType`] for more details -"# + "A “calendar” interval type representing days and milliseconds. See [`IntervalDayTime`] for more details." ); make_type!( IntervalMonthDayNanoType, IntervalMonthDayNano, DataType::Interval(IntervalUnit::MonthDayNano), - r#"A “calendar” interval type in months, days, and nanoseconds. - -## Representation -This type is stored as a single 128 bit integer, -interpreted as three different signed integral fields: - -1. The number of months (32 bits) -2. The number days (32 bits) -2. The number of nanoseconds (64 bits). - -Nanoseconds does not allow for leap seconds. -Each field is independent (e.g. there is no constraint that the quantity of -nanoseconds represents less than a day's worth of time). 
- -```text -┌───────────────┬─────────────┬─────────────────────────────┐ -│ Months │ Days │ Nanos │ -│ (32 bits) │ (32 bits) │ (64 bits) │ -└───────────────┴─────────────┴─────────────────────────────┘ - 0 32 64 128 bit offset -``` -Please see the [Arrow Spec](https://github.com/apache/arrow/blob/081b4022fe6f659d8765efc82b3f4787c5039e3c/format/Schema.fbs#L409-L415) for more details - -## Note on Comparing and Ordering for Calendar Types -Values of `IntervalMonthDayNanoType` are compared using their binary representation, -which can lead to surprising results. - -Spans of time measured in calendar units are not fixed in absolute size (e.g. -number of seconds) which makes defining comparisons and ordering non trivial. -For example `1 month` is 28 days for February but `1 month` is 31 days -in December. - -This makes the seemingly simple operation of comparing two intervals -complicated in practice. For example is `1 month` more or less than `30 days`? The -answer depends on what month you are talking about. - -This crate defines comparisons for calendar types using their binary -representation which is fast and efficient, but leads -to potentially surprising results. - -For example a -`IntervalMonthDayNano` of `1 month` will compare as **greater** than a -`IntervalMonthDayNano` of `100 days` because the binary representation of `1 month` -is larger than the binary representation of 100 days. -"# + r"A “calendar” interval type representing months, days, and nanoseconds. See [`IntervalMonthDayNano`] for more details." 
); make_type!( DurationSecondType, diff --git a/arrow-buffer/src/interval.rs b/arrow-buffer/src/interval.rs index bed3b2e31ada..8f3342131656 100644 --- a/arrow-buffer/src/interval.rs +++ b/arrow-buffer/src/interval.rs @@ -19,6 +19,52 @@ use crate::arith::derive_arith; use std::ops::Neg; /// Value of an IntervalMonthDayNano array +/// +/// ## Representation +/// +/// This type is stored as a single 128 bit integer, interpreted as three +/// different signed integral fields: +/// +/// 1. The number of months (32 bits) +/// 2. The number of days (32 bits) +/// 3. The number of nanoseconds (64 bits). +/// +/// The nanoseconds field does not allow for leap seconds. +/// +/// Each field is independent (e.g. there is no constraint that the quantity of +/// nanoseconds represents less than a day's worth of time). +/// +/// ```text +/// ┌───────────────┬─────────────┬─────────────────────────────┐ +/// │ Months │ Days │ Nanos │ +/// │ (32 bits) │ (32 bits) │ (64 bits) │ +/// └───────────────┴─────────────┴─────────────────────────────┘ +/// 0 32 64 128 bit offset +/// ``` +/// Please see the [Arrow Spec](https://github.com/apache/arrow/blob/081b4022fe6f659d8765efc82b3f4787c5039e3c/format/Schema.fbs#L409-L415) for more details +/// +/// ## Note on Comparing and Ordering for Calendar Types +/// +/// Values of `IntervalMonthDayNano` are compared using their binary +/// representation, which can lead to surprising results. +/// +/// Spans of time measured in calendar units are not fixed in absolute size (e.g. +/// number of seconds) which makes defining comparisons and ordering non-trivial. +/// For example `1 month` is 28 days for February but `1 month` is 31 days +/// in December. +/// +/// This makes the seemingly simple operation of comparing two intervals +/// complicated in practice. For example is `1 month` more or less than `30 +/// days`? The answer depends on what month you are talking about.
+/// +/// This crate defines comparisons for calendar types using their binary +/// representation which is fast and efficient, but leads +/// to potentially surprising results. +/// +/// For example an +/// `IntervalMonthDayNano` of `1 month` will compare as **greater** than an +/// `IntervalMonthDayNano` of `100 days` because the binary representation of `1 month` +/// is larger than the binary representation of 100 days. #[derive(Debug, Default, Copy, Clone, Eq, PartialEq, Hash, Ord, PartialOrd)] #[repr(C)] pub struct IntervalMonthDayNano { @@ -272,6 +318,30 @@ derive_arith!( ); /// Value of an IntervalDayTime array +/// +/// ## Representation +/// +/// This type is stored as a single 64 bit integer, interpreted as two i32 +/// fields: +/// +/// 1. The number of elapsed days +/// 2. The number of milliseconds (no leap seconds). +/// +/// ```text +/// ┌──────────────┬──────────────┐ +/// │ Days │ Milliseconds │ +/// │ (32 bits) │ (32 bits) │ +/// └──────────────┴──────────────┘ +/// 0 32 64 bit offset +/// ``` +/// +/// Please see the [Arrow Spec](https://github.com/apache/arrow/blob/081b4022fe6f659d8765efc82b3f4787c5039e3c/format/Schema.fbs#L406-L408) for more details. +/// +/// ## Note on Comparing and Ordering for Calendar Types +/// +/// Values of `IntervalDayTime` are compared using their binary representation, +/// which can lead to surprising results. Please see the description of ordering on +/// [`IntervalMonthDayNano`] for more details. #[derive(Debug, Default, Copy, Clone, Eq, PartialEq, Hash, Ord, PartialOrd)] #[repr(C)] pub struct IntervalDayTime {