From 3c5a041b205db9ebf90bee53c6235a8b9918b18e Mon Sep 17 00:00:00 2001
From: Valentin Lorentz <vlorentz@softwareheritage.org>
Date: Wed, 19 Jun 2024 23:16:40 +0200
Subject: [PATCH 1/3] Add support for decoding Timestamp as Decimal128

This allows support for the full range of ORC timestamp, with full precision.
---
 README.md                      |  14 ++-
 src/array_decoder/timestamp.rs | 210 ++++++++++++++++++++++-----------
 src/reader/decode/timestamp.rs |  17 +--
 tests/basic/main.rs            |  31 ++++-
 4 files changed, 188 insertions(+), 84 deletions(-)

diff --git a/README.md b/README.md
index bce2ddd..6532c32 100644
--- a/README.md
+++ b/README.md
@@ -68,14 +68,20 @@ The following table lists how ORC data types are read into Arrow data types:
 | Binary            | Binary                      |       |
 | Decimal           | Decimal128                  |       |
 | Date              | Date32                      |       |
-| Timestamp         | Timestamp(Nanosecond, None) | `ArrowReaderBuilder::with_schema` allows configuring different time units. Overflows or loss of precision while decoding results in `OrcError` |
-| Timestamp instant | Timestamp(Nanosecond, UTC)  | `ArrowReaderBuilder::with_schema` allows configuring different time units. Overflows or loss of precision while decoding results in `OrcError` |
+| Timestamp         | Timestamp(Nanosecond, None) | ¹     |
+| Timestamp instant | Timestamp(Nanosecond, UTC)  | ¹     |
 | Struct            | Struct                      |       |
 | List              | List                        |       |
 | Map               | Map                         |       |
-| Union             | Union(_, Sparse)*           |       |
+| Union             | Union(_, Sparse)            | ²     |
 
-*Currently only supports a maximum of 127 variants
+¹: `ArrowReaderBuilder::with_schema` allows configuring different time units or decoding to
+`Decimal128(38, 9)` (i128 of non-leap nanoseconds since UNIX epoch).
+Overflows may happen while decoding to a non-Seconds time unit, and results in `OrcError`.
+Loss of precision may happen while decoding to a non-Nanosecond time unit, and results in `OrcError`.
+`Decimal128(38, 9)` avoids both overflows and loss of precision.
+
+²: Currently only supports a maximum of 127 variants
 
 ## Contributing
 
diff --git a/src/array_decoder/timestamp.rs b/src/array_decoder/timestamp.rs
index 6922eea..a48cd69 100644
--- a/src/array_decoder/timestamp.rs
+++ b/src/array_decoder/timestamp.rs
@@ -10,16 +10,20 @@ use crate::{
 };
 use arrow::array::ArrayRef;
 use arrow::datatypes::{
-    ArrowTimestampType, TimeUnit, TimestampMicrosecondType, TimestampMillisecondType,
-    TimestampNanosecondType, TimestampSecondType,
+    ArrowTimestampType, Decimal128Type, DecimalType, TimeUnit, TimestampMicrosecondType,
+    TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType,
 };
 use chrono::offset::TimeZone;
+use chrono::TimeDelta;
 use chrono_tz::Tz;
 use snafu::ensure;
 
-use super::{ArrayBatchDecoder, PrimitiveArrayDecoder};
+use super::{ArrayBatchDecoder, DecimalArrayDecoder, PrimitiveArrayDecoder};
 use crate::error::UnsupportedTypeVariantSnafu;
 
+const NANOSECONDS_IN_SECOND: i128 = 1_000_000_000;
+const NANOSECOND_DIGITS: i8 = 9;
+
 /// Statically dispatches to the right ArrowTimestampType based on the value of $time_unit
 /// to create a $decoder_type with that type as type parameter and $iter/$present as value
 /// parameters, then applies $f to it and $tz.
@@ -40,7 +44,7 @@ macro_rules! decoder_for_time_unit {
 
         match $time_unit {
             TimeUnit::Second => {
-                let iter = Box::new(TimestampIterator::<TimestampSecondType>::new(
+                let iter = Box::new(TimestampIterator::<TimestampSecondType, _>::new(
                     $seconds_since_unix_epoch,
                     data,
                     secondary,
@@ -51,7 +55,7 @@ macro_rules! decoder_for_time_unit {
                 )))
             }
             TimeUnit::Millisecond => {
-                let iter = Box::new(TimestampIterator::<TimestampMillisecondType>::new(
+                let iter = Box::new(TimestampIterator::<TimestampMillisecondType, _>::new(
                     $seconds_since_unix_epoch,
                     data,
                     secondary,
@@ -62,7 +66,7 @@ macro_rules! decoder_for_time_unit {
                 )))
             }
             TimeUnit::Microsecond => {
-                let iter = Box::new(TimestampIterator::<TimestampMicrosecondType>::new(
+                let iter = Box::new(TimestampIterator::<TimestampMicrosecondType, _>::new(
                     $seconds_since_unix_epoch,
                     data,
                     secondary,
@@ -73,7 +77,7 @@ macro_rules! decoder_for_time_unit {
                 )))
             }
             TimeUnit::Nanosecond => {
-                let iter = Box::new(TimestampIterator::<TimestampNanosecondType>::new(
+                let iter = Box::new(TimestampIterator::<TimestampNanosecondType, _>::new(
                     $seconds_since_unix_epoch,
                     data,
                     secondary,
@@ -87,6 +91,52 @@ macro_rules! decoder_for_time_unit {
     }};
 }
 
+fn decimal128_decoder(
+    column: &Column,
+    stripe: &Stripe,
+    seconds_since_unix_epoch: i64,
+    writer_tz: Option<Tz>,
+) -> Result<DecimalArrayDecoder> {
+    let data = stripe.stream_map().get(column, Kind::Data);
+    let data = get_rle_reader(column, data)?;
+
+    let secondary = stripe.stream_map().get(column, Kind::Secondary);
+    let secondary = get_rle_reader(column, secondary)?;
+
+    let present = get_present_vec(column, stripe)?
+        .map(|iter| Box::new(iter.into_iter()) as Box<dyn Iterator<Item = bool> + Send>);
+
+    let iter = TimestampIterator::<TimestampNanosecondType, i128>::new(
+        seconds_since_unix_epoch,
+        data,
+        secondary,
+    );
+
+    let iter: Box<dyn Iterator<Item = _> + Send> = match writer_tz {
+        Some(writer_tz) => Box::new(iter.map(move |ts| {
+            let ts = ts?;
+            let seconds = ts.div_euclid(NANOSECONDS_IN_SECOND);
+            let nanoseconds = ts.rem_euclid(NANOSECONDS_IN_SECOND);
+            let dt = (writer_tz.timestamp_nanos(0)
+                + TimeDelta::new(seconds as i64, nanoseconds as u32)
+                    .expect("TimeDelta duration out of bound"))
+            .naive_local()
+            .and_utc();
+
+            Ok((dt.timestamp() as i128) * NANOSECONDS_IN_SECOND
+                + (dt.timestamp_subsec_nanos() as i128))
+        })),
+        None => Box::new(iter),
+    };
+
+    Ok(DecimalArrayDecoder::new(
+        Decimal128Type::MAX_PRECISION,
+        NANOSECOND_DIGITS,
+        iter,
+        present,
+    ))
+}
+
 /// Seconds from ORC epoch of 1 January 2015, which serves as the 0
 /// point for all timestamp values, to the UNIX epoch of 1 January 1970.
 const ORC_EPOCH_UTC_SECONDS_SINCE_UNIX_EPOCH: i64 = 1_420_070_400;
@@ -99,100 +149,118 @@ pub fn new_timestamp_decoder(
     field_type: ArrowDataType,
     stripe: &Stripe,
 ) -> Result<Box<dyn ArrayBatchDecoder>> {
-    let ArrowDataType::Timestamp(time_unit, None) = field_type else {
-        MismatchedSchemaSnafu {
-            orc_type: column.data_type().clone(),
-            arrow_type: field_type,
-        }
-        .fail()?
-    };
-
-    match stripe.writer_tz() {
+    let seconds_since_unix_epoch = match stripe.writer_tz() {
         Some(writer_tz) => {
             // If writer timezone exists then we must take the ORC epoch according
             // to that timezone, and find seconds since UTC UNIX epoch for the base.
-            let seconds_since_unix_epoch = writer_tz
+            writer_tz
                 .with_ymd_and_hms(2015, 1, 1, 0, 0, 0)
                 .unwrap()
-                .timestamp();
+                .timestamp()
+        }
+        None => {
+            // No writer timezone, we can assume UTC, so we can use known fixed value
+            // for the base offset.
+            ORC_EPOCH_UTC_SECONDS_SINCE_UNIX_EPOCH
+        }
+    };
 
-            fn f<T: ArrowTimestampType>(
-                decoder: PrimitiveArrayDecoder<T>,
-                writer_tz: Tz,
-            ) -> TimestampOffsetArrayDecoder<T> {
-                TimestampOffsetArrayDecoder {
-                    inner: decoder,
+    match field_type {
+        ArrowDataType::Timestamp(time_unit, None) => match stripe.writer_tz() {
+            Some(writer_tz) => {
+                fn f<T: ArrowTimestampType>(
+                    decoder: PrimitiveArrayDecoder<T>,
+                    writer_tz: Tz,
+                ) -> TimestampOffsetArrayDecoder<T> {
+                    TimestampOffsetArrayDecoder {
+                        inner: decoder,
+                        writer_tz,
+                    }
+                }
+                decoder_for_time_unit!(
+                    column,
+                    time_unit,
+                    seconds_since_unix_epoch,
+                    stripe,
                     writer_tz,
+                    f,
+                )
+            }
+            None => {
+                fn f<T: ArrowTimestampType>(
+                    decoder: PrimitiveArrayDecoder<T>,
+                    _writer_tz: (),
+                ) -> PrimitiveArrayDecoder<T> {
+                    decoder
                 }
+
+                decoder_for_time_unit!(column, time_unit, seconds_since_unix_epoch, stripe, (), f,)
             }
-            decoder_for_time_unit!(
+        },
+        ArrowDataType::Decimal128(Decimal128Type::MAX_PRECISION, NANOSECOND_DIGITS) => {
+            // TODO: add support for non-None writer TZ
+            Ok(Box::new(decimal128_decoder(
                 column,
-                time_unit,
-                seconds_since_unix_epoch,
                 stripe,
-                writer_tz,
-                f,
-            )
+                seconds_since_unix_epoch,
+                stripe.writer_tz(),
+            )?))
         }
-        None => {
+        _ => MismatchedSchemaSnafu {
+            orc_type: column.data_type().clone(),
+            arrow_type: field_type,
+        }
+        .fail(),
+    }
+}
+
+/// Decodes a TIMESTAMP_INSTANT column stripe into batches of
+/// Timestamp{Nano,Micro,Milli,}secondArrays with UTC timezone.
+pub fn new_timestamp_instant_decoder(
+    column: &Column,
+    field_type: ArrowDataType,
+    stripe: &Stripe,
+) -> Result<Box<dyn ArrayBatchDecoder>> {
+    match field_type {
+        ArrowDataType::Timestamp(time_unit, Some(tz)) => {
+            ensure!(
+                tz.as_ref() == "UTC",
+                UnsupportedTypeVariantSnafu {
+                    msg: "Non-UTC Arrow timestamps"
+                }
+            );
+
             fn f<T: ArrowTimestampType>(
                 decoder: PrimitiveArrayDecoder<T>,
                 _writer_tz: (),
-            ) -> PrimitiveArrayDecoder<T> {
-                decoder
+            ) -> TimestampInstantArrayDecoder<T> {
+                TimestampInstantArrayDecoder(decoder)
             }
 
             decoder_for_time_unit!(
                 column,
                 time_unit,
-                // No writer timezone, we can assume UTC, so we can use known fixed value
-                // for the base offset.
+                // TIMESTAMP_INSTANT is encoded as UTC so we don't check writer timezone in stripe
                 ORC_EPOCH_UTC_SECONDS_SINCE_UNIX_EPOCH,
                 stripe,
                 (),
                 f,
             )
         }
-    }
-}
-
-/// Decodes a TIMESTAMP_INSTANT column stripe into batches of
-/// Timestamp{Nano,Micro,Milli,}secondArrays with UTC timezone.
-pub fn new_timestamp_instant_decoder(
-    column: &Column,
-    field_type: ArrowDataType,
-    stripe: &Stripe,
-) -> Result<Box<dyn ArrayBatchDecoder>> {
-    let ArrowDataType::Timestamp(time_unit, Some(tz)) = field_type else {
-        MismatchedSchemaSnafu {
+        ArrowDataType::Decimal128(Decimal128Type::MAX_PRECISION, NANOSECOND_DIGITS) => {
+            Ok(Box::new(decimal128_decoder(
+                column,
+                stripe,
+                ORC_EPOCH_UTC_SECONDS_SINCE_UNIX_EPOCH,
+                None,
+            )?))
+        }
+        _ => MismatchedSchemaSnafu {
             orc_type: column.data_type().clone(),
             arrow_type: field_type,
         }
-        .fail()?
-    };
-    ensure!(
-        tz.as_ref() == "UTC",
-        UnsupportedTypeVariantSnafu {
-            msg: "Non-UTC Arrow timestamps"
-        }
-    );
-
-    fn f<T: ArrowTimestampType>(
-        decoder: PrimitiveArrayDecoder<T>,
-        _writer_tz: (),
-    ) -> TimestampInstantArrayDecoder<T> {
-        TimestampInstantArrayDecoder(decoder)
+        .fail()?,
     }
-
-    decoder_for_time_unit!(
-        column,
-        time_unit,
-        // TIMESTAMP_INSTANT is encoded as UTC so we don't check writer timezone in stripe
-        ORC_EPOCH_UTC_SECONDS_SINCE_UNIX_EPOCH,
-        stripe,
-        (),
-        f,
-    )
 }
 
 /// Wrapper around PrimitiveArrayDecoder to decode timestamps which are encoded in
diff --git a/src/reader/decode/timestamp.rs b/src/reader/decode/timestamp.rs
index 5f846dd..834d016 100644
--- a/src/reader/decode/timestamp.rs
+++ b/src/reader/decode/timestamp.rs
@@ -7,14 +7,14 @@ use crate::error::{DecodeTimestampSnafu, Result};
 
 const NANOSECONDS_IN_SECOND: i64 = 1_000_000_000;
 
-pub struct TimestampIterator<T: ArrowTimestampType> {
+pub struct TimestampIterator<T: ArrowTimestampType, Item: TryFrom<i128>> {
     base_from_epoch: i64,
     data: Box<dyn Iterator<Item = Result<i64>> + Send>,
     secondary: Box<dyn Iterator<Item = Result<u64>> + Send>,
-    _marker: PhantomData<T>,
+    _marker: PhantomData<(T, Item)>,
 }
 
-impl<T: ArrowTimestampType> TimestampIterator<T> {
+impl<T: ArrowTimestampType, Item: TryFrom<i128>> TimestampIterator<T, Item> {
     pub fn new(
         base_from_epoch: i64,
         data: Box<dyn Iterator<Item = Result<i64>> + Send>,
@@ -29,22 +29,23 @@ impl<T: ArrowTimestampType> TimestampIterator<T> {
     }
 }
 
-impl<T: ArrowTimestampType> Iterator for TimestampIterator<T> {
-    type Item = Result<i64>;
+impl<T: ArrowTimestampType, Item: TryFrom<i128>> Iterator for TimestampIterator<T, Item> {
+    type Item = Result<Item>;
 
     fn next(&mut self) -> Option<Self::Item> {
         // TODO: throw error for mismatched stream lengths?
         let (seconds_since_orc_base, nanoseconds) =
             self.data.by_ref().zip(self.secondary.by_ref()).next()?;
-        decode_timestamp::<T>(self.base_from_epoch, seconds_since_orc_base, nanoseconds).transpose()
+        decode_timestamp::<T, _>(self.base_from_epoch, seconds_since_orc_base, nanoseconds)
+            .transpose()
     }
 }
 
-fn decode_timestamp<T: ArrowTimestampType>(
+fn decode_timestamp<T: ArrowTimestampType, Ret: TryFrom<i128>>(
     base: i64,
     seconds_since_orc_base: Result<i64>,
     nanoseconds: Result<u64>,
-) -> Result<Option<i64>> {
+) -> Result<Option<Ret>> {
     let data = seconds_since_orc_base?;
     let mut nanoseconds = nanoseconds?;
     // Last 3 bits indicate how many trailing zeros were truncated
diff --git a/tests/basic/main.rs b/tests/basic/main.rs
index 402bfc4..9a541f7 100644
--- a/tests/basic/main.rs
+++ b/tests/basic/main.rs
@@ -1,7 +1,7 @@
 use std::fs::File;
 use std::sync::Arc;
 
-use arrow::datatypes::{DataType, Field, Schema, TimeUnit};
+use arrow::datatypes::{DataType, Decimal128Type, DecimalType, Field, Schema, TimeUnit};
 use arrow::record_batch::{RecordBatch, RecordBatchReader};
 use arrow::util::pretty;
 #[cfg(feature = "async")]
@@ -457,6 +457,35 @@ fn custom_precision_timestamps_test(time_unit: TimeUnit) {
     assert_batches_eq(&batch, &expected);
 }
 
+#[test]
+pub fn decimal128_timestamps_test() {
+    let path = basic_path("overflowing_timestamps.orc");
+    let f = File::open(path).expect("no file found");
+    let reader = ArrowReaderBuilder::try_new(f)
+        .unwrap()
+        .with_schema(Arc::new(Schema::new(vec![
+            Field::new("id", DataType::Int32, true),
+            Field::new(
+                "ts",
+                DataType::Decimal128(Decimal128Type::MAX_PRECISION, 9),
+                true,
+            ),
+        ])))
+        .build();
+    let batch = reader.collect::<Result<Vec<_>, _>>().unwrap();
+    println!("{:?}", batch[0].column(1));
+    let expected = [
+        "+----+------------------------+",
+        "| id | ts                     |",
+        "+----+------------------------+",
+        "| 1  | 12345678.000000000     |",
+        "| 2  | -62135596800.000000000 |",
+        "| 3  | 12345678.000000000     |",
+        "+----+------------------------+",
+    ];
+    assert_batches_eq(&batch, &expected);
+}
+
 // From https://github.com/apache/arrow-rs/blob/7705acad845e8b2a366a08640f7acb4033ed7049/arrow-flight/src/sql/metadata/mod.rs#L67-L75
 pub fn assert_batches_eq(batches: &[RecordBatch], expected_lines: &[&str]) {
     let formatted = pretty::pretty_format_batches(batches).unwrap().to_string();

From 28e911bdf815bdcd8cc1225a4afc0627902400ef Mon Sep 17 00:00:00 2001
From: Valentin Lorentz <vlorentz@softwareheritage.org>
Date: Thu, 20 Jun 2024 09:19:20 +0200
Subject: [PATCH 2/3] Avoid overflows

---
 src/array_decoder/timestamp.rs | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/array_decoder/timestamp.rs b/src/array_decoder/timestamp.rs
index a48cd69..24ddaf8 100644
--- a/src/array_decoder/timestamp.rs
+++ b/src/array_decoder/timestamp.rs
@@ -15,7 +15,7 @@ use arrow::datatypes::{
 };
 use chrono::offset::TimeZone;
 use chrono::TimeDelta;
-use chrono_tz::Tz;
+use chrono_tz::{Tz, UTC};
 use snafu::ensure;
 
 use super::{ArrayBatchDecoder, DecimalArrayDecoder, PrimitiveArrayDecoder};
@@ -113,10 +113,14 @@ fn decimal128_decoder(
     );
 
     let iter: Box<dyn Iterator<Item = _> + Send> = match writer_tz {
+        Some(UTC) => Box::new(iter), // Avoid overflow-able operations below
         Some(writer_tz) => Box::new(iter.map(move |ts| {
             let ts = ts?;
             let seconds = ts.div_euclid(NANOSECONDS_IN_SECOND);
             let nanoseconds = ts.rem_euclid(NANOSECONDS_IN_SECOND);
+
+            // The addition panics in case of overflow, because chrono stores
+            // dates in an i32
             let dt = (writer_tz.timestamp_nanos(0)
                 + TimeDelta::new(seconds as i64, nanoseconds as u32)
                     .expect("TimeDelta duration out of bound"))
@@ -198,7 +202,6 @@ pub fn new_timestamp_decoder(
             }
         },
         ArrowDataType::Decimal128(Decimal128Type::MAX_PRECISION, NANOSECOND_DIGITS) => {
-            // TODO: add support for non-None writer TZ
             Ok(Box::new(decimal128_decoder(
                 column,
                 stripe,

From 70fef3f6777fb3c1d407d749825653d19882ca69 Mon Sep 17 00:00:00 2001
From: Valentin Lorentz <vlorentz@softwareheritage.org>
Date: Thu, 20 Jun 2024 09:46:55 +0200
Subject: [PATCH 3/3] Fix wording in comment

---
 src/array_decoder/timestamp.rs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/array_decoder/timestamp.rs b/src/array_decoder/timestamp.rs
index 24ddaf8..bc39c04 100644
--- a/src/array_decoder/timestamp.rs
+++ b/src/array_decoder/timestamp.rs
@@ -119,8 +119,8 @@ fn decimal128_decoder(
             let seconds = ts.div_euclid(NANOSECONDS_IN_SECOND);
             let nanoseconds = ts.rem_euclid(NANOSECONDS_IN_SECOND);
 
-            // The addition panics in case of overflow, because chrono stores
-            // dates in an i32
+            // The addition may panic, because chrono stores dates in an i32,
+            // which can be overflowed with an i64 of seconds.
             let dt = (writer_tz.timestamp_nanos(0)
                 + TimeDelta::new(seconds as i64, nanoseconds as u32)
                     .expect("TimeDelta duration out of bound"))