From a881b43f026f0493039d8d9a241e56fcd42f6eaf Mon Sep 17 00:00:00 2001
From: Parth Chandra <parthc@apache.org>
Date: Mon, 13 Apr 2026 15:04:22 -0700
Subject: [PATCH 1/9] fix: Add legacy mode handling to cast Decimal to String

---
 .../spark-expr/src/conversion_funcs/cast.rs   |  18 ++-
 .../src/conversion_funcs/numeric.rs           |  89 +++++++++++++-
 .../apache/comet/expressions/CometCast.scala  |  16 ++-
 .../org/apache/comet/CometCastSuite.scala     | 110 ++++++++++++++++--
 4 files changed, 212 insertions(+), 21 deletions(-)

diff --git a/native/spark-expr/src/conversion_funcs/cast.rs b/native/spark-expr/src/conversion_funcs/cast.rs
index 9d3d90d460..a9d37ce5fa 100644
--- a/native/spark-expr/src/conversion_funcs/cast.rs
+++ b/native/spark-expr/src/conversion_funcs/cast.rs
@@ -19,12 +19,12 @@ use crate::conversion_funcs::boolean::{
     cast_boolean_to_decimal, cast_boolean_to_timestamp, is_df_cast_from_bool_spark_compatible,
 };
 use crate::conversion_funcs::numeric::{
-    cast_decimal_to_timestamp, cast_float32_to_decimal128, cast_float64_to_decimal128,
-    cast_float_to_timestamp, cast_int_to_decimal128, cast_int_to_timestamp,
-    is_df_cast_from_decimal_spark_compatible, is_df_cast_from_float_spark_compatible,
-    is_df_cast_from_int_spark_compatible, spark_cast_decimal_to_boolean,
-    spark_cast_float32_to_utf8, spark_cast_float64_to_utf8, spark_cast_int_to_int,
-    spark_cast_nonintegral_numeric_to_integral,
+    cast_decimal128_to_utf8, cast_decimal_to_timestamp, cast_float32_to_decimal128,
+    cast_float64_to_decimal128, cast_float_to_timestamp, cast_int_to_decimal128,
+    cast_int_to_timestamp, is_df_cast_from_decimal_spark_compatible,
+    is_df_cast_from_float_spark_compatible, is_df_cast_from_int_spark_compatible,
+    spark_cast_decimal_to_boolean, spark_cast_float32_to_utf8, spark_cast_float64_to_utf8,
+    spark_cast_int_to_int, spark_cast_nonintegral_numeric_to_integral,
 };
 use crate::conversion_funcs::string::{
     cast_string_to_date, cast_string_to_decimal, cast_string_to_float, cast_string_to_int,
@@ -381,6 +381,12 @@ pub(crate) fn cast_array(
             spark_cast_nonintegral_numeric_to_integral(&array, eval_mode, &from_type, to_type)
         }
         (Decimal128(_p, _s), Boolean) => spark_cast_decimal_to_boolean(&array),
+        // Spark LEGACY cast uses Java BigDecimal.toString() which produces scientific notation
+        // when adjusted_exponent < -6 (e.g. "0E-18" for zero with scale=18).
+        // TRY and ANSI use plain notation ("0.000000000000000000") so DataFusion handles those.
+        (Decimal128(_, scale), Utf8) if eval_mode == EvalMode::Legacy => {
+            cast_decimal128_to_utf8(&array, *scale)
+        }
         (Utf8View, Utf8) => Ok(cast_with_options(&array, to_type, &CAST_OPTIONS)?),
         (Struct(_), Utf8) => Ok(casts_struct_to_string(array.as_struct(), cast_options)?),
         (Struct(_), Struct(_)) => Ok(cast_struct_to_struct(
diff --git a/native/spark-expr/src/conversion_funcs/numeric.rs b/native/spark-expr/src/conversion_funcs/numeric.rs
index 59a65fb49f..0337ab1364 100644
--- a/native/spark-expr/src/conversion_funcs/numeric.rs
+++ b/native/spark-expr/src/conversion_funcs/numeric.rs
@@ -21,7 +21,7 @@ use crate::{EvalMode, SparkError, SparkResult};
 use arrow::array::{
     Array, ArrayRef, AsArray, BooleanBuilder, Decimal128Array, Decimal128Builder, Float32Array,
     Float64Array, GenericStringArray, Int16Array, Int32Array, Int64Array, Int8Array,
-    OffsetSizeTrait, PrimitiveArray, TimestampMicrosecondBuilder,
+    OffsetSizeTrait, PrimitiveArray, StringArray, TimestampMicrosecondBuilder,
 };
 use arrow::datatypes::{
     i256, is_validate_decimal_precision, ArrowPrimitiveType, DataType, Decimal128Type, Float32Type,
@@ -71,7 +71,11 @@ pub(crate) fn is_df_cast_from_decimal_spark_compatible(to_type: &DataType) -> bo
             | DataType::Float64
             | DataType::Decimal128(_, _)
             | DataType::Decimal256(_, _)
-            | DataType::Utf8 // note that there can be formatting differences
+            // DataFusion's Decimal128→Utf8 cast uses plain notation (toPlainString semantics),
+            // matching Spark's TRY and ANSI modes. LEGACY mode is handled by a separate match
+            // arm in cast_array that applies Java BigDecimal.toString() (scientific notation
+            // for values where adjusted_exponent < -6, e.g. "0E-18" for zero with scale=18).
+            | DataType::Utf8
     )
 }
 
@@ -569,6 +573,62 @@ pub(crate) fn format_decimal_str(value_str: &str, precision: usize, scale: i8) -
     }
 }
 
+/// Casts a Decimal128 array to string using Java's BigDecimal.toString() semantics,
+/// which is Spark's LEGACY eval mode behavior. Plain notation when scale >= 0 and
+/// adjusted_exponent >= -6, otherwise scientific notation (e.g. "0E-18" for zero
+/// with scale=18, since adjusted_exponent = -18 + 0 = -18 < -6).
+///
+/// TRY and ANSI modes produce plain notation via DataFusion's cast instead.
+pub(crate) fn cast_decimal128_to_utf8(array: &ArrayRef, scale: i8) -> SparkResult<ArrayRef> {
+    let decimal_array = array
+        .as_any()
+        .downcast_ref::<Decimal128Array>()
+        .expect("Expected a Decimal128Array");
+    let output: StringArray = decimal_array
+        .iter()
+        .map(|opt_val| opt_val.map(|unscaled| decimal128_to_java_string(unscaled, scale)))
+        .collect();
+    Ok(Arc::new(output))
+}
+
+/// Formats a Decimal128 unscaled value as a string matching Java's BigDecimal.toString():
+/// - Plain notation when scale >= 0 and adjusted_exponent >= -6
+/// - Scientific notation otherwise
+///
+/// adjusted_exponent = -scale + (numDigits - 1)
+fn decimal128_to_java_string(unscaled: i128, scale: i8) -> String {
+    let negative = unscaled < 0;
+    let sign = if negative { "-" } else { "" };
+    let coeff = unscaled.unsigned_abs().to_string();
+    let num_digits = coeff.len() as i64;
+    let adj_exp = -(scale as i64) + (num_digits - 1);
+
+    if scale >= 0 && adj_exp >= -6 {
+        let scale_u = scale as usize;
+        let num_digits_u = num_digits as usize;
+        if scale_u == 0 {
+            format!("{sign}{coeff}")
+        } else if num_digits_u > scale_u {
+            let (int_part, frac_part) = coeff.split_at(num_digits_u - scale_u);
+            format!("{sign}{int_part}.{frac_part}")
+        } else {
+            let leading = scale_u - num_digits_u;
+            format!("{sign}0.{}{coeff}", "0".repeat(leading))
+        }
+    } else {
+        let mantissa = if num_digits == 1 {
+            coeff.clone()
+        } else {
+            format!("{}.{}", &coeff[..1], &coeff[1..])
+        };
+        if adj_exp > 0 {
+            format!("{sign}{mantissa}E+{adj_exp}")
+        } else {
+            format!("{sign}{mantissa}E{adj_exp}")
+        }
+    }
+}
+
 pub(crate) fn spark_cast_float64_to_utf8<OffsetSize>(
     from: &dyn Array,
     _eval_mode: EvalMode,
@@ -1310,4 +1370,29 @@ mod tests {
         let f64_inf: ArrayRef = Arc::new(Float64Array::from(vec![Some(f64::INFINITY)]));
         assert!(cast_float_to_timestamp(&f64_inf, tz, EvalMode::Ansi).is_err());
     }
+
+    #[test]
+    fn test_decimal128_to_java_string() {
+        // scale >= 0, adj_exp >= -6 → plain notation
+        assert_eq!(decimal128_to_java_string(0, 0), "0");
+        assert_eq!(decimal128_to_java_string(0, 2), "0.00");
+        assert_eq!(decimal128_to_java_string(12345, 2), "123.45");
+        assert_eq!(decimal128_to_java_string(-12345, 2), "-123.45");
+        assert_eq!(decimal128_to_java_string(1, 2), "0.01");
+        assert_eq!(decimal128_to_java_string(42, 0), "42");
+        assert_eq!(decimal128_to_java_string(-42, 0), "-42");
+        assert_eq!(decimal128_to_java_string(1, 6), "0.000001"); // adj_exp = -6 (boundary)
+
+        // scale >= 0, adj_exp < -6 → scientific notation (Spark LEGACY mode)
+        assert_eq!(decimal128_to_java_string(0, 18), "0E-18"); // adj_exp = -18
+        assert_eq!(decimal128_to_java_string(0, 7), "0E-7");   // adj_exp = -7
+        assert_eq!(decimal128_to_java_string(1, 7), "1E-7");
+        assert_eq!(decimal128_to_java_string(1, 18), "1E-18");
+
+        // scale < 0 → scientific notation
+        assert_eq!(decimal128_to_java_string(0, -2), "0E+2");
+        assert_eq!(decimal128_to_java_string(1, -2), "1E+2");
+        assert_eq!(decimal128_to_java_string(123, -2), "1.23E+4");
+        assert_eq!(decimal128_to_java_string(-123, -2), "-1.23E+4");
+    }
 }
diff --git a/spark/src/main/scala/org/apache/comet/expressions/CometCast.scala b/spark/src/main/scala/org/apache/comet/expressions/CometCast.scala
index b999915413..6b2b31e898 100644
--- a/spark/src/main/scala/org/apache/comet/expressions/CometCast.scala
+++ b/spark/src/main/scala/org/apache/comet/expressions/CometCast.scala
@@ -242,12 +242,18 @@ object CometCast extends CometExpressionSerde[Cast] with CometExprShim {
             "There can be differences in precision. " +
               "For example, the input \"1.4E-45\" will produce 1.0E-45 " +
               "instead of 1.4E-45"))
+      case d: DecimalType if d.scale < 0 =>
+        // Negative-scale decimals require spark.sql.legacy.allowNegativeScaleOfDecimal=true.
+        // When that config is enabled, Spark formats them using Java BigDecimal.toString()
+        // which produces scientific notation (e.g. "1.23E+4"). Comet matches this behavior.
+        // When the config is disabled, negative-scale decimals cannot be created in Spark,
+        // so we mark this as incompatible to avoid native execution on unexpected inputs.
+        val allowNegativeScale = SQLConf.get
+          .getConfString("spark.sql.legacy.allowNegativeScaleOfDecimal", "false")
+          .toBoolean
+        if (allowNegativeScale) Compatible() else Incompatible()
       case _: DecimalType =>
-        // https://github.com/apache/datafusion-comet/issues/1068
-        Compatible(
-          Some(
-            "There can be formatting differences in some case due to Spark using " +
-              "scientific notation where Comet does not"))
+        Compatible()
       case DataTypes.BinaryType =>
         Compatible()
       case StructType(fields) =>
diff --git a/spark/src/test/scala/org/apache/comet/CometCastSuite.scala b/spark/src/test/scala/org/apache/comet/CometCastSuite.scala
index 8c4d9fd1a3..a8c929dd35 100644
--- a/spark/src/test/scala/org/apache/comet/CometCastSuite.scala
+++ b/spark/src/test/scala/org/apache/comet/CometCastSuite.scala
@@ -35,7 +35,7 @@ import org.apache.spark.sql.types.{ArrayType, BinaryType, BooleanType, ByteType,
 
 import org.apache.comet.expressions.{CometCast, CometEvalMode}
 import org.apache.comet.rules.CometScanTypeChecker
-import org.apache.comet.serde.Compatible
+import org.apache.comet.serde.{Compatible, Incompatible}
 
 class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper {
 
@@ -641,6 +641,68 @@ class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper {
     castTest(generateDecimalsPrecision10Scale2(), DataTypes.StringType)
   }
 
+  test("cast DecimalType(38,18) to StringType") {
+    castTest(generateDecimalsPrecision38Scale18(), DataTypes.StringType)
+  }
+
+  test("cast DecimalType with negative scale to StringType") {
+    // Negative-scale decimals are a legacy Spark feature gated on
+    // spark.sql.legacy.allowNegativeScaleOfDecimal=true. Spark LEGACY cast uses Java's
+    // BigDecimal.toString() which produces scientific notation for negative-scale values
+    // (e.g. 12300 stored as Decimal(7,-2) with unscaled=123 → "1.23E+4").
+    // CometCast.canCastToString checks the
+    // config and returns Incompatible when it is false.
+    //
+    // Parquet does not support negative-scale decimals so we use checkSparkAnswer directly
+    // (no parquet round-trip) to avoid schema coercion.
+
+    // With config enabled: Comet should match Spark's plain string output
+    withSQLConf("spark.sql.legacy.allowNegativeScaleOfDecimal" -> "true") {
+      val dfNeg2 = Seq(
+        Some(BigDecimal("0")),
+        Some(BigDecimal("100")),
+        Some(BigDecimal("12300")),
+        Some(BigDecimal("-99900")),
+        Some(BigDecimal("9999900")),
+        None)
+        .toDF("b")
+        .withColumn("a", col("b").cast(DecimalType(7, -2)))
+        .drop("b")
+        .select(col("a").cast(DataTypes.StringType).as("result"))
+      checkSparkAnswer(dfNeg2)
+
+      val dfNeg4 = Seq(
+        Some(BigDecimal("0")),
+        Some(BigDecimal("10000")),
+        Some(BigDecimal("120000")),
+        Some(BigDecimal("-9990000")),
+        None)
+        .toDF("b")
+        .withColumn("a", col("b").cast(DecimalType(7, -4)))
+        .drop("b")
+        .select(col("a").cast(DataTypes.StringType).as("result"))
+      checkSparkAnswer(dfNeg4)
+    }
+
+    // With config disabled (default): the SQL parser rejects negative scale, so
+    // negative-scale decimals cannot be created through normal SQL paths.
+    // CometCast.isSupported returns Incompatible for this case, ensuring Comet does
+    // not attempt native execution if such a value ever reaches the planner.
+    // Note: DecimalType(7, -2) must be constructed while config=true, because the
+    // constructor itself checks the config and throws if negative scale is disallowed.
+    val negScaleType = withSQLConf("spark.sql.legacy.allowNegativeScaleOfDecimal" -> "true") {
+      DecimalType(7, -2)
+    }
+    withSQLConf("spark.sql.legacy.allowNegativeScaleOfDecimal" -> "false") {
+      assert(
+        CometCast.isSupported(
+          negScaleType,
+          DataTypes.StringType,
+          None,
+          CometEvalMode.LEGACY) == Incompatible())
+    }
+  }
+
   test("cast DecimalType(10,2) to TimestampType") {
     castTest(generateDecimalsPrecision10Scale2(), DataTypes.TimestampType)
   }
@@ -1173,6 +1235,9 @@ class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper {
   }
 
   test("cast DateType to StringType") {
+    // generateDates() covers: 1970-2027 sampled monthly, DST transition dates, and edge
+    // cases including "999-01-01" (year < 1000, zero-padded to "0999-01-01") and
+    // "12345-01-01" (year > 9999, no truncation). Date→String is timezone-independent.
     castTest(generateDates(), DataTypes.StringType)
   }
 
@@ -1247,7 +1312,19 @@ class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper {
   }
 
   test("cast TimestampType to StringType") {
-    castTest(generateTimestamps(), DataTypes.StringType)
+    // UTC baseline — also exercises fractional-second trailing-zero stripping
+    // and pre-epoch values via generateTimestamps()
+    withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> "UTC") {
+      castTest(generateTimestamps(), DataTypes.StringType)
+    }
+    // Spark formats timestamps in the session timezone without tz suffix.
+    // pre_timestamp_cast shifts the UTC value by the session tz offset before
+    // passing to DataFusion, so DST-sensitive timezones must also be correct.
+    compatibleTimezones.foreach { tz =>
+      withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> tz) {
+        castTest(generateTimestamps(), DataTypes.StringType)
+      }
+    }
   }
 
   test("cast TimestampType to DateType") {
@@ -1690,15 +1767,32 @@ class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper {
       values.toDF("str").select(col("str").cast(DataTypes.TimestampType).as("a")))
   }
 
-  private def generateTimestampLiterals(): Seq[String] =
-    Seq(
+  private def generateTimestamps(): DataFrame = {
+    val values = Seq(
+      // post-epoch with microseconds
       "2024-01-01T12:34:56.123456",
+      // UTC, no fractional seconds (output has no decimal point)
       "2024-01-01T01:00:00Z",
+      // year 9999 boundary
       "9999-12-31T01:00:00-02:00",
-      "2024-12-31T01:00:00+02:00")
-
-  private def generateTimestamps(): DataFrame = {
-    val values = generateTimestampLiterals()
+      // positive UTC offset
+      "2024-12-31T01:00:00+02:00",
+      // pre-epoch
+      "1960-01-01T00:00:00Z",
+      "1900-06-15T10:30:00Z",
+      // last microsecond before epoch
+      "1969-12-31T23:59:59.999999",
+      // year < 1000: Spark zero-pads to 4 digits (e.g. "0100-...")
+      "0100-03-01T00:00:00Z",
+      // fractional-second trailing-zero stripping
+      // .100000 → ".1", .123000 → ".123", .001000 → ".001", .000001 → ".000001"
+      "2024-06-01T00:00:00.100000",
+      "2024-06-01T00:00:00.123000",
+      "2024-06-01T00:00:00.001000",
+      "2024-06-01T00:00:00.000001",
+      // DST transition moments (America/New_York spring-forward / fall-back in UTC)
+      "2024-03-10T07:00:00Z",
+      "2024-11-03T06:00:00Z")
     withNulls(values)
       .toDF("str")
       .withColumn("a", col("str").cast(DataTypes.TimestampType))

From 08f6833d9d6dba53fd0ce415afc859b875d93c0f Mon Sep 17 00:00:00 2001
From: Parth Chandra <parthc@apache.org>
Date: Mon, 13 Apr 2026 15:35:44 -0700
Subject: [PATCH 2/9] format

---
 native/spark-expr/src/conversion_funcs/numeric.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/native/spark-expr/src/conversion_funcs/numeric.rs b/native/spark-expr/src/conversion_funcs/numeric.rs
index 0337ab1364..9b46a2270e 100644
--- a/native/spark-expr/src/conversion_funcs/numeric.rs
+++ b/native/spark-expr/src/conversion_funcs/numeric.rs
@@ -1385,7 +1385,7 @@ mod tests {
 
         // scale >= 0, adj_exp < -6 → scientific notation (Spark LEGACY mode)
         assert_eq!(decimal128_to_java_string(0, 18), "0E-18"); // adj_exp = -18
-        assert_eq!(decimal128_to_java_string(0, 7), "0E-7");   // adj_exp = -7
+        assert_eq!(decimal128_to_java_string(0, 7), "0E-7"); // adj_exp = -7
         assert_eq!(decimal128_to_java_string(1, 7), "1E-7");
         assert_eq!(decimal128_to_java_string(1, 18), "1E-18");
 

From 65a1baabbbdd046c913e2a4c0020775562cd1cf1 Mon Sep 17 00:00:00 2001
From: Parth Chandra <parthc@apache.org>
Date: Mon, 13 Apr 2026 15:46:50 -0700
Subject: [PATCH 3/9] fix

---
 spark/src/test/scala/org/apache/comet/CometCastSuite.scala | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/spark/src/test/scala/org/apache/comet/CometCastSuite.scala b/spark/src/test/scala/org/apache/comet/CometCastSuite.scala
index a8c929dd35..3bca0d0a0e 100644
--- a/spark/src/test/scala/org/apache/comet/CometCastSuite.scala
+++ b/spark/src/test/scala/org/apache/comet/CometCastSuite.scala
@@ -690,8 +690,9 @@ class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper {
     // not attempt native execution if such a value ever reaches the planner.
     // Note: DecimalType(7, -2) must be constructed while config=true, because the
     // constructor itself checks the config and throws if negative scale is disallowed.
-    val negScaleType = withSQLConf("spark.sql.legacy.allowNegativeScaleOfDecimal" -> "true") {
-      DecimalType(7, -2)
+    var negScaleType: DecimalType = null
+    withSQLConf("spark.sql.legacy.allowNegativeScaleOfDecimal" -> "true") {
+      negScaleType = DecimalType(7, -2)
     }
     withSQLConf("spark.sql.legacy.allowNegativeScaleOfDecimal" -> "false") {
       assert(

From 3b2daf783799e6968285bbc6de07ac943e8097f0 Mon Sep 17 00:00:00 2001
From: Parth Chandra <parthc@apache.org>
Date: Mon, 13 Apr 2026 18:00:03 -0700
Subject: [PATCH 4/9] write proleptic gregorian dates for tests to avoid
 mismatch

---
 .../org/apache/comet/CometCastSuite.scala     | 32 +++++++++++++++++--
 1 file changed, 29 insertions(+), 3 deletions(-)

diff --git a/spark/src/test/scala/org/apache/comet/CometCastSuite.scala b/spark/src/test/scala/org/apache/comet/CometCastSuite.scala
index 3bca0d0a0e..deebd18ae5 100644
--- a/spark/src/test/scala/org/apache/comet/CometCastSuite.scala
+++ b/spark/src/test/scala/org/apache/comet/CometCastSuite.scala
@@ -1328,6 +1328,29 @@ class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper {
     }
   }
 
+  test("cast TimestampType to StringType - ancient timestamps") {
+    // Pre-1900 timestamps cannot go through Parquet (INT96 rejects them) so we create
+    // the data in-memory via microseconds-since-epoch cast to TimestampType.
+    withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> "UTC") {
+      // Epoch-micros for a few three-digit-year dates:
+      //   0100-03-01 00:00:00 UTC  = -59,006,361,600,000,000 µs from epoch
+      //   0500-06-15 12:30:00 UTC  = -46,374,377,400,000,000 µs from epoch
+      //   0999-12-31 23:59:59 UTC  = -30,610,224,001,000,000 µs from epoch
+      val ancientMicros = Seq(
+        -59006361600000000L, // 0100-03-01
+        -46374377400000000L, // 0500-06-15
+        -30610224001000000L) // 0999-12-31
+      ancientMicros
+        .toDF("micros")
+        .selectExpr("CAST(micros AS TIMESTAMP) AS a")
+        .createOrReplaceTempView("ancient_ts")
+      checkSparkAnswerAndOperator(
+        spark.sql("SELECT a, CAST(a AS STRING) FROM ancient_ts"))
+      checkSparkAnswerAndOperator(
+        spark.sql("SELECT a, CAST(a AS BIGINT) FROM ancient_ts"))
+    }
+  }
+
   test("cast TimestampType to DateType") {
     castTest(generateTimestamps(), DataTypes.DateType)
   }
@@ -1783,8 +1806,6 @@ class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper {
       "1900-06-15T10:30:00Z",
       // last microsecond before epoch
       "1969-12-31T23:59:59.999999",
-      // year < 1000: Spark zero-pads to 4 digits (e.g. "0100-...")
-      "0100-03-01T00:00:00Z",
       // fractional-second trailing-zero stripping
       // .100000 → ".1", .123000 → ".123", .001000 → ".001", .000001 → ".000001"
       "2024-06-01T00:00:00.100000",
@@ -2012,7 +2033,12 @@ class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper {
 
   private def roundtripParquet(df: DataFrame, tempDir: File): DataFrame = {
     val filename = new File(tempDir, s"castTest_${System.currentTimeMillis()}.parquet").toString
-    df.write.mode(SaveMode.Overwrite).parquet(filename)
+    // CORRECTED mode writes timestamps as proleptic Gregorian without rebase.
+    // Required because generateTimestamps() includes pre-1900 values (e.g. 1900-06-15)
+    // which trigger INT96's default EXCEPTION mode when written with certain timezones.
+    withSQLConf("spark.sql.parquet.int96RebaseModeInWrite" -> "CORRECTED") {
+      df.write.mode(SaveMode.Overwrite).parquet(filename)
+    }
     spark.read.parquet(filename)
   }
 }

From 4897a9440ca0f8e1de1c76e3eeb1c2bc4a382307 Mon Sep 17 00:00:00 2001
From: Parth Chandra <parthc@apache.org>
Date: Mon, 13 Apr 2026 18:16:52 -0700
Subject: [PATCH 5/9] format

---
 .../src/test/scala/org/apache/comet/CometCastSuite.scala | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/spark/src/test/scala/org/apache/comet/CometCastSuite.scala b/spark/src/test/scala/org/apache/comet/CometCastSuite.scala
index deebd18ae5..778671cf86 100644
--- a/spark/src/test/scala/org/apache/comet/CometCastSuite.scala
+++ b/spark/src/test/scala/org/apache/comet/CometCastSuite.scala
@@ -1339,15 +1339,14 @@ class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper {
       val ancientMicros = Seq(
         -59006361600000000L, // 0100-03-01
         -46374377400000000L, // 0500-06-15
-        -30610224001000000L) // 0999-12-31
+        -30610224001000000L
+      ) // 0999-12-31
       ancientMicros
         .toDF("micros")
         .selectExpr("CAST(micros AS TIMESTAMP) AS a")
         .createOrReplaceTempView("ancient_ts")
-      checkSparkAnswerAndOperator(
-        spark.sql("SELECT a, CAST(a AS STRING) FROM ancient_ts"))
-      checkSparkAnswerAndOperator(
-        spark.sql("SELECT a, CAST(a AS BIGINT) FROM ancient_ts"))
+      checkSparkAnswerAndOperator(spark.sql("SELECT a, CAST(a AS STRING) FROM ancient_ts"))
+      checkSparkAnswerAndOperator(spark.sql("SELECT a, CAST(a AS BIGINT) FROM ancient_ts"))
     }
   }
 

From f6e51cbcef1f5b9b29425cdfb172e22b6509c41a Mon Sep 17 00:00:00 2001
From: Parth Chandra <parthc@apache.org>
Date: Tue, 14 Apr 2026 10:41:18 -0700
Subject: [PATCH 6/9] fix test

---
 spark/src/test/scala/org/apache/comet/CometCastSuite.scala | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/spark/src/test/scala/org/apache/comet/CometCastSuite.scala b/spark/src/test/scala/org/apache/comet/CometCastSuite.scala
index 778671cf86..d0e82405ee 100644
--- a/spark/src/test/scala/org/apache/comet/CometCastSuite.scala
+++ b/spark/src/test/scala/org/apache/comet/CometCastSuite.scala
@@ -1345,8 +1345,8 @@ class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper {
         .toDF("micros")
         .selectExpr("CAST(micros AS TIMESTAMP) AS a")
         .createOrReplaceTempView("ancient_ts")
-      checkSparkAnswerAndOperator(spark.sql("SELECT a, CAST(a AS STRING) FROM ancient_ts"))
-      checkSparkAnswerAndOperator(spark.sql("SELECT a, CAST(a AS BIGINT) FROM ancient_ts"))
+      checkSparkAnswer(spark.sql("SELECT CAST(a AS STRING) FROM ancient_ts"))
+      checkSparkAnswer(spark.sql("SELECT CAST(a AS BIGINT) FROM ancient_ts"))
     }
   }
 

From 891dd704c36535d42f5dc19cf78e66d5ac3bdb68 Mon Sep 17 00:00:00 2001
From: Parth Chandra <parthc@apache.org>
Date: Tue, 14 Apr 2026 15:24:02 -0700
Subject: [PATCH 7/9] improve test

---
 .../scala/org/apache/comet/CometCastSuite.scala  | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/spark/src/test/scala/org/apache/comet/CometCastSuite.scala b/spark/src/test/scala/org/apache/comet/CometCastSuite.scala
index d0e82405ee..4cf98feb60 100644
--- a/spark/src/test/scala/org/apache/comet/CometCastSuite.scala
+++ b/spark/src/test/scala/org/apache/comet/CometCastSuite.scala
@@ -656,8 +656,12 @@ class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper {
     // Parquet does not support negative-scale decimals so we use checkSparkAnswer directly
     // (no parquet round-trip) to avoid schema coercion.
 
-    // With config enabled: Comet should match Spark's plain string output
-    withSQLConf("spark.sql.legacy.allowNegativeScaleOfDecimal" -> "true") {
+    // With config enabled, enable localTableScan so Comet can take over the full plan
+    // and execute the cast natively. Parquet does not support negative-scale decimals so
+    // the data is kept in-memory; localTableScan.enabled bridges that gap.
+    withSQLConf(
+      "spark.sql.legacy.allowNegativeScaleOfDecimal" -> "true",
+      "spark.comet.exec.localTableScan.enabled" -> "true") {
       val dfNeg2 = Seq(
         Some(BigDecimal("0")),
         Some(BigDecimal("100")),
@@ -669,7 +673,7 @@ class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper {
         .withColumn("a", col("b").cast(DecimalType(7, -2)))
         .drop("b")
         .select(col("a").cast(DataTypes.StringType).as("result"))
-      checkSparkAnswer(dfNeg2)
+      checkSparkAnswerAndOperator(dfNeg2)
 
       val dfNeg4 = Seq(
         Some(BigDecimal("0")),
@@ -681,7 +685,7 @@ class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper {
         .withColumn("a", col("b").cast(DecimalType(7, -4)))
         .drop("b")
         .select(col("a").cast(DataTypes.StringType).as("result"))
-      checkSparkAnswer(dfNeg4)
+      checkSparkAnswerAndOperator(dfNeg4)
     }
 
     // With config disabled (default): the SQL parser rejects negative scale, so
@@ -1702,6 +1706,10 @@ class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper {
       BigDecimal("-2147483647.123123123"),
       BigDecimal("-123456.789"),
       BigDecimal("0.00000000000"),
+      // Small-magnitude non-zero: adj_exp = -9 + 0 = -9 < -6, so LEGACY produces
+      // scientific notation "1E-9" / "1.000000000E-9" rather than plain "0.000000001".
+      BigDecimal("0.000000001"),
+      BigDecimal("-0.000000001"),
       BigDecimal("123456.789"),
       // Int Max
       BigDecimal("2147483647.123123123"),

From bbdd0b3562c87c4e49e0dc568966628d1c4995e1 Mon Sep 17 00:00:00 2001
From: Parth Chandra <parthc@apache.org>
Date: Wed, 15 Apr 2026 15:46:33 -0700
Subject: [PATCH 8/9] fix

---
 .../src/test/scala/org/apache/comet/CometCastSuite.scala  | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/spark/src/test/scala/org/apache/comet/CometCastSuite.scala b/spark/src/test/scala/org/apache/comet/CometCastSuite.scala
index 4cf98feb60..fdd204a515 100644
--- a/spark/src/test/scala/org/apache/comet/CometCastSuite.scala
+++ b/spark/src/test/scala/org/apache/comet/CometCastSuite.scala
@@ -1798,8 +1798,9 @@ class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper {
       values.toDF("str").select(col("str").cast(DataTypes.TimestampType).as("a")))
   }
 
-  private def generateTimestamps(): DataFrame = {
-    val values = Seq(
+
+  private def generateTimestampLiterals(): Seq[String] =
+    Seq(
       // post-epoch with microseconds
       "2024-01-01T12:34:56.123456",
       // UTC, no fractional seconds (output has no decimal point)
@@ -1822,6 +1823,9 @@ class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper {
       // DST transition moments (America/New_York spring-forward / fall-back in UTC)
       "2024-03-10T07:00:00Z",
       "2024-11-03T06:00:00Z")
+
+  private def generateTimestamps(): DataFrame = {
+    val values = generateTimestampLiterals()
     withNulls(values)
       .toDF("str")
       .withColumn("a", col("str").cast(DataTypes.TimestampType))

From 7ab2fd132d3ca509d5e40d175c694797070d74c7 Mon Sep 17 00:00:00 2001
From: Parth Chandra <parthc@apache.org>
Date: Wed, 15 Apr 2026 17:29:14 -0700
Subject: [PATCH 9/9] spotless

---
 spark/src/test/scala/org/apache/comet/CometCastSuite.scala | 1 -
 1 file changed, 1 deletion(-)

diff --git a/spark/src/test/scala/org/apache/comet/CometCastSuite.scala b/spark/src/test/scala/org/apache/comet/CometCastSuite.scala
index fdd204a515..1c77c43c4c 100644
--- a/spark/src/test/scala/org/apache/comet/CometCastSuite.scala
+++ b/spark/src/test/scala/org/apache/comet/CometCastSuite.scala
@@ -1798,7 +1798,6 @@ class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper {
       values.toDF("str").select(col("str").cast(DataTypes.TimestampType).as("a")))
   }
 
-
   private def generateTimestampLiterals(): Seq[String] =
     Seq(
       // post-epoch with microseconds