From b29507054717b85371bec9f0de5bbe53d4b77b41 Mon Sep 17 00:00:00 2001 From: Xiaoxuan Li Date: Sun, 22 Mar 2026 23:15:26 -0700 Subject: [PATCH] [SPARK-56160][SQL] Add DataType classes for nanosecond timestamp types ### What changes were proposed in this pull request? This PR adds two new DataType classes for nanosecond-precision timestamps: - `TimestampNSType` (with local timezone semantics) - `TimestampNTZNSType` (without timezone semantics) Both are singleton types following the same pattern as `TimestampNTZType` (SPARK-35662). They are stored internally as a Long representing nanoseconds since the Unix epoch, with a default size of 8 bytes. The representable range is approximately 1677-09-21 to 2262-04-11. This PR also registers the new types in `DataTypes.java` (Java API) and `DataType.scala` (type name registry for JSON/DDL parsing). ### Why are the changes needed? Microsecond precision is insufficient for a growing number of workloads: - Parquet files written by Pandas/PyArrow default to `TIMESTAMP(NANOS)` - Iceberg V3 adds `timestamp_ns` / `timestamptz_ns` types - Financial exchange data (NYSE, NASDAQ, CME) uses nanosecond timestamps - OpenTelemetry traces use nanosecond timestamps Without native nanosecond types, Spark either throws `AnalysisException` on nanosecond Parquet columns or reads them as raw `LongType` via `spark.sql.legacy.parquet.nanosAsLong`, losing all timestamp semantics. This is the first step of native nanosecond timestamp support. Subsequent PRs will add SQL parser keywords, Cast rules, Parquet read/write, and Arrow integration. ### Does this PR introduce _any_ user-facing change? No. The types are defined but not yet wired into the SQL parser or any data source. ### How was this patch tested? Added `checkDefaultSize` tests in `DataTypeSuite` for both new types. ### Was this patch authored or co-authored using generative AI tooling? Yes, co-authored with Kiro. 
--- .../org/apache/spark/sql/types/DataTypes.java | 10 ++++ .../org/apache/spark/sql/types/DataType.scala | 2 + .../spark/sql/types/TimestampNSType.scala | 59 ++++++++++++++++++ .../spark/sql/types/TimestampNTZNSType.scala | 60 +++++++++++++++++++ .../spark/sql/types/DataTypeSuite.scala | 2 + 5 files changed, 133 insertions(+) create mode 100644 sql/api/src/main/scala/org/apache/spark/sql/types/TimestampNSType.scala create mode 100644 sql/api/src/main/scala/org/apache/spark/sql/types/TimestampNTZNSType.scala diff --git a/sql/api/src/main/java/org/apache/spark/sql/types/DataTypes.java b/sql/api/src/main/java/org/apache/spark/sql/types/DataTypes.java index 2cfbb7b7f6847..7712178ac553c 100644 --- a/sql/api/src/main/java/org/apache/spark/sql/types/DataTypes.java +++ b/sql/api/src/main/java/org/apache/spark/sql/types/DataTypes.java @@ -59,6 +59,16 @@ public class DataTypes { */ public static final DataType TimestampNTZType = TimestampNTZType$.MODULE$; + /** + * Gets the TimestampNSType object. + */ + public static final DataType TimestampNSType = TimestampNSType$.MODULE$; + + /** + * Gets the TimestampNTZNSType object. + */ + public static final DataType TimestampNTZNSType = TimestampNTZNSType$.MODULE$; + /** * Gets the CalendarIntervalType object. 
*/ diff --git a/sql/api/src/main/scala/org/apache/spark/sql/types/DataType.scala b/sql/api/src/main/scala/org/apache/spark/sql/types/DataType.scala index 48a6514440dd3..950326f8508ff 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/types/DataType.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/types/DataType.scala @@ -207,6 +207,8 @@ object DataType { YearMonthIntervalType(MONTH), YearMonthIntervalType(YEAR, MONTH), TimestampNTZType, + TimestampNSType, + TimestampNTZNSType, VariantType) ++ (TimeType.MIN_PRECISION to TimeType.MAX_PRECISION).map(TimeType(_))) .map(t => t.typeName -> t) diff --git a/sql/api/src/main/scala/org/apache/spark/sql/types/TimestampNSType.scala b/sql/api/src/main/scala/org/apache/spark/sql/types/TimestampNSType.scala new file mode 100644 index 0000000000000..b22ec4a80f466 --- /dev/null +++ b/sql/api/src/main/scala/org/apache/spark/sql/types/TimestampNSType.scala @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.types + +import org.apache.spark.annotation.Unstable + +/** + * The timestamp type represents a time instant in nanosecond precision. 
Valid range is + * [1677-09-21T00:12:43.145224192, 2262-04-11T23:47:16.854775807] where the left/right-bound is a + * date and time of the proleptic Gregorian calendar in UTC+00:00. + * + * Internally stored as a Long representing nanoseconds since the Unix epoch + * (1970-01-01T00:00:00Z). + * + * Please use the singleton `DataTypes.TimestampNSType` to refer to the type. + * @since 4.2.0 + */ +@Unstable +class TimestampNSType private () extends DatetimeType { + + /** + * The default size of a value of the TimestampNSType is 8 bytes. + */ + override def defaultSize: Int = 8 + + override def typeName: String = "timestamp_ns" + + override def equals(obj: Any): Boolean = obj.isInstanceOf[TimestampNSType] + + override def hashCode(): Int = classOf[TimestampNSType].getSimpleName.hashCode + + private[spark] override def asNullable: TimestampNSType = this +} + +/** + * The companion case object and its class are separated so the companion object also subclasses + * the TimestampNSType class. Otherwise, the companion object would be of type "TimestampNSType$" + * in byte code. Defined with a private constructor so the companion object is the only possible + * instantiation. + * + * @since 4.2.0 + */ +@Unstable +case object TimestampNSType extends TimestampNSType diff --git a/sql/api/src/main/scala/org/apache/spark/sql/types/TimestampNTZNSType.scala b/sql/api/src/main/scala/org/apache/spark/sql/types/TimestampNTZNSType.scala new file mode 100644 index 0000000000000..57450c6eef21b --- /dev/null +++ b/sql/api/src/main/scala/org/apache/spark/sql/types/TimestampNTZNSType.scala @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.types + +import org.apache.spark.annotation.Unstable + +/** + * The timestamp without time zone type represents a local time in nanosecond precision, which is + * independent of time zone. Valid range is [1677-09-21T00:12:43.145224192, + * 2262-04-11T23:47:16.854775807]. To represent an absolute point in time, use `TimestampNSType` + * instead. + * + * Internally stored as a Long representing nanoseconds since the Unix epoch + * (1970-01-01T00:00:00). + * + * Please use the singleton `DataTypes.TimestampNTZNSType` to refer to the type. + * @since 4.2.0 + */ +@Unstable +class TimestampNTZNSType private () extends DatetimeType { + + /** + * The default size of a value of the TimestampNTZNSType is 8 bytes. + */ + override def defaultSize: Int = 8 + + override def typeName: String = "timestamp_ntz_ns" + + override def equals(obj: Any): Boolean = obj.isInstanceOf[TimestampNTZNSType] + + override def hashCode(): Int = classOf[TimestampNTZNSType].getSimpleName.hashCode + + private[spark] override def asNullable: TimestampNTZNSType = this +} + +/** + * The companion case object and its class are separated so the companion object also subclasses + * the TimestampNTZNSType class. Otherwise, the companion object would be of type + * "TimestampNTZNSType$" in byte code. Defined with a private constructor so the companion object + * is the only possible instantiation.
+ * + * @since 4.2.0 + */ +@Unstable +case object TimestampNTZNSType extends TimestampNTZNSType diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala index ce4f5e89be2b8..cfc1381df50d1 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala @@ -386,6 +386,8 @@ class DataTypeSuite extends SparkFunSuite { checkDefaultSize(DateType, 4) checkDefaultSize(TimestampType, 8) checkDefaultSize(TimestampNTZType, 8) + checkDefaultSize(TimestampNSType, 8) + checkDefaultSize(TimestampNTZNSType, 8) checkDefaultSize(StringType, 20) checkDefaultSize(CharType(20), 20) checkDefaultSize(VarcharType(20), 20)