Posted to commits@spark.apache.org by hv...@apache.org on 2017/02/07 14:50:41 UTC

spark git commit: [SPARK-16609] Add to_date/to_timestamp with format functions

Repository: spark
Updated Branches:
  refs/heads/master 6ed285c68 -> 7a7ce272f


[SPARK-16609] Add to_date/to_timestamp with format functions

## What changes were proposed in this pull request?

This pull request adds two new user-facing functions:
- `to_date`, which accepts an expression and a format and returns a date.
- `to_timestamp`, which accepts an expression and a format and returns a timestamp.

For example, given the date `2016-21-05` in the format `yyyy-dd-MM`:

### Date Function
*Previously*
```
to_date(unix_timestamp(lit("2016-21-05"), "yyyy-dd-MM").cast("timestamp"))
```
*Current*
```
to_date(lit("2016-21-05"), "yyyy-dd-MM")
```

### Timestamp Function
*Previously*
```
unix_timestamp(lit("2016-21-05"), "yyyy-dd-MM").cast("timestamp")
```
*Current*
```
to_timestamp(lit("2016-21-05"), "yyyy-dd-MM")
```
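
A minimal spark-shell sketch tying the two together (assuming a `SparkSession` named `spark`; the DataFrame and column names are illustrative, not part of the patch):

```
import org.apache.spark.sql.functions._
import spark.implicits._

val df = Seq("2016-21-05").toDF("raw")  // the example date, written as yyyy-dd-MM

df.select(
  to_date($"raw", "yyyy-dd-MM"),        // DateType: 2016-05-21
  to_timestamp($"raw", "yyyy-dd-MM")    // TimestampType: 2016-05-21 00:00:00
).show()
```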
### Tasks

- [x] Add `to_date` to Scala Functions
- [x] Add `to_date` to Python Functions
- [x] Add `to_date` to SQL Functions
- [x] Add `to_timestamp` to Scala Functions
- [x] Add `to_timestamp` to Python Functions
- [x] Add `to_timestamp` to SQL Functions
- [x] Add function to R

## How was this patch tested?

- [x] Add Functions to `DateFunctionsSuite`
- [ ] Test new `ParseToTimestamp` Expression (*not necessary*)
- [ ] Test new `ParseToDate` Expression (*not necessary*)
- [x] Add test for R
- [x] Add test for Python in test.py


Author: anabranch <wa...@gmail.com>
Author: Bill Chambers <bi...@databricks.com>
Author: anabranch <bi...@databricks.com>

Closes #16138 from anabranch/SPARK-16609.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7a7ce272
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7a7ce272
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7a7ce272

Branch: refs/heads/master
Commit: 7a7ce272fe9a703f58b0180a9d2001ecb5c4b8db
Parents: 6ed285c
Author: anabranch <wa...@gmail.com>
Authored: Tue Feb 7 15:50:30 2017 +0100
Committer: Herman van Hovell <hv...@databricks.com>
Committed: Tue Feb 7 15:50:30 2017 +0100

----------------------------------------------------------------------
 R/pkg/NAMESPACE                                 |  1 +
 R/pkg/R/functions.R                             | 80 ++++++++++++++++++--
 R/pkg/R/generics.R                              |  6 +-
 R/pkg/inst/tests/testthat/test_sparkSQL.R       |  1 +
 python/pyspark/sql/functions.py                 | 52 +++++++++++--
 python/pyspark/sql/tests.py                     |  9 +++
 .../catalyst/analysis/FunctionRegistry.scala    |  3 +-
 .../expressions/datetimeExpressions.scala       | 64 ++++++++++++++++
 .../scala/org/apache/spark/sql/functions.scala  | 33 ++++++++
 .../apache/spark/sql/DateFunctionsSuite.scala   | 68 +++++++++++++++--
 .../sql/catalyst/ExpressionToSQLSuite.scala     |  1 +
 11 files changed, 296 insertions(+), 22 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/7a7ce272/R/pkg/NAMESPACE
----------------------------------------------------------------------
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index 7c16ba3..625c797 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -325,6 +325,7 @@ exportMethods("%in%",
               "toDegrees",
               "toRadians",
               "to_date",
+              "to_timestamp",
               "to_utc_timestamp",
               "translate",
               "trim",

http://git-wip-us.apache.org/repos/asf/spark/blob/7a7ce272/R/pkg/R/functions.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
index 6ffa0f5..032cfec 100644
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@@ -1730,24 +1730,90 @@ setMethod("toRadians",
 
 #' to_date
 #'
-#' Converts the column into DateType.
+#' Converts the column into a DateType. You may optionally specify a format
+#' according to the rules in:
+#' \url{http://docs.oracle.com/javase/tutorial/i18n/format/simpleDateFormat.html}.
+#' If the string cannot be parsed according to the specified format (or default),
+#' the value of the column will be null.
+#' The default format is 'yyyy-MM-dd'.
 #'
-#' @param x Column to compute on.
+#' @param x Column to parse.
+#' @param format string to use to parse x Column to DateType. (optional)
 #'
 #' @rdname to_date
 #' @name to_date
 #' @family datetime_funcs
-#' @aliases to_date,Column-method
+#' @aliases to_date,Column,missing-method
 #' @export
-#' @examples \dontrun{to_date(df$c)}
-#' @note to_date since 1.5.0
+#' @examples
+#' \dontrun{
+#' to_date(df$c)
+#' to_date(df$c, 'yyyy-MM-dd')
+#' }
+#' @note to_date(Column) since 1.5.0
 setMethod("to_date",
-          signature(x = "Column"),
-          function(x) {
+          signature(x = "Column", format = "missing"),
+          function(x, format) {
             jc <- callJStatic("org.apache.spark.sql.functions", "to_date", x@jc)
             column(jc)
           })
 
+#' @rdname to_date
+#' @name to_date
+#' @family datetime_funcs
+#' @aliases to_date,Column,character-method
+#' @export
+#' @note to_date(Column, character) since 2.2.0
+setMethod("to_date",
+          signature(x = "Column", format = "character"),
+          function(x, format) {
+            jc <- callJStatic("org.apache.spark.sql.functions", "to_date", x@jc, format)
+            column(jc)
+          })
+
+#' to_timestamp
+#'
+#' Converts the column into a TimestampType. You may optionally specify a format
+#' according to the rules in:
+#' \url{http://docs.oracle.com/javase/tutorial/i18n/format/simpleDateFormat.html}.
+#' If the string cannot be parsed according to the specified format (or default),
+#' the value of the column will be null.
+#' The default format is 'yyyy-MM-dd HH:mm:ss'.
+#'
+#' @param x Column to parse.
+#' @param format string to use to parse x Column to TimestampType. (optional)
+#'
+#' @rdname to_timestamp
+#' @name to_timestamp
+#' @family datetime_funcs
+#' @aliases to_timestamp,Column,missing-method
+#' @export
+#' @examples
+#' \dontrun{
+#' to_timestamp(df$c)
+#' to_timestamp(df$c, 'yyyy-MM-dd')
+#' }
+#' @note to_timestamp(Column) since 2.2.0
+setMethod("to_timestamp",
+          signature(x = "Column", format = "missing"),
+          function(x, format) {
+            jc <- callJStatic("org.apache.spark.sql.functions", "to_timestamp", x@jc)
+            column(jc)
+          })
+
+#' @rdname to_timestamp
+#' @name to_timestamp
+#' @family datetime_funcs
+#' @aliases to_timestamp,Column,character-method
+#' @export
+#' @note to_timestamp(Column, character) since 2.2.0
+setMethod("to_timestamp",
+          signature(x = "Column", format = "character"),
+          function(x, format) {
+            jc <- callJStatic("org.apache.spark.sql.functions", "to_timestamp", x@jc, format)
+            column(jc)
+          })
+
 #' trim
 #'
 #' Trim the spaces from both ends for the specified string column.

http://git-wip-us.apache.org/repos/asf/spark/blob/7a7ce272/R/pkg/R/generics.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index 0307bac..d78b1a1 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -1256,7 +1256,11 @@ setGeneric("toRadians", function(x) { standardGeneric("toRadians") })
 
 #' @rdname to_date
 #' @export
-setGeneric("to_date", function(x) { standardGeneric("to_date") })
+setGeneric("to_date", function(x, format) { standardGeneric("to_date") })
+
+#' @rdname to_timestamp
+#' @export
+setGeneric("to_timestamp", function(x, format) { standardGeneric("to_timestamp") })
 
 #' @rdname to_utc_timestamp
 #' @export

http://git-wip-us.apache.org/repos/asf/spark/blob/7a7ce272/R/pkg/inst/tests/testthat/test_sparkSQL.R
----------------------------------------------------------------------
diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R
index 418f128..233a20c 100644
--- a/R/pkg/inst/tests/testthat/test_sparkSQL.R
+++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R
@@ -1237,6 +1237,7 @@ test_that("column functions", {
   c17 <- cov(c, c1) + cov("c", "c1") + covar_samp(c, c1) + covar_samp("c", "c1")
   c18 <- covar_pop(c, c1) + covar_pop("c", "c1")
   c19 <- spark_partition_id()
+  c20 <- to_timestamp(c) + to_timestamp(c, "yyyy") + to_date(c, "yyyy")
 
   # Test if base::is.nan() is exposed
   expect_equal(is.nan(c("a", "b")), c(FALSE, FALSE))

http://git-wip-us.apache.org/repos/asf/spark/blob/7a7ce272/python/pyspark/sql/functions.py
----------------------------------------------------------------------
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index 02c2350..40727ab 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -143,6 +143,12 @@ _functions_2_1 = {
                'measured in radians.',
 }
 
+_functions_2_2 = {
+    'to_date': 'Converts a string date into a DateType using the (optionally) specified format.',
+    'to_timestamp': 'Converts a string timestamp into a timestamp type using the ' +
+                    '(optionally) specified format.',
+}
+
 # math functions that take two arguments as input
 _binary_mathfunctions = {
     'atan2': 'Returns the angle theta from the conversion of rectangular coordinates (x, y) to' +
@@ -976,18 +982,52 @@ def months_between(date1, date2):
     return Column(sc._jvm.functions.months_between(_to_java_column(date1), _to_java_column(date2)))
 
 
-@since(1.5)
-def to_date(col):
-    """
-    Converts the column of :class:`pyspark.sql.types.StringType` or
-    :class:`pyspark.sql.types.TimestampType` into :class:`pyspark.sql.types.DateType`.
+@since(2.2)
+def to_date(col, format=None):
+    """Converts a :class:`Column` of :class:`pyspark.sql.types.StringType` or
+    :class:`pyspark.sql.types.TimestampType` into :class:`pyspark.sql.types.DateType`
+    using the optionally specified format. Default format is 'yyyy-MM-dd'.
+    Specify formats according to
+    `SimpleDateFormats <http://docs.oracle.com/javase/tutorial/i18n/format/simpleDateFormat.html>`_.
 
     >>> df = spark.createDataFrame([('1997-02-28 10:30:00',)], ['t'])
     >>> df.select(to_date(df.t).alias('date')).collect()
     [Row(date=datetime.date(1997, 2, 28))]
+
+    >>> df = spark.createDataFrame([('1997-02-28 10:30:00',)], ['t'])
+    >>> df.select(to_date(df.t, 'yyyy-MM-dd HH:mm:ss').alias('date')).collect()
+    [Row(date=datetime.date(1997, 2, 28))]
     """
     sc = SparkContext._active_spark_context
-    return Column(sc._jvm.functions.to_date(_to_java_column(col)))
+    if format is None:
+        jc = sc._jvm.functions.to_date(_to_java_column(col))
+    else:
+        jc = sc._jvm.functions.to_date(_to_java_column(col), format)
+    return Column(jc)
+
+
+@since(2.2)
+def to_timestamp(col, format=None):
+    """Converts a :class:`Column` of :class:`pyspark.sql.types.StringType` or
+    :class:`pyspark.sql.types.TimestampType` into :class:`pyspark.sql.types.TimestampType`
+    using the optionally specified format. Default format is 'yyyy-MM-dd HH:mm:ss'. Specify
+    formats according to
+    `SimpleDateFormats <http://docs.oracle.com/javase/tutorial/i18n/format/simpleDateFormat.html>`_.
+
+    >>> df = spark.createDataFrame([('1997-02-28 10:30:00',)], ['t'])
+    >>> df.select(to_timestamp(df.t).alias('dt')).collect()
+    [Row(dt=datetime.datetime(1997, 2, 28, 10, 30))]
+
+    >>> df = spark.createDataFrame([('1997-02-28 10:30:00',)], ['t'])
+    >>> df.select(to_timestamp(df.t, 'yyyy-MM-dd HH:mm:ss').alias('dt')).collect()
+    [Row(dt=datetime.datetime(1997, 2, 28, 10, 30))]
+    """
+    sc = SparkContext._active_spark_context
+    if format is None:
+        jc = sc._jvm.functions.to_timestamp(_to_java_column(col))
+    else:
+        jc = sc._jvm.functions.to_timestamp(_to_java_column(col), format)
+    return Column(jc)
 
 
 @since(1.5)

http://git-wip-us.apache.org/repos/asf/spark/blob/7a7ce272/python/pyspark/sql/tests.py
----------------------------------------------------------------------
diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py
index 86cad4b..710585c 100644
--- a/python/pyspark/sql/tests.py
+++ b/python/pyspark/sql/tests.py
@@ -1807,6 +1807,8 @@ class SQLTests(ReusedPySparkTestCase):
         self.assertTrue("+" in functions)
         self.assertTrue("like" in functions)
         self.assertTrue("month" in functions)
+        self.assertTrue("to_date" in functions)
+        self.assertTrue("to_timestamp" in functions)
         self.assertTrue("to_unix_timestamp" in functions)
         self.assertTrue("current_database" in functions)
         self.assertEquals(functions["+"], Function(
@@ -2189,6 +2191,13 @@ class HiveContextSQLTests(ReusedPySparkTestCase):
        # Regression test for SPARK-17514: limit(n).collect() should perform the same as take(n)
         assert_runs_only_one_job_stage_and_task("collect_limit", lambda: df.limit(1).collect())
 
+    def test_datetime_functions(self):
+        from pyspark.sql import functions
+        from datetime import date, datetime
+        df = self.spark.range(1).selectExpr("'2017-01-22' as dateCol")
+        parse_result = df.select(functions.to_date(functions.col("dateCol"))).first()
+        self.assertEquals(date(2017, 1, 22), parse_result['to_date(dateCol)'])
+
     @unittest.skipIf(sys.version_info < (3, 3), "Unittest < 3.3 doesn't support mocking")
     def test_unbounded_frames(self):
         from unittest.mock import patch

http://git-wip-us.apache.org/repos/asf/spark/blob/7a7ce272/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
index eea3740..9c9465f 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
@@ -345,7 +345,8 @@ object FunctionRegistry {
     expression[CurrentTimestamp]("now"),
     expression[Quarter]("quarter"),
     expression[Second]("second"),
-    expression[ToDate]("to_date"),
+    expression[ParseToTimestamp]("to_timestamp"),
+    expression[ParseToDate]("to_date"),
     expression[ToUnixTimestamp]("to_unix_timestamp"),
     expression[ToUTCTimestamp]("to_utc_timestamp"),
     expression[TruncDate]("trunc"),

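With the registry entries above, both functions are also resolvable from SQL. A sketch (assuming a `spark` session; not part of the patch):

```
spark.sql("SELECT to_date('2016-12-31', 'yyyy-MM-dd')").show()       // 2016-12-31
spark.sql("SELECT to_timestamp('2016-12-31', 'yyyy-MM-dd')").show()  // 2016-12-31 00:00:00.0
```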
http://git-wip-us.apache.org/repos/asf/spark/blob/7a7ce272/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
index bad8a71..f8fe774 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
@@ -24,6 +24,7 @@ import java.util.{Calendar, TimeZone}
 import scala.util.control.NonFatal
 
 import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.catalyst.analysis.TypeCheckResult
 import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, CodegenFallback, ExprCode}
 import org.apache.spark.sql.catalyst.util.DateTimeUtils
 import org.apache.spark.sql.types._
@@ -1170,6 +1171,69 @@ case class ToDate(child: Expression) extends UnaryExpression with ImplicitCastIn
 }
 
 /**
+ * Parses a column to a date based on the given format.
+ */
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+  usage = "_FUNC_(date_str, fmt) - Parses the `date_str` expression with the `fmt` expression to a date. Returns null with invalid input.",
+  extended = """
+    Examples:
+      > SELECT _FUNC_('2016-12-31', 'yyyy-MM-dd');
+       2016-12-31
+  """)
+// scalastyle:on line.size.limit
+case class ParseToDate(left: Expression, format: Option[Expression], child: Expression)
+  extends RuntimeReplaceable {
+
+  def this(left: Expression, format: Expression) = {
+    this(left, Option(format),
+      Cast(Cast(UnixTimestamp(left, format), TimestampType), DateType))
+  }
+
+  def this(left: Expression) = {
+    // backwards compatibility
+    this(left, Option(null), ToDate(left))
+  }
+
+  override def flatArguments: Iterator[Any] = Iterator(left, format)
+  override def sql: String = {
+    if (format.isDefined) {
+      s"$prettyName(${left.sql}, ${format.get.sql})"
+    } else {
+      s"$prettyName(${left.sql})"
+    }
+  }
+
+  override def prettyName: String = "to_date"
+}
+
+/**
+ * Parses a column to a timestamp based on the supplied format.
+ */
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+  usage = "_FUNC_(timestamp, fmt) - Parses the `timestamp` expression with the `fmt` expression to a timestamp. Returns null with invalid input.",
+  extended = """
+    Examples:
+      > SELECT _FUNC_('2016-12-31', 'yyyy-MM-dd');
+       2016-12-31 00:00:00.0
+  """)
+// scalastyle:on line.size.limit
+case class ParseToTimestamp(left: Expression, format: Expression, child: Expression)
+  extends RuntimeReplaceable {
+
+  def this(left: Expression, format: Expression) = {
+    this(left, format, Cast(UnixTimestamp(left, format), TimestampType))
+  }
+
+  override def flatArguments: Iterator[Any] = Iterator(left, format)
+  override def sql: String = s"$prettyName(${left.sql}, ${format.sql})"
+
+  override def prettyName: String = "to_timestamp"
+  override def dataType: DataType = TimestampType
+}
+
+/**
  * Returns date truncated to the unit specified by the format.
  */
 // scalastyle:off line.size.limit

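Both new expressions extend `RuntimeReplaceable`, so the analyzer swaps each one for the `child` expression it is constructed with; the user-facing call reduces to existing expressions. A sketch of the equivalences (assuming a DataFrame `df` with a string column `s`; illustrative only):

```
import org.apache.spark.sql.functions._

// ParseToDate(left, fmt) is replaced by a double cast over unix_timestamp:
df.select(to_date(col("s"), "yyyy-dd-MM"))
df.select(unix_timestamp(col("s"), "yyyy-dd-MM").cast("timestamp").cast("date"))  // same result

// ParseToTimestamp(left, fmt) reduces to a single cast:
df.select(to_timestamp(col("s"), "yyyy-dd-MM"))
df.select(unix_timestamp(col("s"), "yyyy-dd-MM").cast("timestamp"))               // same result
```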
http://git-wip-us.apache.org/repos/asf/spark/blob/7a7ce272/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
index 5e27484..24ed906 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
@@ -2665,6 +2665,27 @@ object functions {
   def unix_timestamp(s: Column, p: String): Column = withExpr {UnixTimestamp(s.expr, Literal(p)) }
 
   /**
+   * Converts the time string to a timestamp.
+   * Uses the default pattern "yyyy-MM-dd HH:mm:ss" and will return null on failure.
+   * @group datetime_funcs
+   * @since 2.2.0
+   */
+  def to_timestamp(s: Column): Column = withExpr {
+    new ParseToTimestamp(s.expr, Literal("yyyy-MM-dd HH:mm:ss"))
+  }
+
+  /**
+   * Converts the time string to a timestamp using the specified format
+   * (see [http://docs.oracle.com/javase/tutorial/i18n/format/simpleDateFormat.html]).
+   * Returns null on failure.
+   * @group datetime_funcs
+   * @since 2.2.0
+   */
+  def to_timestamp(s: Column, fmt: String): Column = withExpr {
+    new ParseToTimestamp(s.expr, Literal(fmt))
+  }
+
+  /**
    * Converts the column into DateType.
    *
    * @group datetime_funcs
@@ -2673,6 +2694,18 @@ object functions {
   def to_date(e: Column): Column = withExpr { ToDate(e.expr) }
 
   /**
+   * Converts the column into a DateType using the specified format
+   * (see [http://docs.oracle.com/javase/tutorial/i18n/format/simpleDateFormat.html]),
+   * returning null on failure.
+   *
+   * @group datetime_funcs
+   * @since 2.2.0
+   */
+  def to_date(e: Column, fmt: String): Column = withExpr {
+    new ParseToDate(e.expr, Literal(fmt))
+  }
+
+  /**
    * Returns date truncated to the unit specified by the format.
    *
    * @param format: 'year', 'yyyy', 'yy' for truncate by year,

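The null-on-failure behavior described in the scaladoc can be seen with a mismatched pattern; a minimal sketch (assuming a `spark` session; the value is borrowed from the test suite below):

```
import org.apache.spark.sql.functions._
import spark.implicits._

val df = Seq("2014-31-12").toDF("s")

df.select(to_date(col("s"))).show()               // null: default yyyy-MM-dd cannot parse month "31"
df.select(to_date(col("s"), "yyyy-dd-MM")).show() // 2014-12-31
```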
http://git-wip-us.apache.org/repos/asf/spark/blob/7a7ce272/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala
index e05b225..618db43 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala
@@ -354,31 +354,58 @@ class DateFunctionsSuite extends QueryTest with SharedSQLContext {
   test("function to_date") {
     val d1 = Date.valueOf("2015-07-22")
     val d2 = Date.valueOf("2015-07-01")
+    val d3 = Date.valueOf("2014-12-31")
     val t1 = Timestamp.valueOf("2015-07-22 10:00:00")
     val t2 = Timestamp.valueOf("2014-12-31 23:59:59")
+    val t3 = Timestamp.valueOf("2014-12-31 23:59:59")
     val s1 = "2015-07-22 10:00:00"
     val s2 = "2014-12-31"
-    val df = Seq((d1, t1, s1), (d2, t2, s2)).toDF("d", "t", "s")
+    val s3 = "2014-31-12"
+    val df = Seq((d1, t1, s1), (d2, t2, s2), (d3, t3, s3)).toDF("d", "t", "s")
 
     checkAnswer(
       df.select(to_date(col("t"))),
-      Seq(Row(Date.valueOf("2015-07-22")), Row(Date.valueOf("2014-12-31"))))
+      Seq(Row(Date.valueOf("2015-07-22")), Row(Date.valueOf("2014-12-31")),
+        Row(Date.valueOf("2014-12-31"))))
     checkAnswer(
       df.select(to_date(col("d"))),
-      Seq(Row(Date.valueOf("2015-07-22")), Row(Date.valueOf("2015-07-01"))))
+      Seq(Row(Date.valueOf("2015-07-22")), Row(Date.valueOf("2015-07-01")),
+        Row(Date.valueOf("2014-12-31"))))
     checkAnswer(
       df.select(to_date(col("s"))),
-      Seq(Row(Date.valueOf("2015-07-22")), Row(Date.valueOf("2014-12-31"))))
+      Seq(Row(Date.valueOf("2015-07-22")), Row(Date.valueOf("2014-12-31")), Row(null)))
 
     checkAnswer(
       df.selectExpr("to_date(t)"),
-      Seq(Row(Date.valueOf("2015-07-22")), Row(Date.valueOf("2014-12-31"))))
+      Seq(Row(Date.valueOf("2015-07-22")), Row(Date.valueOf("2014-12-31")),
+        Row(Date.valueOf("2014-12-31"))))
     checkAnswer(
       df.selectExpr("to_date(d)"),
-      Seq(Row(Date.valueOf("2015-07-22")), Row(Date.valueOf("2015-07-01"))))
+      Seq(Row(Date.valueOf("2015-07-22")), Row(Date.valueOf("2015-07-01")),
+        Row(Date.valueOf("2014-12-31"))))
     checkAnswer(
       df.selectExpr("to_date(s)"),
-      Seq(Row(Date.valueOf("2015-07-22")), Row(Date.valueOf("2014-12-31"))))
+      Seq(Row(Date.valueOf("2015-07-22")), Row(Date.valueOf("2014-12-31")), Row(null)))
+
+    // Now with format
+    checkAnswer(
+      df.select(to_date(col("t"), "yyyy-MM-dd")),
+      Seq(Row(Date.valueOf("2015-07-22")), Row(Date.valueOf("2014-12-31")),
+        Row(Date.valueOf("2014-12-31"))))
+    checkAnswer(
+      df.select(to_date(col("d"), "yyyy-MM-dd")),
+      Seq(Row(Date.valueOf("2015-07-22")), Row(Date.valueOf("2015-07-01")),
+        Row(Date.valueOf("2014-12-31"))))
+    checkAnswer(
+      df.select(to_date(col("s"), "yyyy-MM-dd")),
+      Seq(Row(Date.valueOf("2015-07-22")), Row(Date.valueOf("2014-12-31")),
+        Row(Date.valueOf("2016-07-12"))))
+
+    // Now switch the format
+    checkAnswer(
+      df.select(to_date(col("s"), "yyyy-dd-MM")),
+      Seq(Row(Date.valueOf("2016-10-07")), Row(Date.valueOf("2016-07-12")),
+        Row(Date.valueOf("2014-12-31"))))
   }
 
   test("function trunc") {
@@ -475,6 +502,33 @@ class DateFunctionsSuite extends QueryTest with SharedSQLContext {
       Row(ts1.getTime / 1000L), Row(ts2.getTime / 1000L)))
   }
 
+
+  test("to_timestamp") {
+    val date1 = Date.valueOf("2015-07-24")
+    val date2 = Date.valueOf("2015-07-25")
+    val ts_date1 = Timestamp.valueOf("2015-07-24 00:00:00")
+    val ts_date2 = Timestamp.valueOf("2015-07-25 00:00:00")
+    val ts1 = Timestamp.valueOf("2015-07-24 10:00:00")
+    val ts2 = Timestamp.valueOf("2015-07-25 02:02:02")
+    val s1 = "2015/07/24 10:00:00.5"
+    val s2 = "2015/07/25 02:02:02.6"
+    val ss1 = "2015-07-24 10:00:00"
+    val ss2 = "2015-07-25 02:02:02"
+    val fmt = "yyyy/MM/dd HH:mm:ss.S"
+    val df = Seq((date1, ts1, s1, ss1), (date2, ts2, s2, ss2)).toDF("d", "ts", "s", "ss")
+
+    checkAnswer(df.select(to_timestamp(col("ss"))),
+      df.select(unix_timestamp(col("ss")).cast("timestamp")))
+    checkAnswer(df.select(to_timestamp(col("ss"))), Seq(
+      Row(ts1), Row(ts2)))
+    checkAnswer(df.select(to_timestamp(col("s"), fmt)), Seq(
+      Row(ts1), Row(ts2)))
+    checkAnswer(df.select(to_timestamp(col("ts"), fmt)), Seq(
+      Row(ts1), Row(ts2)))
+    checkAnswer(df.select(to_timestamp(col("d"), "yyyy-MM-dd")), Seq(
+      Row(ts_date1), Row(ts_date2)))
+  }
+
   test("datediff") {
     val df = Seq(
       (Date.valueOf("2015-07-24"), Timestamp.valueOf("2015-07-24 01:00:00"),

http://git-wip-us.apache.org/repos/asf/spark/blob/7a7ce272/sql/hive/src/test/scala/org/apache/spark/sql/catalyst/ExpressionToSQLSuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/catalyst/ExpressionToSQLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/catalyst/ExpressionToSQLSuite.scala
index df9390a..1daa6f7 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/catalyst/ExpressionToSQLSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/catalyst/ExpressionToSQLSuite.scala
@@ -253,6 +253,7 @@ class ExpressionToSQLSuite extends SQLBuilderTest with SQLTestUtils {
     checkSqlGeneration("SELECT count(now())")
     checkSqlGeneration("SELECT quarter('2001-05-02')")
     checkSqlGeneration("SELECT second('11:35:55')")
+    checkSqlGeneration("SELECT to_timestamp('2001-10-30 10:30:00', 'yyyy-MM-dd HH:mm:ss')")
     checkSqlGeneration("SELECT to_date('2001-10-30 10:30:00')")
     checkSqlGeneration("SELECT to_unix_timestamp('2015-07-24 00:00:00', 'yyyy-MM-dd HH:mm:ss')")
     checkSqlGeneration("SELECT to_utc_timestamp('2015-07-24 00:00:00', 'PST')")

