From 3ff334ad5e6f94d1a667ea2bb070ea79cf240718 Mon Sep 17 00:00:00 2001 From: Sean Owen Date: Tue, 19 Sep 2017 11:23:55 +0100 Subject: [PATCH 1/4] Clarify behavior of to_utc_timestamp/from_utc_timestamp with an example --- R/pkg/R/functions.R | 8 ++++---- python/pyspark/sql/functions.py | 8 ++++---- .../catalyst/expressions/datetimeExpressions.scala | 12 ++++++------ .../main/scala/org/apache/spark/sql/functions.scala | 8 ++++---- 4 files changed, 18 insertions(+), 18 deletions(-) diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index e92e1fd72bf1..74908ef4bec2 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -2226,8 +2226,8 @@ setMethod("from_json", signature(x = "Column", schema = "characterOrstructType") }) #' @details -#' \code{from_utc_timestamp}: Given a timestamp, which corresponds to a certain time of day in UTC, -#' returns another timestamp that corresponds to the same time of day in the given timezone. +#' \code{from_utc_timestamp}: Given a timestamp like '2017-07-14 02:40:00.0', interprets it as a time in UTC, and +# renders that time as a timestamp in the given time zone. For example, 'GMT+1' would yield '2017-07-14 03:40:00.0'. #' #' @rdname column_datetime_diff_functions #' @@ -2286,8 +2286,8 @@ setMethod("next_day", signature(y = "Column", x = "character"), }) #' @details -#' \code{to_utc_timestamp}: Given a timestamp, which corresponds to a certain time of day -#' in the given timezone, returns another timestamp that corresponds to the same time of day in UTC. +#' \code{to_utc_timestamp}: Given a timestamp like '2017-07-14 02:40:00.0', interprets it as a time in the given +# time zone, and renders that time as a timestamp in UTC. For example, 'GMT+1' would yield '2017-07-14 01:40:00.0'. #' #' @rdname column_datetime_diff_functions #' @aliases to_utc_timestamp to_utc_timestamp,Column,character-method diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index 399bef02d9cc..a52b73b9140c 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -1150,8 +1150,8 @@ def unix_timestamp(timestamp=None, format='yyyy-MM-dd HH:mm:ss'): @since(1.5) def from_utc_timestamp(timestamp, tz): """ - Given a timestamp, which corresponds to a certain time of day in UTC, returns another timestamp - that corresponds to the same time of day in the given timezone. + Given a timestamp like '2017-07-14 02:40:00.0', interprets it as a time in UTC, and renders that time as a + timestamp in the given time zone. For example, 'GMT+1' would yield '2017-07-14 03:40:00.0'. >>> df = spark.createDataFrame([('1997-02-28 10:30:00',)], ['t']) >>> df.select(from_utc_timestamp(df.t, "PST").alias('local_time')).collect() @@ -1164,8 +1164,8 @@ def from_utc_timestamp(timestamp, tz): @since(1.5) def to_utc_timestamp(timestamp, tz): """ - Given a timestamp, which corresponds to a certain time of day in the given timezone, returns - another timestamp that corresponds to the same time of day in UTC. + Given a timestamp like '2017-07-14 02:40:00.0', interprets it as a time in the given time zone, and renders + that time as a timestamp in UTC. For example, 'GMT+1' would yield '2017-07-14 01:40:00.0'. >>> df = spark.createDataFrame([('1997-02-28 10:30:00',)], ['ts']) >>> df.select(to_utc_timestamp(df.ts, "PST").alias('utc_time')).collect() diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index 70354200c82d..41975de4e73e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -978,12 +978,12 @@ case class TimeAdd(start: Expression, interval: Expression, timeZoneId: Option[S } /** - * Given a timestamp, which corresponds to a certain time of day in UTC, returns another timestamp - * that corresponds to the same time of day in the given timezone. + * Given a timestamp like '2017-07-14 02:40:00.0', interprets it as a time in UTC, and renders that time as a + * timestamp in the given time zone. For example, 'GMT+1' would yield '2017-07-14 03:40:00.0'. */ // scalastyle:off line.size.limit @ExpressionDescription( - usage = "_FUNC_(timestamp, timezone) - Given a timestamp, which corresponds to a certain time of day in UTC, returns another timestamp that corresponds to the same time of day in the given timezone.", + usage = "_FUNC_(timestamp, timezone) - Given a timestamp like '2017-07-14 02:40:00.0', interprets it as a time in UTC, and renders that time as a timestamp in the given time zone. For example, 'GMT+1' would yield '2017-07-14 03:40:00.0'.", examples = """ Examples: > SELECT from_utc_timestamp('2016-08-31', 'Asia/Seoul'); @@ -1153,12 +1153,12 @@ case class MonthsBetween(date1: Expression, date2: Expression, timeZoneId: Optio } /** - * Given a timestamp, which corresponds to a certain time of day in the given timezone, returns - * another timestamp that corresponds to the same time of day in UTC. + * Given a timestamp like '2017-07-14 02:40:00.0', interprets it as a time in the given time zone, and renders that + * time as a timestamp in UTC. For example, 'GMT+1' would yield '2017-07-14 01:40:00.0'. */ // scalastyle:off line.size.limit @ExpressionDescription( - usage = "_FUNC_(timestamp, timezone) - Given a timestamp, which corresponds to a certain time of day in the given timezone, returns another timestamp that corresponds to the same time of day in UTC.", + usage = "_FUNC_(timestamp, timezone) - Given a timestamp like '2017-07-14 02:40:00.0', interprets it as a time in the given time zone, and renders that time as a timestamp in UTC. For example, 'GMT+1' would yield '2017-07-14 01:40:00.0'.", examples = """ Examples: > SELECT _FUNC_('2016-08-31', 'Asia/Seoul'); diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index c6d0d86384b7..a1f56dd0c8b1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -2791,8 +2791,8 @@ object functions { } /** - * Given a timestamp, which corresponds to a certain time of day in UTC, returns another timestamp - * that corresponds to the same time of day in the given timezone. + * Given a timestamp like '2017-07-14 02:40:00.0', interprets it as a time in UTC, and renders that time + * as a timestamp in the given time zone. For example, 'GMT+1' would yield '2017-07-14 03:40:00.0'. * @group datetime_funcs * @since 1.5.0 */ @@ -2801,8 +2801,8 @@ object functions { } /** - * Given a timestamp, which corresponds to a certain time of day in the given timezone, returns - * another timestamp that corresponds to the same time of day in UTC. + * Given a timestamp like '2017-07-14 02:40:00.0', interprets it as a time in the given time zone, and + * renders that time as a timestamp in UTC. For example, 'GMT+1' would yield '2017-07-14 01:40:00.0'. * @group datetime_funcs * @since 1.5.0 */ From fc10be85718f75f4262c2a43c34471a8ad220499 Mon Sep 17 00:00:00 2001 From: Sean Owen Date: Tue, 19 Sep 2017 11:32:28 +0100 Subject: [PATCH 2/4] Fix line lengths --- .../sql/catalyst/expressions/datetimeExpressions.scala | 10 ++++++---- .../main/scala/org/apache/spark/sql/functions.scala | 10 ++++++---- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index 41975de4e73e..eaf878888821 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -978,8 +978,9 @@ case class TimeAdd(start: Expression, interval: Expression, timeZoneId: Option[S } /** - * Given a timestamp like '2017-07-14 02:40:00.0', interprets it as a time in UTC, and renders that time as a - * timestamp in the given time zone. For example, 'GMT+1' would yield '2017-07-14 03:40:00.0'. + * Given a timestamp like '2017-07-14 02:40:00.0', interprets it as a time in UTC, and renders + * that time as a timestamp in the given time zone. For example, 'GMT+1' would yield + * '2017-07-14 03:40:00.0'. */ // scalastyle:off line.size.limit @ExpressionDescription( @@ -1153,8 +1154,9 @@ case class MonthsBetween(date1: Expression, date2: Expression, timeZoneId: Optio } /** - * Given a timestamp like '2017-07-14 02:40:00.0', interprets it as a time in the given time zone, and renders that - * time as a timestamp in UTC. For example, 'GMT+1' would yield '2017-07-14 01:40:00.0'. + * Given a timestamp like '2017-07-14 02:40:00.0', interprets it as a time in the given time zone, + * and renders that time as a timestamp in UTC. For example, 'GMT+1' would yield + * '2017-07-14 01:40:00.0'. */ // scalastyle:off line.size.limit @ExpressionDescription( diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index a1f56dd0c8b1..6bbdfa3ad189 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -2791,8 +2791,9 @@ object functions { } /** - * Given a timestamp like '2017-07-14 02:40:00.0', interprets it as a time in UTC, and renders that time - * as a timestamp in the given time zone. For example, 'GMT+1' would yield '2017-07-14 03:40:00.0'. + * Given a timestamp like '2017-07-14 02:40:00.0', interprets it as a time in UTC, and renders + * that time as a timestamp in the given time zone. For example, 'GMT+1' would yield + * '2017-07-14 03:40:00.0'. * @group datetime_funcs * @since 1.5.0 */ @@ -2801,8 +2802,9 @@ object functions { } /** - * Given a timestamp like '2017-07-14 02:40:00.0', interprets it as a time in the given time zone, and - * renders that time as a timestamp in UTC. For example, 'GMT+1' would yield '2017-07-14 01:40:00.0'. + * Given a timestamp like '2017-07-14 02:40:00.0', interprets it as a time in the given time + * zone, and renders that time as a timestamp in UTC. For example, 'GMT+1' would yield + * '2017-07-14 01:40:00.0'. * @group datetime_funcs * @since 1.5.0 */ From 87874b6d44731158ae82d432e65946809332c5f4 Mon Sep 17 00:00:00 2001 From: Sean Owen Date: Tue, 19 Sep 2017 11:49:36 +0100 Subject: [PATCH 3/4] Fix Python line length --- python/pyspark/sql/functions.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index a52b73b9140c..57068fbae1dd 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -1150,8 +1150,9 @@ def unix_timestamp(timestamp=None, format='yyyy-MM-dd HH:mm:ss'): @since(1.5) def from_utc_timestamp(timestamp, tz): """ - Given a timestamp like '2017-07-14 02:40:00.0', interprets it as a time in UTC, and renders that time as a - timestamp in the given time zone. For example, 'GMT+1' would yield '2017-07-14 03:40:00.0'. + Given a timestamp like '2017-07-14 02:40:00.0', interprets it as a time in UTC, and renders + that time as a timestamp in the given time zone. For example, 'GMT+1' would yield + '2017-07-14 03:40:00.0'. >>> df = spark.createDataFrame([('1997-02-28 10:30:00',)], ['t']) >>> df.select(from_utc_timestamp(df.t, "PST").alias('local_time')).collect() @@ -1164,8 +1165,9 @@ def from_utc_timestamp(timestamp, tz): @since(1.5) def to_utc_timestamp(timestamp, tz): """ - Given a timestamp like '2017-07-14 02:40:00.0', interprets it as a time in the given time zone, and renders - that time as a timestamp in UTC. For example, 'GMT+1' would yield '2017-07-14 01:40:00.0'. + Given a timestamp like '2017-07-14 02:40:00.0', interprets it as a time in the given time + zone, and renders that time as a timestamp in UTC. For example, 'GMT+1' would yield + '2017-07-14 01:40:00.0'. >>> df = spark.createDataFrame([('1997-02-28 10:30:00',)], ['ts']) >>> df.select(to_utc_timestamp(df.ts, "PST").alias('utc_time')).collect() From d81acdc4898f4f002525150f12887779df7f63a4 Mon Sep 17 00:00:00 2001 From: Sean Owen Date: Wed, 20 Sep 2017 09:49:32 +0100 Subject: [PATCH 4/4] Fix R docs --- R/pkg/R/functions.R | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index 74908ef4bec2..9f286263c216 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -2226,8 +2226,9 @@ setMethod("from_json", signature(x = "Column", schema = "characterOrstructType") }) #' @details -#' \code{from_utc_timestamp}: Given a timestamp like '2017-07-14 02:40:00.0', interprets it as a time in UTC, and -# renders that time as a timestamp in the given time zone. For example, 'GMT+1' would yield '2017-07-14 03:40:00.0'. +#' \code{from_utc_timestamp}: Given a timestamp like '2017-07-14 02:40:00.0', interprets it as a +#' time in UTC, and renders that time as a timestamp in the given time zone. For example, 'GMT+1' +#' would yield '2017-07-14 03:40:00.0'. #' #' @rdname column_datetime_diff_functions #' @@ -2286,8 +2287,9 @@ setMethod("next_day", signature(y = "Column", x = "character"), }) #' @details -#' \code{to_utc_timestamp}: Given a timestamp like '2017-07-14 02:40:00.0', interprets it as a time in the given -# time zone, and renders that time as a timestamp in UTC. For example, 'GMT+1' would yield '2017-07-14 01:40:00.0'. +#' \code{to_utc_timestamp}: Given a timestamp like '2017-07-14 02:40:00.0', interprets it as a +#' time in the given time zone, and renders that time as a timestamp in UTC. For example, 'GMT+1' +#' would yield '2017-07-14 01:40:00.0'. #' #' @rdname column_datetime_diff_functions #' @aliases to_utc_timestamp to_utc_timestamp,Column,character-method