You are viewing a plain text version of this content. The canonical link for it is here.
Posted to reviews@spark.apache.org by wajda <gi...@git.apache.org> on 2018/05/04 16:25:44 UTC
[GitHub] spark pull request #21155: [SPARK-23927][SQL] Add "sequence" expression
Github user wajda commented on a diff in the pull request:
https://github.com/apache/spark/pull/21155#discussion_r186134394
--- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala ---
@@ -1059,3 +1063,316 @@ case class Flatten(child: Expression) extends UnaryExpression {
override def prettyName: String = "flatten"
}
+
+@ExpressionDescription(
+ usage = """
+ _FUNC_(start, stop, step) -
+ Generates an array of elements from start to stop (inclusive), incrementing by step.
+ The type of the returned elements is the same as the type of argument expressions.
+
+ Supported types are: byte, short, integer, long, date, timestamp.
+
+ The start and stop expressions must resolve to the same type.
+ If start and stop expressions resolve to the 'date' or 'timestamp' type
+ then the step expression must resolve to the 'interval' type, otherwise to the same type
+ as the start and stop expressions.
+ """,
+ arguments = """
+ Arguments:
+ * start - an expression. The start of the range.
+ * stop - an expression. The end of the range (inclusive).
+ * step - an optional expression. The step of the range.
+
+ By default step is 1 if start is less than or equal to stop, otherwise -1.
+ For the temporal sequences it's 1 day and -1 day respectively.
+
+ If start is greater than stop then the step must be negative, and vice versa.
+ """,
+ examples = """
+ Examples:
+ > SELECT _FUNC_(1, 5);
+ [1, 2, 3, 4, 5]
+ > SELECT _FUNC_(5, 1);
+ [5, 4, 3, 2, 1]
+ > SELECT _FUNC_(to_date('2018-01-01'), to_date('2018-03-01'), interval 1 month);
+ [2018-01-01, 2018-02-01, 2018-03-01]
+ """,
+ since = "2.4.0"
+)
+case class Sequence(left: Expression,
+ middle: Expression,
+ right: Expression,
+ timeZoneId: Option[String] = None)
+ extends TernaryExpression with TimeZoneAwareExpression {
+
+ import Sequence._
+
+ def this(arg0: Expression, arg1: Expression) =
+ this(arg0, arg1, Sequence.defaultStepExpression(arg0, arg1), None)
+
+ def this(arg0: Expression, arg1: Expression, arg2: Expression) =
+ this(arg0, arg1, arg2, None)
+
+ override def withTimeZone(timeZoneId: String): TimeZoneAwareExpression =
+ copy(timeZoneId = Some(timeZoneId))
+
+ override def children: Seq[Expression] = Seq(left, middle, right)
+
+ override lazy val dataType: ArrayType = ArrayType(left.dataType, containsNull = false)
+
+ override def checkInputDataTypes(): TypeCheckResult = {
+ val Seq(startType, stopType, stepType) = children.map(_.dataType)
+ val typesCorrect =
+ startType.sameType(stopType) &&
+ (startType match {
+ case TimestampType | DateType => CalendarIntervalType acceptsType stepType
+ case _: IntegralType => stepType sameType startType
+ case _ => false
+ })
+
+ if (typesCorrect) {
+ TypeCheckResult.TypeCheckSuccess
+ } else {
+ TypeCheckResult.TypeCheckFailure(
+ s"$prettyName only supports integral, timestamp or date types")
+ }
+ }
+
+ private lazy val impl: SequenceImpl = dataType.elementType match {
+ case iType: IntegralType =>
+ type T = iType.InternalType
+ val ct = ClassTag[T](iType.tag.mirror.runtimeClass(iType.tag.tpe))
+ new IntegralSequenceImpl()(ct, iType.integral)
+
+ case TimestampType =>
+ new TemporalSequenceImpl[Long](LongType, 1, identity, timeZone)
+
+ case DateType =>
+ new TemporalSequenceImpl[Int](IntegerType, MICROS_PER_DAY, _.toInt, timeZone)
+ }
+
+ override protected def nullSafeEval(input1: Any, input2: Any, input3: Any): Any = {
+ ArrayData.toArrayData(impl.eval(input1, input2, input3))
+ }
+
+ override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
+ nullSafeCodeGen(ctx, ev, (start, stop, step) => {
+ val arr = ctx.freshName("arr")
+ val arrElemType = CodeGenerator.javaType(dataType.elementType)
+ s"""
+ | final $arrElemType[] $arr = null;
+ | ${impl.genCode(start, stop, step)(arr, arrElemType)(ctx)}
+ | ${ev.value} = UnsafeArrayData.fromPrimitiveArray($arr);
+ """.stripMargin
+ })
+ }
+}
+
+object Sequence {
+
+ private trait SequenceImpl {
+ def eval(input1: Any, input2: Any, input3: Any): Any
+
+ def genCode(start: String, stop: String, step: String)
+ (arr: String, elemType: String)
+ (implicit ctx: CodegenContext): String
+ }
+
+ private class IntegralSequenceImpl[T: ClassTag]
--- End diff --
If I used that syntax, where would I get the `Integral[T]` instance on which to call the methods `fromInt()`, `toLong()`, `zero`, and `one`?
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org