You are viewing a plain text version of this content. The canonical link for it is here.
Posted to reviews@spark.apache.org by wajda <gi...@git.apache.org> on 2018/05/04 16:25:44 UTC

[GitHub] spark pull request #21155: [SPARK-23927][SQL] Add "sequence" expression

Github user wajda commented on a diff in the pull request:

    https://github.com/apache/spark/pull/21155#discussion_r186134394
  
    --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala ---
    @@ -1059,3 +1063,316 @@ case class Flatten(child: Expression) extends UnaryExpression {
     
       override def prettyName: String = "flatten"
     }
    +
    +@ExpressionDescription(
    +  usage = """
    +    _FUNC_(start, stop, step) -
    +          Generates an array of elements from start to stop (inclusive), incrementing by step.
    +          The type of the returned elements is the same as the type of argument expressions.
    +
    +          Supported types are: byte, short, integer, long, date, timestamp.
    +
    +          The start and stop expressions must resolve to the same type.
    +          If start and stop expressions resolve to the 'date' or 'timestamp' type
    +          then the step expression must resolve to the 'interval' type, otherwise to the same type
    +          as the start and stop expressions.
    +  """,
    +  arguments = """
    +    Arguments:
    +      * start - an expression. The start of the range.
    +      * stop  - an expression. The end the range (inclusive).
    +      * step  - an optional expression. The step of the range.
    +
    +          By default step is 1 if start is less than or equal to stop, otherwise -1.
    +          For the temporal sequences it's 1 day and -1 day respectively.
    +
    +          If start is greater than stop then the step must be negative, and vice versa.
    +  """,
    +  examples = """
    +    Examples:
    +          > SELECT _FUNC_(1, 5);
    +           [1, 2, 3, 4, 5]
    +          > SELECT _FUNC_(5, 1);
    +           [5, 4, 3, 2, 1]
    +          > SELECT _FUNC_(to_date('2018-01-01'), to_date('2018-03-01'), interval 1 month);
    +           [2018-01-01, 2018-02-01, 2018-03-01]
    +  """,
    +  since = "2.4.0"
    +)
    +case class Sequence(left: Expression,
    +                    middle: Expression,
    +                    right: Expression,
    +                    timeZoneId: Option[String] = None)
    +  extends TernaryExpression with TimeZoneAwareExpression {
    +
    +  import Sequence._
    +
    +  def this(arg0: Expression, arg1: Expression) =
    +    this(arg0, arg1, Sequence.defaultStepExpression(arg0, arg1), None)
    +
    +  def this(arg0: Expression, arg1: Expression, arg2: Expression) =
    +    this(arg0, arg1, arg2, None)
    +
    +  override def withTimeZone(timeZoneId: String): TimeZoneAwareExpression =
    +    copy(timeZoneId = Some(timeZoneId))
    +
    +  override def children: Seq[Expression] = Seq(left, middle, right)
    +
    +  override lazy val dataType: ArrayType = ArrayType(left.dataType, containsNull = false)
    +
    +  override def checkInputDataTypes(): TypeCheckResult = {
    +    val Seq(startType, stopType, stepType) = children.map(_.dataType)
    +    val typesCorrect =
    +      startType.sameType(stopType) &&
    +        (startType match {
    +          case TimestampType | DateType => CalendarIntervalType acceptsType stepType
    +          case _: IntegralType => stepType sameType startType
    +          case _ => false
    +        })
    +
    +    if (typesCorrect) {
    +      TypeCheckResult.TypeCheckSuccess
    +    } else {
    +      TypeCheckResult.TypeCheckFailure(
    +        s"$prettyName only supports integral, timestamp or date types")
    +    }
    +  }
    +
    +  private lazy val impl: SequenceImpl = dataType.elementType match {
    +    case iType: IntegralType =>
    +      type T = iType.InternalType
    +      val ct = ClassTag[T](iType.tag.mirror.runtimeClass(iType.tag.tpe))
    +      new IntegralSequenceImpl()(ct, iType.integral)
    +
    +    case TimestampType =>
    +      new TemporalSequenceImpl[Long](LongType, 1, identity, timeZone)
    +
    +    case DateType =>
    +      new TemporalSequenceImpl[Int](IntegerType, MICROS_PER_DAY, _.toInt, timeZone)
    +  }
    +
    +  override protected def nullSafeEval(input1: Any, input2: Any, input3: Any): Any = {
    +    ArrayData.toArrayData(impl.eval(input1, input2, input3))
    +  }
    +
    +  override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
    +    nullSafeCodeGen(ctx, ev, (start, stop, step) => {
    +      val arr = ctx.freshName("arr")
    +      val arrElemType = CodeGenerator.javaType(dataType.elementType)
    +      s"""
    +         |  final $arrElemType[] $arr = null;
    +         |  ${impl.genCode(start, stop, step)(arr, arrElemType)(ctx)}
    +         |  ${ev.value} = UnsafeArrayData.fromPrimitiveArray($arr);
    +       """.stripMargin
    +    })
    +  }
    +}
    +
    +object Sequence {
    +
    +  private trait SequenceImpl {
    +    def eval(input1: Any, input2: Any, input3: Any): Any
    +
    +    def genCode(start: String, stop: String, step: String)
    +               (arr: String, elemType: String)
    +               (implicit ctx: CodegenContext): String
    +  }
    +
    +  private class IntegralSequenceImpl[T: ClassTag]
    --- End diff --
    
    If I use that syntax, where would I get the ```Integral[T]``` instance on which to call the methods ```fromInt(), toLong(), zero, one```?


---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org