You are viewing a plain text version of this content. The canonical link for it is here.
Posted to reviews@spark.apache.org by GitBox <gi...@apache.org> on 2020/10/08 02:58:13 UTC

[GitHub] [spark] HyukjinKwon commented on a change in pull request #29973: [SPARK-33082][SPARK-20202][BUILD][SQL][FOLLOW-UP] Remove Hive 1.2 workarounds and Hive 1.2 profile in Jenkins script

HyukjinKwon commented on a change in pull request #29973:
URL: https://github.com/apache/spark/pull/29973#discussion_r501419736



##########
File path: sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFilters.scala
##########
@@ -64,185 +64,7 @@ import org.apache.spark.sql.types._
  */
 private[orc] object OrcFilters extends Logging {
 
-  private def findMethod(klass: Class[_], name: String, args: Class[_]*): Method = {
-    val method = klass.getMethod(name, args: _*)
-    method.setAccessible(true)
-    method
-  }
-
   def createFilter(schema: StructType, filters: Array[Filter]): Option[SearchArgument] = {
-    DatasourceOrcFilters.createFilter(schema, filters).asInstanceOf[Option[SearchArgument]]
-  }
-
-  def convertibleFilters(
-      schema: StructType,
-      dataTypeMap: Map[String, DataType],
-      filters: Seq[Filter]): Seq[Filter] = {
-    import org.apache.spark.sql.sources._
-
-    def convertibleFiltersHelper(
-        filter: Filter,
-        canPartialPushDown: Boolean): Option[Filter] = filter match {
-      // At here, it is not safe to just convert one side and remove the other side
-      // if we do not understand what the parent filters are.
-      //
-      // Here is an example used to explain the reason.
-      // Let's say we have NOT(a = 2 AND b in ('1')) and we do not understand how to
-      // convert b in ('1'). If we only convert a = 2, we will end up with a filter
-      // NOT(a = 2), which will generate wrong results.
-      //
-      // Pushing one side of AND down is only safe to do at the top level or in the child
-      // AND before hitting NOT or OR conditions, and in this case, the unsupported predicate
-      // can be safely removed.
-      case And(left, right) =>
-        val leftResultOptional = convertibleFiltersHelper(left, canPartialPushDown)
-        val rightResultOptional = convertibleFiltersHelper(right, canPartialPushDown)
-        (leftResultOptional, rightResultOptional) match {
-          case (Some(leftResult), Some(rightResult)) => Some(And(leftResult, rightResult))
-          case (Some(leftResult), None) if canPartialPushDown => Some(leftResult)
-          case (None, Some(rightResult)) if canPartialPushDown => Some(rightResult)
-          case _ => None
-        }
-
-      // The Or predicate is convertible when both of its children can be pushed down.
-      // That is to say, if one/both of the children can be partially pushed down, the Or
-      // predicate can be partially pushed down as well.
-      //
-      // Here is an example used to explain the reason.
-      // Let's say we have
-      // (a1 AND a2) OR (b1 AND b2),
-      // a1 and b1 is convertible, while a2 and b2 is not.
-      // The predicate can be converted as
-      // (a1 OR b1) AND (a1 OR b2) AND (a2 OR b1) AND (a2 OR b2)
-      // As per the logical in And predicate, we can push down (a1 OR b1).
-      case Or(left, right) =>
-        for {
-          lhs <- convertibleFiltersHelper(left, canPartialPushDown)
-          rhs <- convertibleFiltersHelper(right, canPartialPushDown)
-        } yield Or(lhs, rhs)
-      case Not(pred) =>
-        val childResultOptional = convertibleFiltersHelper(pred, canPartialPushDown = false)
-        childResultOptional.map(Not)
-      case other =>
-        for (_ <- buildLeafSearchArgument(dataTypeMap, other, newBuilder())) yield other
-    }
-    filters.flatMap { filter =>
-      convertibleFiltersHelper(filter, true)
-    }
-  }
-
-  /**
-   * Build a SearchArgument and return the builder so far.
-   *
-   * @param dataTypeMap a map from the attribute name to its data type.
-   * @param expression the input predicates, which should be fully convertible to SearchArgument.
-   * @param builder the input SearchArgument.Builder.
-   * @return the builder so far.
-   */
-  private def buildSearchArgument(
-      dataTypeMap: Map[String, DataType],
-      expression: Filter,
-      builder: Builder): Builder = {
-    expression match {
-      case And(left, right) =>
-        val lhs = buildSearchArgument(dataTypeMap, left, builder.startAnd())
-        val rhs = buildSearchArgument(dataTypeMap, right, lhs)
-        rhs.end()
-
-      case Or(left, right) =>
-        val lhs = buildSearchArgument(dataTypeMap, left, builder.startOr())
-        val rhs = buildSearchArgument(dataTypeMap, right, lhs)
-        rhs.end()
-
-      case Not(child) =>
-        buildSearchArgument(dataTypeMap, child, builder.startNot()).end()
-
-      case other =>
-        buildLeafSearchArgument(dataTypeMap, other, builder).getOrElse {
-          throw new SparkException(
-            "The input filter of OrcFilters.buildSearchArgument should be fully convertible.")
-        }
-    }
-  }
-
-  /**
-   * Build a SearchArgument for a leaf predicate and return the builder so far.
-   *
-   * @param dataTypeMap a map from the attribute name to its data type.
-   * @param expression the input filter predicates.
-   * @param builder the input SearchArgument.Builder.
-   * @return the builder so far.
-   */
-  private def buildLeafSearchArgument(
-      dataTypeMap: Map[String, DataType],
-      expression: Filter,
-      builder: Builder): Option[Builder] = {
-    def isSearchableType(dataType: DataType): Boolean = dataType match {
-      // Only the values in the Spark types below can be recognized by
-      // the `SearchArgumentImpl.BuilderImpl.boxLiteral()` method.
-      case ByteType | ShortType | FloatType | DoubleType => true
-      case IntegerType | LongType | StringType | BooleanType => true
-      case TimestampType | _: DecimalType => true
-      case _ => false
-    }
-
-    import org.apache.spark.sql.sources._
-
-    // NOTE: For all case branches dealing with leaf predicates below, the additional `startAnd()`
-    // call is mandatory. ORC `SearchArgument` builder requires that all leaf predicates must be
-    // wrapped by a "parent" predicate (`And`, `Or`, or `Not`).
-    expression match {
-      // NOTE: For all case branches dealing with leaf predicates below, the additional `startAnd()`
-      // call is mandatory.  ORC `SearchArgument` builder requires that all leaf predicates must be
-      // wrapped by a "parent" predicate (`And`, `Or`, or `Not`).
-
-      case EqualTo(attribute, value) if isSearchableType(dataTypeMap(attribute)) =>
-        val bd = builder.startAnd()
-        val method = findMethod(bd.getClass, "equals", classOf[String], classOf[Object])
-        Some(method.invoke(bd, attribute, value.asInstanceOf[AnyRef]).asInstanceOf[Builder].end())
-
-      case EqualNullSafe(attribute, value) if isSearchableType(dataTypeMap(attribute)) =>
-        val bd = builder.startAnd()
-        val method = findMethod(bd.getClass, "nullSafeEquals", classOf[String], classOf[Object])
-        Some(method.invoke(bd, attribute, value.asInstanceOf[AnyRef]).asInstanceOf[Builder].end())
-
-      case LessThan(attribute, value) if isSearchableType(dataTypeMap(attribute)) =>
-        val bd = builder.startAnd()
-        val method = findMethod(bd.getClass, "lessThan", classOf[String], classOf[Object])
-        Some(method.invoke(bd, attribute, value.asInstanceOf[AnyRef]).asInstanceOf[Builder].end())
-
-      case LessThanOrEqual(attribute, value) if isSearchableType(dataTypeMap(attribute)) =>
-        val bd = builder.startAnd()
-        val method = findMethod(bd.getClass, "lessThanEquals", classOf[String], classOf[Object])
-        Some(method.invoke(bd, attribute, value.asInstanceOf[AnyRef]).asInstanceOf[Builder].end())
-
-      case GreaterThan(attribute, value) if isSearchableType(dataTypeMap(attribute)) =>
-        val bd = builder.startNot()
-        val method = findMethod(bd.getClass, "lessThanEquals", classOf[String], classOf[Object])
-        Some(method.invoke(bd, attribute, value.asInstanceOf[AnyRef]).asInstanceOf[Builder].end())
-
-      case GreaterThanOrEqual(attribute, value) if isSearchableType(dataTypeMap(attribute)) =>
-        val bd = builder.startNot()
-        val method = findMethod(bd.getClass, "lessThan", classOf[String], classOf[Object])
-        Some(method.invoke(bd, attribute, value.asInstanceOf[AnyRef]).asInstanceOf[Builder].end())
-
-      case IsNull(attribute) if isSearchableType(dataTypeMap(attribute)) =>
-        val bd = builder.startAnd()
-        val method = findMethod(bd.getClass, "isNull", classOf[String])
-        Some(method.invoke(bd, attribute).asInstanceOf[Builder].end())
-
-      case IsNotNull(attribute) if isSearchableType(dataTypeMap(attribute)) =>
-        val bd = builder.startNot()
-        val method = findMethod(bd.getClass, "isNull", classOf[String])
-        Some(method.invoke(bd, attribute).asInstanceOf[Builder].end())
-
-      case In(attribute, values) if isSearchableType(dataTypeMap(attribute)) =>
-        val bd = builder.startAnd()
-        val method = findMethod(bd.getClass, "in", classOf[String], classOf[Array[Object]])
-        Some(method.invoke(bd, attribute, values.map(_.asInstanceOf[AnyRef]))
-          .asInstanceOf[Builder].end())
-
-      case _ => None
-    }
+    DatasourceOrcFilters.createFilter(schema, filters)

Review comment:
       It seems not used anymore after https://github.com/apache/spark/commit/a127387a53e1a24e76de83c5a1858fcdbd38c3a2#diff-6cac9bc2656e3782b0312dceb8c55d47L77-L88




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org