You are viewing a plain text version of this content. The canonical link for it is here.
Posted to reviews@spark.apache.org by GitBox <gi...@apache.org> on 2019/10/23 20:08:55 UTC

[GitHub] [spark] imback82 commented on a change in pull request #26214: [SPARK-29558][SQL] ResolveTables and ResolveRelations should be order-insensitive

imback82 commented on a change in pull request #26214: [SPARK-29558][SQL] ResolveTables and ResolveRelations should be order-insensitive
URL: https://github.com/apache/spark/pull/26214#discussion_r338229705
 
 

 ##########
 File path: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
 ##########
 @@ -755,46 +743,61 @@ class Analyzer(
     }
 
     def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperatorsUp {
-      case i @ InsertIntoStatement(u @ UnresolvedRelation(AsTableIdentifier(ident)), _, child, _, _)
-          if child.resolved =>
-        EliminateSubqueryAliases(lookupTableFromCatalog(ident, u)) match {
+      case i @ InsertIntoStatement(u: UnresolvedRelation, _, _, _, _) if i.query.resolved =>
+        EliminateSubqueryAliases(lookupTableFromCatalog(u)) match {
           case v: View =>
             u.failAnalysis(s"Inserting into a view is not allowed. View: ${v.desc.identifier}.")
           case other => i.copy(table = other)
         }
+
       case u: UnresolvedRelation => resolveRelation(u)
     }
 
-    // Look up the table with the given name from catalog. The database we used is decided by the
-    // precedence:
-    // 1. Use the database part of the table identifier, if it is defined;
-    // 2. Use defaultDatabase, if it is defined(In this case, no temporary objects can be used,
-    //    and the default database is only used to look up a view);
-    // 3. Use the currentDb of the SessionCatalog.
+    // Look up the table with the given name from catalog. If `defaultDatabase` is defined, we
+    // expand the table name with `defaultDatabase`. Then we follow this rule to look up the table:
+    //  1. If the table name has only 1 part:
+    //     - Try resolve the table name to a temp view. If not found, expand the table name with
+    //       current namespace and look up the table from current catalog.
+    //  2. If the table name has more than 1 parts:
+    //     - If the table name has 2 parts and the first name part is equal to the global temp view
+    //       database name, resolve the table name to a global temp view.
+    //     - If the first name part matches a registered v2 catalog, look up the table from that
+    //       catalog.
+    //     - Otherwise, look up the table from the current catalog.
     private def lookupTableFromCatalog(
-        tableIdentifier: TableIdentifier,
         u: UnresolvedRelation,
         defaultDatabase: Option[String] = None): LogicalPlan = {
-      val tableIdentWithDb = tableIdentifier.copy(
-        database = tableIdentifier.database.orElse(defaultDatabase))
-      try {
-        v1SessionCatalog.lookupRelation(tableIdentWithDb)
-      } catch {
-        case _: NoSuchTableException | _: NoSuchDatabaseException =>
-          u
+      val nameParts = u.multipartIdentifier
+      assert(nameParts.nonEmpty)
+      val expandedNameParts = defaultDatabase.toSeq ++ nameParts
+      if (expandedNameParts.length == 1) {
+        val tblName = expandedNameParts.head
+        v1SessionCatalog.lookupTempView(tblName).orElse {
+          val tableName = catalogManager.currentNamespace ++ expandedNameParts
+          loadTable(catalogManager.currentCatalog.asTableCatalog, tableName)
+        }.getOrElse(u)
+      } else {
+        val maybeGlobalTempView = if (expandedNameParts.length == 2) {
+          v1SessionCatalog.lookupGlobalTempView(expandedNameParts.head, expandedNameParts.last)
+        } else {
+          None
+        }
+        maybeGlobalTempView.orElse {
+          val CatalogAndIdentifierParts(resolvedCatalog, restNameParts) = expandedNameParts
+          loadTable(resolvedCatalog, restNameParts)
+        }.getOrElse(u)
       }
     }
 
-    // If the database part is specified, and we support running SQL directly on files, and
-    // it's not a temporary view, and the table does not exist, then let's just return the
-    // original UnresolvedRelation. It is possible we are matching a query like "select *
-    // from parquet.`/path/to/query`". The plan will get resolved in the rule `ResolveDataSource`.
-    // Note that we are testing (!db_exists || !table_exists) because the catalog throws
-    // an exception from tableExists if the database does not exist.
-    private def isRunningDirectlyOnFiles(table: TableIdentifier): Boolean = {
-      table.database.isDefined && conf.runSQLonFile && !v1SessionCatalog.isTemporaryTable(table) &&
-        (!v1SessionCatalog.databaseExists(table.database.get)
-          || !v1SessionCatalog.tableExists(table))
+    private def loadTable(tableCatalog: CatalogPlugin, name: Seq[String]): Option[LogicalPlan] = {
+      if (CatalogV2Util.isSessionCatalog(tableCatalog)) {
+        CatalogV2Util.loadTable(tableCatalog, name.asIdentifier).map {
+          case v1Table: V1Table => v1SessionCatalog.createV1Relation(v1Table.v1Table)
+          case v2Table => DataSourceV2Relation.create(v2Table)
+        }
+      } else {
+        CatalogV2Util.loadTable(tableCatalog, name.asIdentifier).map(DataSourceV2Relation.create)
 
 Review comment:
   nit: you can move `CatalogV2Util.loadTable(tableCatalog, name.asIdentifier)` out of if-else block.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org