Posted to commits@carbondata.apache.org by ja...@apache.org on 2018/08/07 13:09:53 UTC

[29/50] [abbrv] carbondata git commit: [CARBONDATA-2792][schema restructure] Create external table fails post schema restructure.

[CARBONDATA-2792][schema restructure] Create external table fails post schema restructure.

Problem
After the table schema is restructured (a column is dropped and a new column is added), the API
org.apache.carbondata.spark.util.CarbonSparkUtil.getRawSchema(carbonRelation: CarbonRelation): String,
which returns the raw schema string of the visible columns in ascending order of their schema ordinal,
throws an ArrayIndexOutOfBoundsException while creating an external table.
The API builds an array of raw column schemas for the visible columns, using each column's
schemaOrdinal as the array index. After a drop/add, a visible column can have a schemaOrdinal
greater than or equal to the visible column count, which causes the ArrayIndexOutOfBoundsException.
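A minimal standalone Scala sketch of the failure mode (a hypothetical model, not the
CarbonData API; the Column case class and the c1/c2/c4 values only mirror the test case
added below):

case class Column(name: String, schemaOrdinal: Int, invisible: Boolean)

object OrdinalIndexingBug extends App {
  // Schema history: c1 and c2 created, c2 dropped (kept as an invisible column),
  // c4 added. Only c1 and c4 remain visible, but c4 carries schemaOrdinal 2.
  val allColumns = Seq(
    Column("c1", schemaOrdinal = 0, invisible = false),
    Column("c2", schemaOrdinal = 1, invisible = true),   // dropped column
    Column("c4", schemaOrdinal = 2, invisible = false)   // newly added column
  )
  val visible = allColumns.filterNot(_.invisible)

  // Old approach: size the array by the visible column count, index by schemaOrdinal.
  val fields = new Array[String](visible.size)            // length 2
  try {
    visible.foreach(c => fields(c.schemaOrdinal) = c.name) // ordinal 2 is out of bounds
  } catch {
    case e: ArrayIndexOutOfBoundsException =>
      println(s"old indexing fails at index: ${e.getMessage}")
  }
}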

Solution
- Filter the visible and valid columns.
- Sort the filtered columns by their schema ordinal.
- Build the raw column schema using each column's index in the sorted list (a sketch follows).
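Continuing the same hypothetical model, a sketch of the fixed indexing: filter out
invisible/invalid columns, sort by schema ordinal, and index the array by each column's
position in the sorted list (the real change is in CarbonSparkUtil.getRawSchema in the
diff below):

object OrdinalIndexingFix extends App {
  case class Column(name: String, schemaOrdinal: Int, invisible: Boolean)

  val allColumns = Seq(
    Column("c1", schemaOrdinal = 0, invisible = false),
    Column("c2", schemaOrdinal = 1, invisible = true),
    Column("c4", schemaOrdinal = 2, invisible = false)
  )

  // Steps 1 and 2: keep only visible, valid columns and sort them by schema ordinal.
  val sortedVisible = allColumns
    .filter(c => !c.invisible && c.schemaOrdinal != -1)
    .sortBy(_.schemaOrdinal)

  // Step 3: the index is the column's position in the sorted list, so it always stays
  // within the bounds of the array, regardless of gaps in the ordinals.
  val fields = new Array[String](sortedVisible.size)
  sortedVisible.foreach(c => fields(sortedVisible.indexOf(c)) = c.name)
  println(fields.mkString(","))                           // prints: c1,c4
}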

This closes #2571


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/b483a574
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/b483a574
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/b483a574

Branch: refs/heads/external-format
Commit: b483a57464a860fe9dbd2d074a3e08fd59141edc
Parents: 625a2ef
Author: mohammadshahidkhan <mo...@gmail.com>
Authored: Fri Jul 27 12:51:49 2018 +0530
Committer: kunal642 <ku...@gmail.com>
Committed: Thu Aug 2 16:38:02 2018 +0530

----------------------------------------------------------------------
 .../createTable/TestCreateExternalTable.scala         | 14 ++++++++++++++
 .../carbondata/spark/util/CarbonSparkUtil.scala       | 14 +++++++++-----
 2 files changed, 23 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/carbondata/blob/b483a574/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestCreateExternalTable.scala
----------------------------------------------------------------------
diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestCreateExternalTable.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestCreateExternalTable.scala
index a9b8d57..6fb24c7 100644
--- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestCreateExternalTable.scala
+++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestCreateExternalTable.scala
@@ -32,6 +32,8 @@ class TestCreateExternalTable extends QueryTest with BeforeAndAfterAll {
 
   override def beforeAll(): Unit = {
     sql("DROP TABLE IF EXISTS origin")
+    sql("drop table IF EXISTS rsext")
+    sql("drop table IF EXISTS rstest1")
     // create carbon table and insert data
     sql("CREATE TABLE origin(key INT, value STRING) STORED BY 'carbondata'")
     sql("INSERT INTO origin select 100,'spark'")
@@ -41,6 +43,8 @@ class TestCreateExternalTable extends QueryTest with BeforeAndAfterAll {
 
   override def afterAll(): Unit = {
     sql("DROP TABLE IF EXISTS origin")
+    sql("drop table IF EXISTS rsext")
+    sql("drop table IF EXISTS rstest1")
   }
 
   test("create external table with existing files") {
@@ -111,5 +115,15 @@ class TestCreateExternalTable extends QueryTest with BeforeAndAfterAll {
     }
     assert(exception.getMessage().contains("Create external table as select"))
   }
+  test("create external table with post schema resturcture") {
+    sql("create table rstest1 (c1 string,c2 int) STORED BY 'org.apache.carbondata.format'")
+    sql("Alter table rstest1 drop columns(c2)")
+    sql(
+      "Alter table rstest1 add columns(c4 string) TBLPROPERTIES('DICTIONARY_EXCLUDE'='c4', " +
+      "'DEFAULT.VALUE.c4'='def')")
+    sql(s"""CREATE EXTERNAL TABLE rsext STORED BY 'carbondata' LOCATION '$storeLocation/rstest1'""")
+    sql("insert into rsext select 'shahid', 1")
+    checkAnswer(sql("select * from rstest1"),  sql("select * from rsext"))
+  }
 
 }

http://git-wip-us.apache.org/repos/asf/carbondata/blob/b483a574/integration/spark2/src/main/scala/org/apache/carbondata/spark/util/CarbonSparkUtil.scala
----------------------------------------------------------------------
diff --git a/integration/spark2/src/main/scala/org/apache/carbondata/spark/util/CarbonSparkUtil.scala b/integration/spark2/src/main/scala/org/apache/carbondata/spark/util/CarbonSparkUtil.scala
index b9e2442..a0c0545 100644
--- a/integration/spark2/src/main/scala/org/apache/carbondata/spark/util/CarbonSparkUtil.scala
+++ b/integration/spark2/src/main/scala/org/apache/carbondata/spark/util/CarbonSparkUtil.scala
@@ -18,13 +18,14 @@
 package org.apache.carbondata.spark.util
 
 import scala.collection.JavaConverters._
+import scala.collection.mutable
 
 import org.apache.spark.sql.hive.{CarbonMetaData, CarbonRelation, DictionaryMap}
 
 import org.apache.carbondata.core.constants.CarbonCommonConstants
 import org.apache.carbondata.core.metadata.encoder.Encoding
 import org.apache.carbondata.core.metadata.schema.table.{CarbonTable, TableInfo}
-import org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn
+import org.apache.carbondata.core.metadata.schema.table.column.{CarbonColumn, ColumnSchema}
 import org.apache.carbondata.core.util.CarbonUtil
 
 case class TransformHolder(rdd: Any, mataData: CarbonMetaData)
@@ -87,18 +88,21 @@ object CarbonSparkUtil {
     val fields = new Array[String](
       carbonRelation.dimensionsAttr.size + carbonRelation.measureAttr.size)
     val carbonTable = carbonRelation.carbonTable
+    val columnSchemas: mutable.Buffer[ColumnSchema] = carbonTable.getTableInfo.getFactTable.
+      getListOfColumns.asScala
+      .filter(cSchema => !cSchema.isInvisible && cSchema.getSchemaOrdinal != -1).
+      sortWith(_.getSchemaOrdinal < _.getSchemaOrdinal)
+    val columnList = columnSchemas.toList.asJava
     carbonRelation.dimensionsAttr.foreach(attr => {
-      val carbonDimension = carbonTable.getDimensionByName(carbonRelation.tableName, attr.name)
       val carbonColumn = carbonTable.getColumnByName(carbonRelation.tableName, attr.name)
       val columnComment = getColumnComment(carbonColumn)
-      fields(carbonDimension.getSchemaOrdinal) =
+      fields(columnList.indexOf(carbonColumn.getColumnSchema)) =
         '`' + attr.name + '`' + ' ' + attr.dataType.catalogString + columnComment
     })
     carbonRelation.measureAttr.foreach(msrAtrr => {
-      val carbonMeasure = carbonTable.getMeasureByName(carbonRelation.tableName, msrAtrr.name)
       val carbonColumn = carbonTable.getColumnByName(carbonRelation.tableName, msrAtrr.name)
       val columnComment = getColumnComment(carbonColumn)
-      fields(carbonMeasure.getSchemaOrdinal) =
+      fields(columnList.indexOf(carbonColumn.getColumnSchema)) =
         '`' + msrAtrr.name + '`' + ' ' + msrAtrr.dataType.catalogString + columnComment
     })
     fields.mkString(",")