You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@carbondata.apache.org by ku...@apache.org on 2018/12/28 11:32:30 UTC
carbondata git commit: [CARBONDATA-3195]Added validation for Inverted
Index columns and added a test case in case of varchar
Repository: carbondata
Updated Branches:
refs/heads/master d85d54324 -> f5c1b7bbd
[CARBONDATA-3195]Added validation for Inverted Index columns and added a test case in case of varchar
This PR adds a validation for inverted index columns: when the inverted index
columns are not present in the sort columns, an exception is thrown.
Also added a test case for when varchar columns are passed as inverted index columns.
This closes #3020
Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/f5c1b7bb
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/f5c1b7bb
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/f5c1b7bb
Branch: refs/heads/master
Commit: f5c1b7bbd2485e1186e3a7c718d3f539599905a5
Parents: d85d543
Author: shardul-cr7 <sh...@gmail.com>
Authored: Mon Dec 24 12:51:16 2018 +0530
Committer: kumarvishal09 <ku...@gmail.com>
Committed: Fri Dec 28 17:01:56 2018 +0530
----------------------------------------------------------------------
docs/ddl-of-carbondata.md | 4 +++-
.../dataload/TestNoInvertedIndexLoadAndQuery.scala | 8 ++++----
.../longstring/VarcharDataTypesBasicTestCase.scala | 13 +++++++++++++
.../apache/spark/sql/catalyst/CarbonDDLSqlParser.scala | 10 ++++++++++
4 files changed, 30 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/carbondata/blob/f5c1b7bb/docs/ddl-of-carbondata.md
----------------------------------------------------------------------
diff --git a/docs/ddl-of-carbondata.md b/docs/ddl-of-carbondata.md
index 3d3db1e..d1a4794 100644
--- a/docs/ddl-of-carbondata.md
+++ b/docs/ddl-of-carbondata.md
@@ -126,9 +126,11 @@ CarbonData DDL statements are documented here,which includes:
By default inverted index is disabled as store size will be reduced, it can be enabled by using a table property. It might help to improve compression ratio and query speed, especially for low cardinality columns which are in reward position.
Suggested use cases : For high cardinality columns, you can disable the inverted index for improving the data loading performance.
+
+ **NOTE**: Columns specified in INVERTED_INDEX should also be present in SORT_COLUMNS.
```
- TBLPROPERTIES ('NO_INVERTED_INDEX'='column1', 'INVERTED_INDEX'='column2, column3')
+ TBLPROPERTIES ('SORT_COLUMNS'='column2,column3','NO_INVERTED_INDEX'='column1', 'INVERTED_INDEX'='column2, column3')
```
- ##### Sort Columns Configuration
http://git-wip-us.apache.org/repos/asf/carbondata/blob/f5c1b7bb/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/dataload/TestNoInvertedIndexLoadAndQuery.scala
----------------------------------------------------------------------
diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/dataload/TestNoInvertedIndexLoadAndQuery.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/dataload/TestNoInvertedIndexLoadAndQuery.scala
index 13f8adb..f483827 100644
--- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/dataload/TestNoInvertedIndexLoadAndQuery.scala
+++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/dataload/TestNoInvertedIndexLoadAndQuery.scala
@@ -305,7 +305,7 @@ class TestNoInvertedIndexLoadAndQuery extends QueryTest with BeforeAndAfterAll {
CREATE TABLE IF NOT EXISTS index1
(id Int, name String, city String)
STORED BY 'org.apache.carbondata.format'
- TBLPROPERTIES('DICTIONARY_INCLUDE'='id','INVERTED_INDEX'='city,name')
+ TBLPROPERTIES('DICTIONARY_INCLUDE'='id','INVERTED_INDEX'='city,name', 'SORT_COLUMNS'='city,name')
""")
sql(
s"""
@@ -333,14 +333,14 @@ class TestNoInvertedIndexLoadAndQuery extends QueryTest with BeforeAndAfterAll {
CREATE TABLE IF NOT EXISTS index1
(id Int, name String, city String)
STORED BY 'org.apache.carbondata.format'
- TBLPROPERTIES('INVERTED_INDEX'='city,name,id')
+ TBLPROPERTIES('INVERTED_INDEX'='city,name,id','SORT_COLUMNS'='city,name,id')
""")
val carbonTable = CarbonMetadata.getInstance().getCarbonTable("default", "index1")
assert(carbonTable.getColumnByName("index1", "city").getColumnSchema.getEncodingList
.contains(Encoding.INVERTED_INDEX))
assert(carbonTable.getColumnByName("index1", "name").getColumnSchema.getEncodingList
.contains(Encoding.INVERTED_INDEX))
- assert(!carbonTable.getColumnByName("index1", "id").getColumnSchema.getEncodingList
+ assert(carbonTable.getColumnByName("index1", "id").getColumnSchema.getEncodingList
.contains(Encoding.INVERTED_INDEX))
}
@@ -352,7 +352,7 @@ class TestNoInvertedIndexLoadAndQuery extends QueryTest with BeforeAndAfterAll {
CREATE TABLE IF NOT EXISTS index1
(id Int, name String, city String)
STORED BY 'org.apache.carbondata.format'
- TBLPROPERTIES('NO_INVERTED_INDEX'='city','INVERTED_INDEX'='city')
+ TBLPROPERTIES('NO_INVERTED_INDEX'='city','INVERTED_INDEX'='city','SORT_COLUMNS'='city')
""")
}
assert(exception.getMessage
http://git-wip-us.apache.org/repos/asf/carbondata/blob/f5c1b7bb/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/longstring/VarcharDataTypesBasicTestCase.scala
----------------------------------------------------------------------
diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/longstring/VarcharDataTypesBasicTestCase.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/longstring/VarcharDataTypesBasicTestCase.scala
index a96f7df..3148cac 100644
--- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/longstring/VarcharDataTypesBasicTestCase.scala
+++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/longstring/VarcharDataTypesBasicTestCase.scala
@@ -191,6 +191,19 @@ class VarcharDataTypesBasicTestCase extends QueryTest with BeforeAndAfterEach wi
assert(exceptionCaught.getMessage.contains("both in no_inverted_index and long_string_columns"))
}
+ test("inverted index columns cannot be present in long_string_cols as they do not support sort_cols") {
+ val exceptionCaught = intercept[MalformedCarbonCommandException] {
+ sql(
+ s"""
+ | CREATE TABLE if not exists $longStringTable(
+ | id INT, name STRING, description STRING, address STRING, note STRING
+ | ) STORED BY 'carbondata'
+ | TBLPROPERTIES('inverted_index'='note', 'long_string_columns'='note,description')
+ |""".stripMargin)
+ }
+ assert(exceptionCaught.getMessage.contains("INVERTED_INDEX column: note should be present in SORT_COLUMNS"))
+ }
+
private def prepareTable(): Unit = {
sql(
s"""
http://git-wip-us.apache.org/repos/asf/carbondata/blob/f5c1b7bb/integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala
----------------------------------------------------------------------
diff --git a/integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala b/integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala
index 35bf335..7d5c170 100644
--- a/integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala
+++ b/integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala
@@ -374,6 +374,16 @@ abstract class CarbonDDLSqlParser extends AbstractCarbonSparkSQLParser {
// get inverted index columns from table properties
val invertedIdxCols = extractInvertedIndexColumns(fields, tableProperties)
+ // Validate if columns present in inverted index are part of sort columns.
+ if (invertedIdxCols.nonEmpty) {
+ invertedIdxCols.foreach { column =>
+ if (!sortKeyDims.contains(column)) {
+ val errMsg = "INVERTED_INDEX column: " + column + " should be present in SORT_COLUMNS"
+ throw new MalformedCarbonCommandException(errMsg)
+ }
+ }
+ }
+
// check for any duplicate columns in inverted and noinverted columns defined in tblproperties
if (invertedIdxCols.nonEmpty && noInvertedIdxCols.nonEmpty) {
invertedIdxCols.foreach { distCol =>