You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@carbondata.apache.org by ch...@apache.org on 2018/04/12 07:08:02 UTC

carbondata git commit: [CARBONDATA-2155][CARBONDATA-2152] [Presto] Fixed IS NULL not working correctly on string datatype with dictionary include in presto

Repository: carbondata
Updated Branches:
  refs/heads/master cfb8ed9f5 -> 687118a1c


[CARBONDATA-2155][CARBONDATA-2152] [Presto] Fixed IS NULL not working correctly on string datatype with dictionary include in presto

Fixed IS NULL not working correctly on string datatype with dictionary_include in presto integration,Fixed Min function working incorrectly for string type with dictionary include in presto integration

This closes #2152


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/687118a1
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/687118a1
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/687118a1

Branch: refs/heads/master
Commit: 687118a1cf7113aed4718bad3c87d8bed1fd49af
Parents: cfb8ed9
Author: anubhav100 <an...@knoldus.in>
Authored: Mon Apr 9 18:15:31 2018 +0530
Committer: chenliang613 <ch...@huawei.com>
Committed: Thu Apr 12 15:07:31 2018 +0800

----------------------------------------------------------------------
 .../CarbonDictionaryDecodeReadSupport.scala     |  7 +-
 .../presto/src/test/resources/alldatatype.csv   |  1 +
 .../integrationtest/PrestoAllDataTypeTest.scala | 59 ++++++++++++--
 .../presto/util/CarbonDataStoreCreator.scala    | 82 ++++++++++++--------
 4 files changed, 110 insertions(+), 39 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/carbondata/blob/687118a1/integration/presto/src/main/scala/org/apache/carbondata/presto/CarbonDictionaryDecodeReadSupport.scala
----------------------------------------------------------------------
diff --git a/integration/presto/src/main/scala/org/apache/carbondata/presto/CarbonDictionaryDecodeReadSupport.scala b/integration/presto/src/main/scala/org/apache/carbondata/presto/CarbonDictionaryDecodeReadSupport.scala
index 9c05177..82cdf3a 100644
--- a/integration/presto/src/main/scala/org/apache/carbondata/presto/CarbonDictionaryDecodeReadSupport.scala
+++ b/integration/presto/src/main/scala/org/apache/carbondata/presto/CarbonDictionaryDecodeReadSupport.scala
@@ -93,7 +93,12 @@ class CarbonDictionaryDecodeReadSupport[T] extends CarbonReadSupport[T] {
     while (chunks.hasNext) {
       {
         val value: Array[Byte] = chunks.next
-        sliceArray(count) = wrappedBuffer(value, 0, value.length)
+        if(count ==1) {
+          sliceArray(count) = null
+        }
+        else {
+          sliceArray(count) = wrappedBuffer(value, 0, value.length)
+        }
         count += 1
       }
     }

http://git-wip-us.apache.org/repos/asf/carbondata/blob/687118a1/integration/presto/src/test/resources/alldatatype.csv
----------------------------------------------------------------------
diff --git a/integration/presto/src/test/resources/alldatatype.csv b/integration/presto/src/test/resources/alldatatype.csv
index 7201542..0fa46aa 100644
--- a/integration/presto/src/test/resources/alldatatype.csv
+++ b/integration/presto/src/test/resources/alldatatype.csv
@@ -9,3 +9,4 @@ ID,date,country,name,phonetype,serialname,salary,bonus,monthlyBonus,dob,shortfie
 8,2015-07-30,china,geetika,phone1848,ASD57308,15007.500,500.88,200.97,2008-09-21 11:10:06,10,true
 9,2015-07-18,china,ravindra,phone706,ASD86717,15008.00,700.999,45.25,2009-06-19 15:10:06,1,true
 9,2015/07/18,china,jitesh,phone706,ASD86717,15008.00,500.414,11.655,2001-08-29 13:09:03,12,true
+

http://git-wip-us.apache.org/repos/asf/carbondata/blob/687118a1/integration/presto/src/test/scala/org/apache/carbondata/presto/integrationtest/PrestoAllDataTypeTest.scala
----------------------------------------------------------------------
diff --git a/integration/presto/src/test/scala/org/apache/carbondata/presto/integrationtest/PrestoAllDataTypeTest.scala b/integration/presto/src/test/scala/org/apache/carbondata/presto/integrationtest/PrestoAllDataTypeTest.scala
index 49da227..ce17682 100644
--- a/integration/presto/src/test/scala/org/apache/carbondata/presto/integrationtest/PrestoAllDataTypeTest.scala
+++ b/integration/presto/src/test/scala/org/apache/carbondata/presto/integrationtest/PrestoAllDataTypeTest.scala
@@ -34,6 +34,34 @@ class PrestoAllDataTypeTest extends FunSuiteLike with BeforeAndAfterAll {
                                   + "../../../..").getCanonicalPath
   private val storePath = s"$rootPath/integration/presto/target/store"
 
+
+  // Table schema:
+  // +-------------+----------------+-------------+------------+
+  // | Column name | Data type      | Column type | Dictionary |
+  // +-------------+----------------+--------------+-----------+
+  // | id          | string         | dimension   | yes        |
+  // +-------------+----------------+-------------+------------+
+  // | date        | date           | dimension   | yes        |
+  // +-------------+----------------+-------------+------------+
+  // | country     | string         | dimension   | yes        |
+  // +-------------+----------------+-------------+-------------
+  // | name        | string         | dimension   | yes        |
+  // +-------------+----------------+-------------+-------------
+  // | phonetype   | string         | dimension   | yes        |
+  // +-------------+----------------+-------------+-------------
+  // | serialname  | string         | dimension   | true       |
+  // +-------------+----------------+-------------+-------------
+  // | bonus       |short decimal   | measure     | false      |
+  // +-------------+----------------+-------------+-------------
+  // | monthlyBonus| longdecimal    | measure     | false      |
+  // +-------------+----------------+-------------+-------------
+  // | dob         | timestamp      | dimension   | true       |
+  // +-------------+----------------+-------------+------------+
+  // | shortField  | shortfield     | measure     | true       |
+  // +-------------+----------------+-------------+-------------
+  // |isCurrentEmp | boolean        | measure     | true       |
+  // +-------------+----------------+-------------+------------+
+
   override def beforeAll: Unit = {
     import org.apache.carbondata.presto.util.CarbonDataStoreCreator
     CarbonDataStoreCreator
@@ -50,7 +78,7 @@ class PrestoAllDataTypeTest extends FunSuiteLike with BeforeAndAfterAll {
   test("test the result for count(*) in presto") {
     val actualResult: List[Map[String, Any]] = PrestoServer
       .executeQuery("SELECT COUNT(*) AS RESULT FROM TESTDB.TESTTABLE ")
-    val expectedResult: List[Map[String, Any]] = List(Map("RESULT" -> 10))
+    val expectedResult: List[Map[String, Any]] = List(Map("RESULT" -> 11))
     assert(actualResult.equals(expectedResult))
   }
   test("test the result for count() clause with distinct operator in presto") {
@@ -160,7 +188,9 @@ class PrestoAllDataTypeTest extends FunSuiteLike with BeforeAndAfterAll {
       Map("NAME" -> "liang"),
       Map("NAME" -> "prince"),
       Map("NAME" -> "ravindra"),
-      Map("NAME" -> "sahil"))
+      Map("NAME" -> "sahil"),
+      Map("NAME" -> null)
+    )
     assert(actualResult.equals(expectedResult))
   }
   test("select DATE type with order by clause") {
@@ -175,7 +205,9 @@ class PrestoAllDataTypeTest extends FunSuiteLike with BeforeAndAfterAll {
       Map("DATE" -> "2015-07-28"),
       Map("DATE" -> "2015-07-29"),
       Map("DATE" -> "2015-07-30"),
-      Map("DATE" -> null))
+      Map("DATE" -> null),
+      Map("DATE" -> null)
+    )
 
     assert(actualResult.filterNot(_.get("DATE") == null).zipWithIndex.forall {
       case (map, index) => map.get("DATE").toString
@@ -194,7 +226,9 @@ class PrestoAllDataTypeTest extends FunSuiteLike with BeforeAndAfterAll {
       Map("ID" -> 6),
       Map("ID" -> 7),
       Map("ID" -> 8),
-      Map("ID" -> 9))
+      Map("ID" -> 9),
+      Map("ID" -> null)
+    )
 
     assert(actualResult.equals(expectedResult))
 
@@ -345,7 +379,7 @@ class PrestoAllDataTypeTest extends FunSuiteLike with BeforeAndAfterAll {
   test("test for null operator on date data type") {
     val actualResult: List[Map[String, Any]] = PrestoServer
       .executeQuery("SELECT ID FROM TESTDB.TESTTABLE WHERE DATE IS NULL")
-    val expectedResult: List[Map[String, Any]] = List(Map("ID" -> 9))
+    val expectedResult: List[Map[String, Any]] = List(Map("ID" -> 9),Map("ID" -> null))
     assert(actualResult.equals(expectedResult))
 
   }
@@ -390,7 +424,7 @@ class PrestoAllDataTypeTest extends FunSuiteLike with BeforeAndAfterAll {
     val actualResult: List[Map[String, Any]] = PrestoServer
       .executeQuery(
         "SELECT ID from testdb.testtable WHERE SHORTFIELD IS NULL ORDER BY SHORTFIELD ")
-    val expectedResult: List[Map[String, Any]] = List(Map("ID" -> 7))
+    val expectedResult: List[Map[String, Any]] = List(Map("ID" -> 7),Map("ID" -> null))
 
     assert(actualResult.equals(expectedResult))
   }
@@ -450,4 +484,17 @@ class PrestoAllDataTypeTest extends FunSuiteLike with BeforeAndAfterAll {
       .executeQuery("SELECT id AS RESULT FROM TESTDB.TESTTABLE WHERE isCurrentEmployee is NOT null AND ID>8")
     assert(actualResult.head("RESULT").toString.toInt==9)
   }
+  test("test the is null operator when null is included in string data type dictionary_include"){
+    // See CARBONDATA-2155
+    val actualResult: List[Map[String, Any]] = PrestoServer.executeQuery("SELECT SERIALNAME  FROM TESTDB.TESTTABLE WHERE SERIALNAME IS NULL")
+    assert(actualResult equals List(Map("SERIALNAME" -> null)))
+  }
+  test("test the min function when null is included in string data type with dictionary_include"){
+    // See CARBONDATA-2152
+    val actualResult = PrestoServer.executeQuery("SELECT MIN(SERIALNAME) FROM TESTDB.TESTTABLE")
+    val expectedResult = List(Map("_col0" -> "ASD14875"))
+
+    assert(actualResult.equals(expectedResult))
+  }
+
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/carbondata/blob/687118a1/integration/presto/src/test/scala/org/apache/carbondata/presto/util/CarbonDataStoreCreator.scala
----------------------------------------------------------------------
diff --git a/integration/presto/src/test/scala/org/apache/carbondata/presto/util/CarbonDataStoreCreator.scala b/integration/presto/src/test/scala/org/apache/carbondata/presto/util/CarbonDataStoreCreator.scala
index baf02fd..e12af63 100644
--- a/integration/presto/src/test/scala/org/apache/carbondata/presto/util/CarbonDataStoreCreator.scala
+++ b/integration/presto/src/test/scala/org/apache/carbondata/presto/util/CarbonDataStoreCreator.scala
@@ -24,6 +24,7 @@ import java.util
 import java.util.{ArrayList, Date, List, UUID}
 
 import scala.collection.JavaConversions._
+import scala.collection.mutable
 
 import com.google.gson.Gson
 import org.apache.hadoop.conf.Configuration
@@ -34,34 +35,26 @@ import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl
 import org.apache.hadoop.mapreduce.{RecordReader, TaskType}
 
 import org.apache.carbondata.common.logging.LogServiceFactory
-import org.apache.carbondata.core.cache.dictionary.{Dictionary, DictionaryColumnUniqueIdentifier,
-ReverseDictionary}
+import org.apache.carbondata.core.cache.dictionary.{Dictionary, DictionaryColumnUniqueIdentifier, ReverseDictionary}
 import org.apache.carbondata.core.cache.{Cache, CacheProvider, CacheType}
 import org.apache.carbondata.core.constants.CarbonCommonConstants
 import org.apache.carbondata.core.datastore.impl.FileFactory
-import org.apache.carbondata.core.fileoperations.{AtomicFileOperations, AtomicFileOperationsImpl,
-FileWriteOperation}
-import org.apache.carbondata.core.metadata.converter.{SchemaConverter,
-ThriftWrapperSchemaConverterImpl}
+import org.apache.carbondata.core.fileoperations.{AtomicFileOperations, AtomicFileOperationsImpl, FileWriteOperation}
+import org.apache.carbondata.core.metadata.converter.{SchemaConverter, ThriftWrapperSchemaConverterImpl}
 import org.apache.carbondata.core.metadata.datatype.DataTypes
 import org.apache.carbondata.core.metadata.encoder.Encoding
-import org.apache.carbondata.core.metadata.schema.table.column.{CarbonColumn, CarbonDimension,
-CarbonMeasure, ColumnSchema}
+import org.apache.carbondata.core.metadata.schema.table.column.{CarbonColumn, CarbonDimension, CarbonMeasure, ColumnSchema}
 import org.apache.carbondata.core.metadata.schema.table.{CarbonTable, TableInfo, TableSchema}
 import org.apache.carbondata.core.metadata.schema.{SchemaEvolution, SchemaEvolutionEntry}
-import org.apache.carbondata.core.metadata.{AbsoluteTableIdentifier, CarbonMetadata,
-CarbonTableIdentifier, ColumnIdentifier}
+import org.apache.carbondata.core.metadata.{AbsoluteTableIdentifier, CarbonMetadata, CarbonTableIdentifier, ColumnIdentifier}
 import org.apache.carbondata.core.statusmanager.{LoadMetadataDetails, SegmentStatus}
 import org.apache.carbondata.core.util.path.CarbonTablePath
 import org.apache.carbondata.core.util.{CarbonProperties, CarbonUtil}
-import org.apache.carbondata.core.writer.sortindex.{CarbonDictionarySortIndexWriter,
-CarbonDictionarySortIndexWriterImpl, CarbonDictionarySortInfo, CarbonDictionarySortInfoPreparator}
-import org.apache.carbondata.core.writer.{CarbonDictionaryWriter, CarbonDictionaryWriterImpl,
-ThriftWriter}
+import org.apache.carbondata.core.writer.sortindex.{CarbonDictionarySortIndexWriter, CarbonDictionarySortIndexWriterImpl, CarbonDictionarySortInfo, CarbonDictionarySortInfoPreparator}
+import org.apache.carbondata.core.writer.{CarbonDictionaryWriter, CarbonDictionaryWriterImpl, ThriftWriter}
 import org.apache.carbondata.processing.loading.DataLoadExecutor
 import org.apache.carbondata.processing.loading.constants.DataLoadProcessorConstants
-import org.apache.carbondata.processing.loading.csvinput.{BlockDetails, CSVInputFormat,
-CSVRecordReaderIterator, StringArrayWritable}
+import org.apache.carbondata.processing.loading.csvinput.{BlockDetails, CSVInputFormat, CSVRecordReaderIterator, StringArrayWritable}
 import org.apache.carbondata.processing.loading.exception.CarbonDataLoadingException
 import org.apache.carbondata.processing.loading.model.{CarbonDataLoadSchema, CarbonLoadModel}
 import org.apache.carbondata.processing.util.TableOptionConstant
@@ -266,8 +259,8 @@ object CarbonDataStoreCreator {
     val monthlyBonus: ColumnSchema = new ColumnSchema()
     monthlyBonus.setColumnName("monthlyBonus")
     monthlyBonus.setColumnar(true)
-    monthlyBonus.setDataType(DataTypes.createDecimalType(10, 4))
-    monthlyBonus.setPrecision(10)
+    monthlyBonus.setDataType(DataTypes.createDecimalType(18, 4))
+    monthlyBonus.setPrecision(18)
     monthlyBonus.setScale(4)
     monthlyBonus.setSchemaOrdinal(8)
     monthlyBonus.setEncodingList(invertedIndexEncoding)
@@ -358,33 +351,33 @@ object CarbonDataStoreCreator {
     val reader: BufferedReader = new BufferedReader(
       new FileReader(factFilePath))
     val header: String = reader.readLine()
-    val split: Array[String] = header.split(",")
     val allCols: util.List[CarbonColumn] = new util.ArrayList[CarbonColumn]()
-    val dims: util.List[CarbonDimension] =
+    val dimensions: util.List[CarbonDimension] =
       table.getDimensionByTableName(table.getTableName)
-    allCols.addAll(dims)
+    allCols.addAll(dimensions)
     val msrs: List[CarbonMeasure] =
       table.getMeasureByTableName(table.getTableName)
     allCols.addAll(msrs)
-    val set: Array[util.Set[String]] = Array.ofDim[util.Set[String]](dims.size)
-    val dimsIndex = dims.map(dim => dim.getColumnSchema.getSchemaOrdinal)
-    for (i <- set.indices) {
-      set(i) = new util.HashSet[String]()
+    val dimensionsIndex = dimensions.map(dim => dim.getColumnSchema.getSchemaOrdinal)
+    val dimensionSet: Array[util.List[String]] = Array.ofDim[util.List[String]](dimensions.size)
+
+    for (i <- dimensionSet.indices) {
+      dimensionSet(i) = new util.ArrayList[String]()
     }
     var line: String = reader.readLine()
     while (line != null) {
       val data: Array[String] = line.split(",")
-      for (i <- set.indices) {
-        set(i).add(data(dimsIndex(i)))
+      for (index <- dimensionSet.indices) {
+        addDictionaryValuesToDimensionSet(dimensions, dimensionsIndex, dimensionSet, data, index)
       }
       line = reader.readLine()
     }
     val dictCache: Cache[DictionaryColumnUniqueIdentifier, ReverseDictionary] = CacheProvider
       .getInstance.createCache(CacheType.REVERSE_DICTIONARY)
 
-    for (i <- set.indices) {
+    for (index <- dimensionSet.indices) {
       val columnIdentifier: ColumnIdentifier =
-        new ColumnIdentifier(dims.get(i).getColumnId, null, null)
+        new ColumnIdentifier(dimensions.get(index).getColumnId, null, null)
 
       val dictionaryColumnUniqueIdentifier: DictionaryColumnUniqueIdentifier =
         new DictionaryColumnUniqueIdentifier(
@@ -393,7 +386,7 @@ object CarbonDataStoreCreator {
           columnIdentifier.getDataType)
       val writer: CarbonDictionaryWriter = new CarbonDictionaryWriterImpl(
         dictionaryColumnUniqueIdentifier)
-      for (value <- set(i)) {
+      for (value <- dimensionSet(index).distinct) {
         writer.write(value)
       }
       writer.close()
@@ -403,7 +396,7 @@ object CarbonDataStoreCreator {
           new DictionaryColumnUniqueIdentifier(
             absoluteTableIdentifier,
             columnIdentifier,
-            dims.get(i).getDataType)
+            dimensions.get(index).getDataType)
         )
         .asInstanceOf[Dictionary]
       val preparator: CarbonDictionarySortInfoPreparator =
@@ -412,7 +405,7 @@ object CarbonDataStoreCreator {
       val dictionarySortInfo: CarbonDictionarySortInfo =
         preparator.getDictionarySortInfo(newDistinctValues,
           dict,
-          dims.get(i).getDataType)
+          dimensions.get(index).getDataType)
       val carbonDictionaryWriter: CarbonDictionarySortIndexWriter =
         new CarbonDictionarySortIndexWriterImpl(dictionaryColumnUniqueIdentifier)
       try {
@@ -433,6 +426,31 @@ object CarbonDataStoreCreator {
   }
 
 
+  private def addDictionaryValuesToDimensionSet(dims: util.List[CarbonDimension],
+      dimensionIndex: mutable.Buffer[Int],
+      dimensionSet: Array[util.List[String]],
+      data: Array[String],
+      index: Int) = {
+    if (isDictionaryDefaultMember(dims, dimensionSet, index)) {
+      dimensionSet(index).add(CarbonCommonConstants.MEMBER_DEFAULT_VAL)
+      dimensionSet(index).add(data(dimensionIndex(index)))
+    }
+    else {
+      if (data.length == 1) {
+        dimensionSet(index).add("""\N""")
+      } else {
+        dimensionSet(index).add(data(dimensionIndex(index)))
+      }
+    }
+  }
+
+  private def isDictionaryDefaultMember(dims: util.List[CarbonDimension],
+      dimensionSet: Array[util.List[String]],
+      index: Int) = {
+    dimensionSet(index).isEmpty && dims(index).hasEncoding(Encoding.DICTIONARY) &&
+    !dims(index).hasEncoding(Encoding.DIRECT_DICTIONARY)
+  }
+
   /**
    * Execute graph which will further load data
    *