You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@carbondata.apache.org by ch...@apache.org on 2018/04/12 07:08:02 UTC
carbondata git commit: [CARBONDATA-2155][CARBONDATA-2152] [Presto]
Fixed IS NULL not working correctly on string datatype with dictionary
include in presto
Repository: carbondata
Updated Branches:
refs/heads/master cfb8ed9f5 -> 687118a1c
[CARBONDATA-2155][CARBONDATA-2152] [Presto] Fixed IS NULL not working correctly on string datatype with dictionary include in presto
Fixed IS NULL not working correctly on string datatype with dictionary_include in presto integration,Fixed Min function working incorrectly for string type with dictionary include in presto integration
This closes #2152
Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/687118a1
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/687118a1
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/687118a1
Branch: refs/heads/master
Commit: 687118a1cf7113aed4718bad3c87d8bed1fd49af
Parents: cfb8ed9
Author: anubhav100 <an...@knoldus.in>
Authored: Mon Apr 9 18:15:31 2018 +0530
Committer: chenliang613 <ch...@huawei.com>
Committed: Thu Apr 12 15:07:31 2018 +0800
----------------------------------------------------------------------
.../CarbonDictionaryDecodeReadSupport.scala | 7 +-
.../presto/src/test/resources/alldatatype.csv | 1 +
.../integrationtest/PrestoAllDataTypeTest.scala | 59 ++++++++++++--
.../presto/util/CarbonDataStoreCreator.scala | 82 ++++++++++++--------
4 files changed, 110 insertions(+), 39 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/carbondata/blob/687118a1/integration/presto/src/main/scala/org/apache/carbondata/presto/CarbonDictionaryDecodeReadSupport.scala
----------------------------------------------------------------------
diff --git a/integration/presto/src/main/scala/org/apache/carbondata/presto/CarbonDictionaryDecodeReadSupport.scala b/integration/presto/src/main/scala/org/apache/carbondata/presto/CarbonDictionaryDecodeReadSupport.scala
index 9c05177..82cdf3a 100644
--- a/integration/presto/src/main/scala/org/apache/carbondata/presto/CarbonDictionaryDecodeReadSupport.scala
+++ b/integration/presto/src/main/scala/org/apache/carbondata/presto/CarbonDictionaryDecodeReadSupport.scala
@@ -93,7 +93,12 @@ class CarbonDictionaryDecodeReadSupport[T] extends CarbonReadSupport[T] {
while (chunks.hasNext) {
{
val value: Array[Byte] = chunks.next
- sliceArray(count) = wrappedBuffer(value, 0, value.length)
+ if(count ==1) {
+ sliceArray(count) = null
+ }
+ else {
+ sliceArray(count) = wrappedBuffer(value, 0, value.length)
+ }
count += 1
}
}
http://git-wip-us.apache.org/repos/asf/carbondata/blob/687118a1/integration/presto/src/test/resources/alldatatype.csv
----------------------------------------------------------------------
diff --git a/integration/presto/src/test/resources/alldatatype.csv b/integration/presto/src/test/resources/alldatatype.csv
index 7201542..0fa46aa 100644
--- a/integration/presto/src/test/resources/alldatatype.csv
+++ b/integration/presto/src/test/resources/alldatatype.csv
@@ -9,3 +9,4 @@ ID,date,country,name,phonetype,serialname,salary,bonus,monthlyBonus,dob,shortfie
8,2015-07-30,china,geetika,phone1848,ASD57308,15007.500,500.88,200.97,2008-09-21 11:10:06,10,true
9,2015-07-18,china,ravindra,phone706,ASD86717,15008.00,700.999,45.25,2009-06-19 15:10:06,1,true
9,2015/07/18,china,jitesh,phone706,ASD86717,15008.00,500.414,11.655,2001-08-29 13:09:03,12,true
+
http://git-wip-us.apache.org/repos/asf/carbondata/blob/687118a1/integration/presto/src/test/scala/org/apache/carbondata/presto/integrationtest/PrestoAllDataTypeTest.scala
----------------------------------------------------------------------
diff --git a/integration/presto/src/test/scala/org/apache/carbondata/presto/integrationtest/PrestoAllDataTypeTest.scala b/integration/presto/src/test/scala/org/apache/carbondata/presto/integrationtest/PrestoAllDataTypeTest.scala
index 49da227..ce17682 100644
--- a/integration/presto/src/test/scala/org/apache/carbondata/presto/integrationtest/PrestoAllDataTypeTest.scala
+++ b/integration/presto/src/test/scala/org/apache/carbondata/presto/integrationtest/PrestoAllDataTypeTest.scala
@@ -34,6 +34,34 @@ class PrestoAllDataTypeTest extends FunSuiteLike with BeforeAndAfterAll {
+ "../../../..").getCanonicalPath
private val storePath = s"$rootPath/integration/presto/target/store"
+
+ // Table schema:
+ // +-------------+----------------+-------------+------------+
+ // | Column name | Data type | Column type | Dictionary |
+ // +-------------+----------------+--------------+-----------+
+ // | id | string | dimension | yes |
+ // +-------------+----------------+-------------+------------+
+ // | date | date | dimension | yes |
+ // +-------------+----------------+-------------+------------+
+ // | country | string | dimension | yes |
+ // +-------------+----------------+-------------+-------------
+ // | name | string | dimension | yes |
+ // +-------------+----------------+-------------+-------------
+ // | phonetype | string | dimension | yes |
+ // +-------------+----------------+-------------+-------------
+ // | serialname | string | dimension | true |
+ // +-------------+----------------+-------------+-------------
+ // | bonus |short decimal | measure | false |
+ // +-------------+----------------+-------------+-------------
+ // | monthlyBonus| longdecimal | measure | false |
+ // +-------------+----------------+-------------+-------------
+ // | dob | timestamp | dimension | true |
+ // +-------------+----------------+-------------+------------+
+ // | shortField | shortfield | measure | true |
+ // +-------------+----------------+-------------+-------------
+ // |isCurrentEmp | boolean | measure | true |
+ // +-------------+----------------+-------------+------------+
+
override def beforeAll: Unit = {
import org.apache.carbondata.presto.util.CarbonDataStoreCreator
CarbonDataStoreCreator
@@ -50,7 +78,7 @@ class PrestoAllDataTypeTest extends FunSuiteLike with BeforeAndAfterAll {
test("test the result for count(*) in presto") {
val actualResult: List[Map[String, Any]] = PrestoServer
.executeQuery("SELECT COUNT(*) AS RESULT FROM TESTDB.TESTTABLE ")
- val expectedResult: List[Map[String, Any]] = List(Map("RESULT" -> 10))
+ val expectedResult: List[Map[String, Any]] = List(Map("RESULT" -> 11))
assert(actualResult.equals(expectedResult))
}
test("test the result for count() clause with distinct operator in presto") {
@@ -160,7 +188,9 @@ class PrestoAllDataTypeTest extends FunSuiteLike with BeforeAndAfterAll {
Map("NAME" -> "liang"),
Map("NAME" -> "prince"),
Map("NAME" -> "ravindra"),
- Map("NAME" -> "sahil"))
+ Map("NAME" -> "sahil"),
+ Map("NAME" -> null)
+ )
assert(actualResult.equals(expectedResult))
}
test("select DATE type with order by clause") {
@@ -175,7 +205,9 @@ class PrestoAllDataTypeTest extends FunSuiteLike with BeforeAndAfterAll {
Map("DATE" -> "2015-07-28"),
Map("DATE" -> "2015-07-29"),
Map("DATE" -> "2015-07-30"),
- Map("DATE" -> null))
+ Map("DATE" -> null),
+ Map("DATE" -> null)
+ )
assert(actualResult.filterNot(_.get("DATE") == null).zipWithIndex.forall {
case (map, index) => map.get("DATE").toString
@@ -194,7 +226,9 @@ class PrestoAllDataTypeTest extends FunSuiteLike with BeforeAndAfterAll {
Map("ID" -> 6),
Map("ID" -> 7),
Map("ID" -> 8),
- Map("ID" -> 9))
+ Map("ID" -> 9),
+ Map("ID" -> null)
+ )
assert(actualResult.equals(expectedResult))
@@ -345,7 +379,7 @@ class PrestoAllDataTypeTest extends FunSuiteLike with BeforeAndAfterAll {
test("test for null operator on date data type") {
val actualResult: List[Map[String, Any]] = PrestoServer
.executeQuery("SELECT ID FROM TESTDB.TESTTABLE WHERE DATE IS NULL")
- val expectedResult: List[Map[String, Any]] = List(Map("ID" -> 9))
+ val expectedResult: List[Map[String, Any]] = List(Map("ID" -> 9),Map("ID" -> null))
assert(actualResult.equals(expectedResult))
}
@@ -390,7 +424,7 @@ class PrestoAllDataTypeTest extends FunSuiteLike with BeforeAndAfterAll {
val actualResult: List[Map[String, Any]] = PrestoServer
.executeQuery(
"SELECT ID from testdb.testtable WHERE SHORTFIELD IS NULL ORDER BY SHORTFIELD ")
- val expectedResult: List[Map[String, Any]] = List(Map("ID" -> 7))
+ val expectedResult: List[Map[String, Any]] = List(Map("ID" -> 7),Map("ID" -> null))
assert(actualResult.equals(expectedResult))
}
@@ -450,4 +484,17 @@ class PrestoAllDataTypeTest extends FunSuiteLike with BeforeAndAfterAll {
.executeQuery("SELECT id AS RESULT FROM TESTDB.TESTTABLE WHERE isCurrentEmployee is NOT null AND ID>8")
assert(actualResult.head("RESULT").toString.toInt==9)
}
+ test("test the is null operator when null is included in string data type dictionary_include"){
+ // See CARBONDATA-2155
+ val actualResult: List[Map[String, Any]] = PrestoServer.executeQuery("SELECT SERIALNAME FROM TESTDB.TESTTABLE WHERE SERIALNAME IS NULL")
+ assert(actualResult equals List(Map("SERIALNAME" -> null)))
+ }
+ test("test the min function when null is included in string data type with dictionary_include"){
+ // See CARBONDATA-2152
+ val actualResult = PrestoServer.executeQuery("SELECT MIN(SERIALNAME) FROM TESTDB.TESTTABLE")
+ val expectedResult = List(Map("_col0" -> "ASD14875"))
+
+ assert(actualResult.equals(expectedResult))
+ }
+
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/carbondata/blob/687118a1/integration/presto/src/test/scala/org/apache/carbondata/presto/util/CarbonDataStoreCreator.scala
----------------------------------------------------------------------
diff --git a/integration/presto/src/test/scala/org/apache/carbondata/presto/util/CarbonDataStoreCreator.scala b/integration/presto/src/test/scala/org/apache/carbondata/presto/util/CarbonDataStoreCreator.scala
index baf02fd..e12af63 100644
--- a/integration/presto/src/test/scala/org/apache/carbondata/presto/util/CarbonDataStoreCreator.scala
+++ b/integration/presto/src/test/scala/org/apache/carbondata/presto/util/CarbonDataStoreCreator.scala
@@ -24,6 +24,7 @@ import java.util
import java.util.{ArrayList, Date, List, UUID}
import scala.collection.JavaConversions._
+import scala.collection.mutable
import com.google.gson.Gson
import org.apache.hadoop.conf.Configuration
@@ -34,34 +35,26 @@ import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl
import org.apache.hadoop.mapreduce.{RecordReader, TaskType}
import org.apache.carbondata.common.logging.LogServiceFactory
-import org.apache.carbondata.core.cache.dictionary.{Dictionary, DictionaryColumnUniqueIdentifier,
-ReverseDictionary}
+import org.apache.carbondata.core.cache.dictionary.{Dictionary, DictionaryColumnUniqueIdentifier, ReverseDictionary}
import org.apache.carbondata.core.cache.{Cache, CacheProvider, CacheType}
import org.apache.carbondata.core.constants.CarbonCommonConstants
import org.apache.carbondata.core.datastore.impl.FileFactory
-import org.apache.carbondata.core.fileoperations.{AtomicFileOperations, AtomicFileOperationsImpl,
-FileWriteOperation}
-import org.apache.carbondata.core.metadata.converter.{SchemaConverter,
-ThriftWrapperSchemaConverterImpl}
+import org.apache.carbondata.core.fileoperations.{AtomicFileOperations, AtomicFileOperationsImpl, FileWriteOperation}
+import org.apache.carbondata.core.metadata.converter.{SchemaConverter, ThriftWrapperSchemaConverterImpl}
import org.apache.carbondata.core.metadata.datatype.DataTypes
import org.apache.carbondata.core.metadata.encoder.Encoding
-import org.apache.carbondata.core.metadata.schema.table.column.{CarbonColumn, CarbonDimension,
-CarbonMeasure, ColumnSchema}
+import org.apache.carbondata.core.metadata.schema.table.column.{CarbonColumn, CarbonDimension, CarbonMeasure, ColumnSchema}
import org.apache.carbondata.core.metadata.schema.table.{CarbonTable, TableInfo, TableSchema}
import org.apache.carbondata.core.metadata.schema.{SchemaEvolution, SchemaEvolutionEntry}
-import org.apache.carbondata.core.metadata.{AbsoluteTableIdentifier, CarbonMetadata,
-CarbonTableIdentifier, ColumnIdentifier}
+import org.apache.carbondata.core.metadata.{AbsoluteTableIdentifier, CarbonMetadata, CarbonTableIdentifier, ColumnIdentifier}
import org.apache.carbondata.core.statusmanager.{LoadMetadataDetails, SegmentStatus}
import org.apache.carbondata.core.util.path.CarbonTablePath
import org.apache.carbondata.core.util.{CarbonProperties, CarbonUtil}
-import org.apache.carbondata.core.writer.sortindex.{CarbonDictionarySortIndexWriter,
-CarbonDictionarySortIndexWriterImpl, CarbonDictionarySortInfo, CarbonDictionarySortInfoPreparator}
-import org.apache.carbondata.core.writer.{CarbonDictionaryWriter, CarbonDictionaryWriterImpl,
-ThriftWriter}
+import org.apache.carbondata.core.writer.sortindex.{CarbonDictionarySortIndexWriter, CarbonDictionarySortIndexWriterImpl, CarbonDictionarySortInfo, CarbonDictionarySortInfoPreparator}
+import org.apache.carbondata.core.writer.{CarbonDictionaryWriter, CarbonDictionaryWriterImpl, ThriftWriter}
import org.apache.carbondata.processing.loading.DataLoadExecutor
import org.apache.carbondata.processing.loading.constants.DataLoadProcessorConstants
-import org.apache.carbondata.processing.loading.csvinput.{BlockDetails, CSVInputFormat,
-CSVRecordReaderIterator, StringArrayWritable}
+import org.apache.carbondata.processing.loading.csvinput.{BlockDetails, CSVInputFormat, CSVRecordReaderIterator, StringArrayWritable}
import org.apache.carbondata.processing.loading.exception.CarbonDataLoadingException
import org.apache.carbondata.processing.loading.model.{CarbonDataLoadSchema, CarbonLoadModel}
import org.apache.carbondata.processing.util.TableOptionConstant
@@ -266,8 +259,8 @@ object CarbonDataStoreCreator {
val monthlyBonus: ColumnSchema = new ColumnSchema()
monthlyBonus.setColumnName("monthlyBonus")
monthlyBonus.setColumnar(true)
- monthlyBonus.setDataType(DataTypes.createDecimalType(10, 4))
- monthlyBonus.setPrecision(10)
+ monthlyBonus.setDataType(DataTypes.createDecimalType(18, 4))
+ monthlyBonus.setPrecision(18)
monthlyBonus.setScale(4)
monthlyBonus.setSchemaOrdinal(8)
monthlyBonus.setEncodingList(invertedIndexEncoding)
@@ -358,33 +351,33 @@ object CarbonDataStoreCreator {
val reader: BufferedReader = new BufferedReader(
new FileReader(factFilePath))
val header: String = reader.readLine()
- val split: Array[String] = header.split(",")
val allCols: util.List[CarbonColumn] = new util.ArrayList[CarbonColumn]()
- val dims: util.List[CarbonDimension] =
+ val dimensions: util.List[CarbonDimension] =
table.getDimensionByTableName(table.getTableName)
- allCols.addAll(dims)
+ allCols.addAll(dimensions)
val msrs: List[CarbonMeasure] =
table.getMeasureByTableName(table.getTableName)
allCols.addAll(msrs)
- val set: Array[util.Set[String]] = Array.ofDim[util.Set[String]](dims.size)
- val dimsIndex = dims.map(dim => dim.getColumnSchema.getSchemaOrdinal)
- for (i <- set.indices) {
- set(i) = new util.HashSet[String]()
+ val dimensionsIndex = dimensions.map(dim => dim.getColumnSchema.getSchemaOrdinal)
+ val dimensionSet: Array[util.List[String]] = Array.ofDim[util.List[String]](dimensions.size)
+
+ for (i <- dimensionSet.indices) {
+ dimensionSet(i) = new util.ArrayList[String]()
}
var line: String = reader.readLine()
while (line != null) {
val data: Array[String] = line.split(",")
- for (i <- set.indices) {
- set(i).add(data(dimsIndex(i)))
+ for (index <- dimensionSet.indices) {
+ addDictionaryValuesToDimensionSet(dimensions, dimensionsIndex, dimensionSet, data, index)
}
line = reader.readLine()
}
val dictCache: Cache[DictionaryColumnUniqueIdentifier, ReverseDictionary] = CacheProvider
.getInstance.createCache(CacheType.REVERSE_DICTIONARY)
- for (i <- set.indices) {
+ for (index <- dimensionSet.indices) {
val columnIdentifier: ColumnIdentifier =
- new ColumnIdentifier(dims.get(i).getColumnId, null, null)
+ new ColumnIdentifier(dimensions.get(index).getColumnId, null, null)
val dictionaryColumnUniqueIdentifier: DictionaryColumnUniqueIdentifier =
new DictionaryColumnUniqueIdentifier(
@@ -393,7 +386,7 @@ object CarbonDataStoreCreator {
columnIdentifier.getDataType)
val writer: CarbonDictionaryWriter = new CarbonDictionaryWriterImpl(
dictionaryColumnUniqueIdentifier)
- for (value <- set(i)) {
+ for (value <- dimensionSet(index).distinct) {
writer.write(value)
}
writer.close()
@@ -403,7 +396,7 @@ object CarbonDataStoreCreator {
new DictionaryColumnUniqueIdentifier(
absoluteTableIdentifier,
columnIdentifier,
- dims.get(i).getDataType)
+ dimensions.get(index).getDataType)
)
.asInstanceOf[Dictionary]
val preparator: CarbonDictionarySortInfoPreparator =
@@ -412,7 +405,7 @@ object CarbonDataStoreCreator {
val dictionarySortInfo: CarbonDictionarySortInfo =
preparator.getDictionarySortInfo(newDistinctValues,
dict,
- dims.get(i).getDataType)
+ dimensions.get(index).getDataType)
val carbonDictionaryWriter: CarbonDictionarySortIndexWriter =
new CarbonDictionarySortIndexWriterImpl(dictionaryColumnUniqueIdentifier)
try {
@@ -433,6 +426,31 @@ object CarbonDataStoreCreator {
}
+ private def addDictionaryValuesToDimensionSet(dims: util.List[CarbonDimension],
+ dimensionIndex: mutable.Buffer[Int],
+ dimensionSet: Array[util.List[String]],
+ data: Array[String],
+ index: Int) = {
+ if (isDictionaryDefaultMember(dims, dimensionSet, index)) {
+ dimensionSet(index).add(CarbonCommonConstants.MEMBER_DEFAULT_VAL)
+ dimensionSet(index).add(data(dimensionIndex(index)))
+ }
+ else {
+ if (data.length == 1) {
+ dimensionSet(index).add("""\N""")
+ } else {
+ dimensionSet(index).add(data(dimensionIndex(index)))
+ }
+ }
+ }
+
+ private def isDictionaryDefaultMember(dims: util.List[CarbonDimension],
+ dimensionSet: Array[util.List[String]],
+ index: Int) = {
+ dimensionSet(index).isEmpty && dims(index).hasEncoding(Encoding.DICTIONARY) &&
+ !dims(index).hasEncoding(Encoding.DIRECT_DICTIONARY)
+ }
+
/**
* Execute graph which will further load data
*