You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@carbondata.apache.org by ra...@apache.org on 2016/08/01 10:05:24 UTC
[26/47] incubator-carbondata git commit: [Bug] clean redundancy code of dictionary generation(#871)
[Bug] clean redundancy code of dictionary generation(#871)
[Bug] clean redundancy code of dictionary generation(#871)
Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/14a46b22
Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/14a46b22
Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/14a46b22
Branch: refs/heads/master
Commit: 14a46b22dc397f9960fcef7b3fec6a9780f637a5
Parents: e367e12
Author: Zhangshunyu <zh...@huawei.com>
Authored: Thu Jul 28 14:19:34 2016 +0800
Committer: david <qi...@qq.com>
Committed: Thu Jul 28 14:19:34 2016 +0800
----------------------------------------------------------------------
.../org/apache/spark/sql/CarbonSqlParser.scala | 8 ++-
.../spark/rdd/CarbonGlobalDictionaryRDD.scala | 3 --
.../spark/tasks/DictionaryWriterTask.scala | 54 ++++++--------------
.../spark/util/GlobalDictionaryUtil.scala | 5 +-
4 files changed, 19 insertions(+), 51 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/14a46b22/integration/spark/src/main/scala/org/apache/spark/sql/CarbonSqlParser.scala
----------------------------------------------------------------------
diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/CarbonSqlParser.scala b/integration/spark/src/main/scala/org/apache/spark/sql/CarbonSqlParser.scala
index fdfc683..0fd841d 100644
--- a/integration/spark/src/main/scala/org/apache/spark/sql/CarbonSqlParser.scala
+++ b/integration/spark/src/main/scala/org/apache/spark/sql/CarbonSqlParser.scala
@@ -354,11 +354,9 @@ class CarbonSqlParser()
case list@Token("TOK_TABCOLLIST", _) =>
val cols = BaseSemanticAnalyzer.getColumns(list, true)
if (cols != null) {
- val dupColsGrp = cols.asScala
- .groupBy(x => x.getName) filter { case (_, colList) => colList
- .size >
- 1
- }
+ val dupColsGrp = cols.asScala.groupBy(x => x.getName) filter {
+ case (_, colList) => colList.size > 1
+ }
if (dupColsGrp.size > 0) {
var columnName: String = ""
dupColsGrp.toSeq.foreach(columnName += _._1 + ", ")
http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/14a46b22/integration/spark/src/main/scala/org/carbondata/spark/rdd/CarbonGlobalDictionaryRDD.scala
----------------------------------------------------------------------
diff --git a/integration/spark/src/main/scala/org/carbondata/spark/rdd/CarbonGlobalDictionaryRDD.scala b/integration/spark/src/main/scala/org/carbondata/spark/rdd/CarbonGlobalDictionaryRDD.scala
index 9a94c5b..e0e8cbf 100644
--- a/integration/spark/src/main/scala/org/carbondata/spark/rdd/CarbonGlobalDictionaryRDD.scala
+++ b/integration/spark/src/main/scala/org/carbondata/spark/rdd/CarbonGlobalDictionaryRDD.scala
@@ -33,15 +33,12 @@ import org.apache.spark.sql.Row
import org.carbondata.common.factory.CarbonCommonFactory
import org.carbondata.common.logging.LogServiceFactory
import org.carbondata.core.carbon.{CarbonTableIdentifier, ColumnIdentifier}
-import org.carbondata.core.carbon.metadata.datatype.DataType
-import org.carbondata.core.carbon.metadata.encoder.Encoding
import org.carbondata.core.carbon.metadata.schema.table.column.CarbonDimension
import org.carbondata.core.constants.CarbonCommonConstants
import org.carbondata.core.datastorage.store.impl.FileFactory
import org.carbondata.core.locks.CarbonLockFactory
import org.carbondata.core.locks.LockUsage
import org.carbondata.core.util.CarbonTimeStatisticsFactory
-import org.carbondata.processing.etl.DataLoadingException
import org.carbondata.spark.load.{CarbonLoaderUtil, CarbonLoadModel}
import org.carbondata.spark.partition.reader.{CSVParser, CSVReader}
import org.carbondata.spark.tasks.DictionaryWriterTask
http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/14a46b22/integration/spark/src/main/scala/org/carbondata/spark/tasks/DictionaryWriterTask.scala
----------------------------------------------------------------------
diff --git a/integration/spark/src/main/scala/org/carbondata/spark/tasks/DictionaryWriterTask.scala b/integration/spark/src/main/scala/org/carbondata/spark/tasks/DictionaryWriterTask.scala
index 380e76b..b62558d 100644
--- a/integration/spark/src/main/scala/org/carbondata/spark/tasks/DictionaryWriterTask.scala
+++ b/integration/spark/src/main/scala/org/carbondata/spark/tasks/DictionaryWriterTask.scala
@@ -61,54 +61,30 @@ class DictionaryWriterTask(valuesBuffer: mutable.HashSet[String],
}
if (values.length >= 1) {
- var preValue = values(0)
if (model.dictFileExists(columnIndex)) {
- if (dictionary.getSurrogateKey(values(0)) == CarbonCommonConstants
- .INVALID_SURROGATE_KEY) {
- val parsedValue = org.carbondata.core.util.DataTypeUtil
- .normalizeColumnValueForItsDataType(values(0),
- model.primDimensions(columnIndex))
- if (null != parsedValue) {
- writer.write(parsedValue)
- distinctValues.add(parsedValue)
- }
- }
- for (i <- 1 until values.length) {
- if (preValue != values(i)) {
- if (dictionary.getSurrogateKey(values(i)) ==
- CarbonCommonConstants.INVALID_SURROGATE_KEY) {
- val parsedValue = org.carbondata.core.util.DataTypeUtil
- .normalizeColumnValueForItsDataType(values(i),
- model.primDimensions(columnIndex))
- if (null != parsedValue) {
- writer.write(parsedValue)
- distinctValues.add(parsedValue)
- preValue = values(i)
- }
- }
- }
- }
-
- } else {
- val parsedValue = org.carbondata.core.util.DataTypeUtil
- .normalizeColumnValueForItsDataType(values(0),
- model.primDimensions(columnIndex))
- if (null != parsedValue) {
- writer.write(parsedValue)
- distinctValues.add(parsedValue)
- }
- for (i <- 1 until values.length) {
- if (preValue != values(i)) {
+ for (value <- values) {
+ if (dictionary.getSurrogateKey(value) ==
+ CarbonCommonConstants.INVALID_SURROGATE_KEY) {
val parsedValue = org.carbondata.core.util.DataTypeUtil
- .normalizeColumnValueForItsDataType(values(i),
+ .normalizeColumnValueForItsDataType(value,
model.primDimensions(columnIndex))
if (null != parsedValue) {
writer.write(parsedValue)
distinctValues.add(parsedValue)
- preValue = values(i)
}
}
}
+
+ } else {
+ for (value <- values) {
+ val parsedValue = org.carbondata.core.util.DataTypeUtil
+ .normalizeColumnValueForItsDataType(value,
+ model.primDimensions(columnIndex))
+ if (null != parsedValue) {
+ writer.write(parsedValue)
+ distinctValues.add(parsedValue)
+ }
+ }
}
}
} catch {
http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/14a46b22/integration/spark/src/main/scala/org/carbondata/spark/util/GlobalDictionaryUtil.scala
----------------------------------------------------------------------
diff --git a/integration/spark/src/main/scala/org/carbondata/spark/util/GlobalDictionaryUtil.scala b/integration/spark/src/main/scala/org/carbondata/spark/util/GlobalDictionaryUtil.scala
index 8ad1204..18e777d 100644
--- a/integration/spark/src/main/scala/org/carbondata/spark/util/GlobalDictionaryUtil.scala
+++ b/integration/spark/src/main/scala/org/carbondata/spark/util/GlobalDictionaryUtil.scala
@@ -22,7 +22,6 @@ import java.nio.charset.Charset
import java.util.regex.Pattern
import scala.collection.JavaConverters._
-import scala.collection.mutable
import scala.collection.mutable.{ArrayBuffer, HashMap, HashSet}
import scala.language.implicitConversions
import scala.util.control.Breaks.{break, breakable}
@@ -48,12 +47,11 @@ import org.carbondata.core.datastorage.store.impl.FileFactory
import org.carbondata.core.reader.CarbonDictionaryReader
import org.carbondata.core.util.CarbonProperties
import org.carbondata.core.writer.CarbonDictionaryWriter
-import org.carbondata.core.writer.sortindex.{CarbonDictionarySortIndexWriter, CarbonDictionarySortInfo, CarbonDictionarySortInfoPreparator}
import org.carbondata.processing.etl.DataLoadingException
import org.carbondata.spark.load.CarbonLoaderUtil
import org.carbondata.spark.load.CarbonLoadModel
import org.carbondata.spark.partition.reader.CSVWriter
-import org.carbondata.spark.rdd.{ArrayParser, CarbonAllDictionaryCombineRDD, CarbonBlockDistinctValuesCombineRDD, CarbonColumnDictGenerateRDD, CarbonDataRDDFactory, CarbonGlobalDictionaryGenerateRDD, ColumnPartitioner, DataFormat, DictionaryLoadModel, GenericParser, PrimitiveParser, StructParser}
+import org.carbondata.spark.rdd._
import org.carbondata.spark.CarbonSparkFactory
/**
@@ -100,7 +98,6 @@ object GlobalDictionaryUtil extends Logging {
encoding: Encoding,
excludeEncoding: Encoding): Boolean = {
if (dimension.isComplex()) {
- var has = false
val children = dimension.getListOfChildDimensions
children.asScala.exists(hasEncoding(_, encoding, excludeEncoding))
} else {