Posted to commits@carbondata.apache.org by ra...@apache.org on 2016/08/01 10:05:24 UTC

[26/47] incubator-carbondata git commit: [Bug] clean redundant code of dictionary generation (#871)

[Bug] clean redundant code of dictionary generation (#871)

Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/14a46b22
Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/14a46b22
Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/14a46b22

Branch: refs/heads/master
Commit: 14a46b22dc397f9960fcef7b3fec6a9780f637a5
Parents: e367e12
Author: Zhangshunyu <zh...@huawei.com>
Authored: Thu Jul 28 14:19:34 2016 +0800
Committer: david <qi...@qq.com>
Committed: Thu Jul 28 14:19:34 2016 +0800

----------------------------------------------------------------------
 .../org/apache/spark/sql/CarbonSqlParser.scala  |  8 ++-
 .../spark/rdd/CarbonGlobalDictionaryRDD.scala   |  3 --
 .../spark/tasks/DictionaryWriterTask.scala      | 54 ++++++--------------
 .../spark/util/GlobalDictionaryUtil.scala       |  5 +-
 4 files changed, 19 insertions(+), 51 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/14a46b22/integration/spark/src/main/scala/org/apache/spark/sql/CarbonSqlParser.scala
----------------------------------------------------------------------
diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/CarbonSqlParser.scala b/integration/spark/src/main/scala/org/apache/spark/sql/CarbonSqlParser.scala
index fdfc683..0fd841d 100644
--- a/integration/spark/src/main/scala/org/apache/spark/sql/CarbonSqlParser.scala
+++ b/integration/spark/src/main/scala/org/apache/spark/sql/CarbonSqlParser.scala
@@ -354,11 +354,9 @@ class CarbonSqlParser()
             case list@Token("TOK_TABCOLLIST", _) =>
               val cols = BaseSemanticAnalyzer.getColumns(list, true)
               if (cols != null) {
-                val dupColsGrp = cols.asScala
-                                   .groupBy(x => x.getName) filter { case (_, colList) => colList
-                                                                                            .size >
-                                                                                          1
-                                 }
+                val dupColsGrp = cols.asScala.groupBy(x => x.getName) filter {
+                  case (_, colList) => colList.size > 1
+                }
                 if (dupColsGrp.size > 0) {
                   var columnName: String = ""
                   dupColsGrp.toSeq.foreach(columnName += _._1 + ", ")
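
The hunk above only reflows the duplicate-column check; the logic is unchanged: group the parsed columns by name and keep any group with more than one entry. A minimal standalone sketch of that pattern, using a hypothetical Col case class in place of the column objects returned by BaseSemanticAnalyzer.getColumns:

// Minimal sketch of the duplicate-column detection, assuming a simple
// stand-in type (Col) for the parsed column metadata.
case class Col(getName: String)

object DupColCheck {
  def duplicateNames(cols: Seq[Col]): Seq[String] = {
    // Group by column name; any group larger than one is a duplicate.
    val dupColsGrp = cols.groupBy(_.getName).filter { case (_, colList) => colList.size > 1 }
    dupColsGrp.keys.toSeq
  }

  def main(args: Array[String]): Unit = {
    val cols = Seq(Col("id"), Col("name"), Col("id"))
    println(duplicateNames(cols))   // prints the duplicated name(s), e.g. List(id)
  }
}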

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/14a46b22/integration/spark/src/main/scala/org/carbondata/spark/rdd/CarbonGlobalDictionaryRDD.scala
----------------------------------------------------------------------
diff --git a/integration/spark/src/main/scala/org/carbondata/spark/rdd/CarbonGlobalDictionaryRDD.scala b/integration/spark/src/main/scala/org/carbondata/spark/rdd/CarbonGlobalDictionaryRDD.scala
index 9a94c5b..e0e8cbf 100644
--- a/integration/spark/src/main/scala/org/carbondata/spark/rdd/CarbonGlobalDictionaryRDD.scala
+++ b/integration/spark/src/main/scala/org/carbondata/spark/rdd/CarbonGlobalDictionaryRDD.scala
@@ -33,15 +33,12 @@ import org.apache.spark.sql.Row
 import org.carbondata.common.factory.CarbonCommonFactory
 import org.carbondata.common.logging.LogServiceFactory
 import org.carbondata.core.carbon.{CarbonTableIdentifier, ColumnIdentifier}
-import org.carbondata.core.carbon.metadata.datatype.DataType
-import org.carbondata.core.carbon.metadata.encoder.Encoding
 import org.carbondata.core.carbon.metadata.schema.table.column.CarbonDimension
 import org.carbondata.core.constants.CarbonCommonConstants
 import org.carbondata.core.datastorage.store.impl.FileFactory
 import org.carbondata.core.locks.CarbonLockFactory
 import org.carbondata.core.locks.LockUsage
 import org.carbondata.core.util.CarbonTimeStatisticsFactory
-import org.carbondata.processing.etl.DataLoadingException
 import org.carbondata.spark.load.{CarbonLoaderUtil, CarbonLoadModel}
 import org.carbondata.spark.partition.reader.{CSVParser, CSVReader}
 import org.carbondata.spark.tasks.DictionaryWriterTask

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/14a46b22/integration/spark/src/main/scala/org/carbondata/spark/tasks/DictionaryWriterTask.scala
----------------------------------------------------------------------
diff --git a/integration/spark/src/main/scala/org/carbondata/spark/tasks/DictionaryWriterTask.scala b/integration/spark/src/main/scala/org/carbondata/spark/tasks/DictionaryWriterTask.scala
index 380e76b..b62558d 100644
--- a/integration/spark/src/main/scala/org/carbondata/spark/tasks/DictionaryWriterTask.scala
+++ b/integration/spark/src/main/scala/org/carbondata/spark/tasks/DictionaryWriterTask.scala
@@ -61,54 +61,30 @@ class DictionaryWriterTask(valuesBuffer: mutable.HashSet[String],
       }
 
       if (values.length >= 1) {
-        var preValue = values(0)
         if (model.dictFileExists(columnIndex)) {
-          if (dictionary.getSurrogateKey(values(0)) == CarbonCommonConstants
-            .INVALID_SURROGATE_KEY) {
-            val parsedValue = org.carbondata.core.util.DataTypeUtil
-              .normalizeColumnValueForItsDataType(values(0),
-                model.primDimensions(columnIndex))
-            if (null != parsedValue) {
-              writer.write(parsedValue)
-              distinctValues.add(parsedValue)
-            }
-          }
-          for (i <- 1 until values.length) {
-            if (preValue != values(i)) {
-              if (dictionary.getSurrogateKey(values(i)) ==
-                  CarbonCommonConstants.INVALID_SURROGATE_KEY) {
-                val parsedValue = org.carbondata.core.util.DataTypeUtil
-                  .normalizeColumnValueForItsDataType(values(i),
-                    model.primDimensions(columnIndex))
-                if (null != parsedValue) {
-                  writer.write(parsedValue)
-                  distinctValues.add(parsedValue)
-                  preValue = values(i)
-                }
-              }
-            }
-          }
-
-        } else {
-          val parsedValue = org.carbondata.core.util.DataTypeUtil
-            .normalizeColumnValueForItsDataType(values(0),
-              model.primDimensions(columnIndex))
-          if (null != parsedValue) {
-            writer.write(parsedValue)
-            distinctValues.add(parsedValue)
-          }
-          for (i <- 1 until values.length) {
-            if (preValue != values(i)) {
+          for (value <- values) {
+            if (dictionary.getSurrogateKey(value) ==
+                CarbonCommonConstants.INVALID_SURROGATE_KEY) {
               val parsedValue = org.carbondata.core.util.DataTypeUtil
-                .normalizeColumnValueForItsDataType(values(i),
+                .normalizeColumnValueForItsDataType(value,
                   model.primDimensions(columnIndex))
               if (null != parsedValue) {
                 writer.write(parsedValue)
                 distinctValues.add(parsedValue)
-                preValue = values(i)
               }
             }
           }
+
+        } else {
+          for (value <- values) {
+            val parsedValue = org.carbondata.core.util.DataTypeUtil
+              .normalizeColumnValueForItsDataType(value,
+                model.primDimensions(columnIndex))
+            if (null != parsedValue) {
+              writer.write(parsedValue)
+              distinctValues.add(parsedValue)
+            }
+          }
         }
       }
     } catch {
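
The DictionaryWriterTask change is the core of the cleanup: the four near-identical parse-and-write blocks collapse into two plain for loops, and the preValue tracking is dropped (the task's input buffer is a mutable.HashSet, so the values it iterates should already be distinct). A minimal sketch of the resulting logic, folded here into a single loop for brevity and using hypothetical stand-ins (SimpleDictionary, SimpleWriter, normalize) for the CarbonData dictionary, writer and DataTypeUtil.normalizeColumnValueForItsDataType:

// Sketch of the simplified dictionary write loop, with hypothetical
// stand-ins for the CarbonData dictionary, writer and value normalizer.
import scala.collection.mutable

object DictionaryWriteSketch {
  val INVALID_SURROGATE_KEY = -1

  class SimpleDictionary(existing: Map[String, Int]) {
    def getSurrogateKey(v: String): Int = existing.getOrElse(v, INVALID_SURROGATE_KEY)
  }

  class SimpleWriter {
    val written = mutable.ArrayBuffer[String]()
    def write(v: String): Unit = written += v
  }

  // Stand-in for DataTypeUtil.normalizeColumnValueForItsDataType:
  // returns null for values that do not parse for the column's data type.
  def normalize(v: String): String = if (v.forall(_.isDigit)) v else null

  def writeDistinct(values: Seq[String],
                    dictFileExists: Boolean,
                    dictionary: SimpleDictionary,
                    writer: SimpleWriter): mutable.HashSet[String] = {
    val distinctValues = mutable.HashSet[String]()
    for (value <- values) {
      // When a dictionary file already exists, skip values it already contains.
      val alreadyInDict =
        dictFileExists && dictionary.getSurrogateKey(value) != INVALID_SURROGATE_KEY
      if (!alreadyInDict) {
        val parsedValue = normalize(value)
        if (null != parsedValue) {
          writer.write(parsedValue)
          distinctValues.add(parsedValue)
        }
      }
    }
    distinctValues
  }

  def main(args: Array[String]): Unit = {
    val dict = new SimpleDictionary(Map("1" -> 10))
    val writer = new SimpleWriter()
    val added = writeDistinct(Seq("1", "2", "abc"), dictFileExists = true, dict, writer)
    println(writer.written)  // only "2" is written: "1" is known, "abc" fails to parse
    println(added)
  }
}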

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/14a46b22/integration/spark/src/main/scala/org/carbondata/spark/util/GlobalDictionaryUtil.scala
----------------------------------------------------------------------
diff --git a/integration/spark/src/main/scala/org/carbondata/spark/util/GlobalDictionaryUtil.scala b/integration/spark/src/main/scala/org/carbondata/spark/util/GlobalDictionaryUtil.scala
index 8ad1204..18e777d 100644
--- a/integration/spark/src/main/scala/org/carbondata/spark/util/GlobalDictionaryUtil.scala
+++ b/integration/spark/src/main/scala/org/carbondata/spark/util/GlobalDictionaryUtil.scala
@@ -22,7 +22,6 @@ import java.nio.charset.Charset
 import java.util.regex.Pattern
 
 import scala.collection.JavaConverters._
-import scala.collection.mutable
 import scala.collection.mutable.{ArrayBuffer, HashMap, HashSet}
 import scala.language.implicitConversions
 import scala.util.control.Breaks.{break, breakable}
@@ -48,12 +47,11 @@ import org.carbondata.core.datastorage.store.impl.FileFactory
 import org.carbondata.core.reader.CarbonDictionaryReader
 import org.carbondata.core.util.CarbonProperties
 import org.carbondata.core.writer.CarbonDictionaryWriter
-import org.carbondata.core.writer.sortindex.{CarbonDictionarySortIndexWriter, CarbonDictionarySortInfo, CarbonDictionarySortInfoPreparator}
 import org.carbondata.processing.etl.DataLoadingException
 import org.carbondata.spark.load.CarbonLoaderUtil
 import org.carbondata.spark.load.CarbonLoadModel
 import org.carbondata.spark.partition.reader.CSVWriter
-import org.carbondata.spark.rdd.{ArrayParser, CarbonAllDictionaryCombineRDD, CarbonBlockDistinctValuesCombineRDD, CarbonColumnDictGenerateRDD, CarbonDataRDDFactory, CarbonGlobalDictionaryGenerateRDD, ColumnPartitioner, DataFormat, DictionaryLoadModel, GenericParser, PrimitiveParser, StructParser}
+import org.carbondata.spark.rdd._
 import org.carbondata.spark.CarbonSparkFactory
 
 /**
@@ -100,7 +98,6 @@ object GlobalDictionaryUtil extends Logging {
       encoding: Encoding,
       excludeEncoding: Encoding): Boolean = {
     if (dimension.isComplex()) {
-      var has = false
       val children = dimension.getListOfChildDimensions
       children.asScala.exists(hasEncoding(_, encoding, excludeEncoding))
     } else {
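
The last hunk removes an unused local (var has = false) from hasEncoding: for a complex dimension, the value of children.asScala.exists(...) is already the branch's result, so no intermediate flag is needed. A minimal sketch of that recursive pattern over a hypothetical dimension tree (Dim/Leaf/Complex; the excludeEncoding parameter is omitted):

// Sketch of a recursive "has encoding" check over a hypothetical dimension tree.
sealed trait Dim
case class Leaf(encodings: Set[String]) extends Dim
case class Complex(children: List[Dim]) extends Dim

object HasEncodingSketch {
  def hasEncoding(dim: Dim, encoding: String): Boolean = dim match {
    // For a complex dimension, the result of exists over the children
    // is returned directly; no mutable flag is required.
    case Complex(children) => children.exists(hasEncoding(_, encoding))
    case Leaf(encodings)   => encodings.contains(encoding)
  }

  def main(args: Array[String]): Unit = {
    val dim = Complex(List(Leaf(Set("DICTIONARY")), Leaf(Set("DIRECT_DICTIONARY"))))
    println(hasEncoding(dim, "DICTIONARY"))  // true
  }
}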