You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@carbondata.apache.org by ra...@apache.org on 2018/11/21 17:59:38 UTC
[11/50] [abbrv] carbondata git commit: [CARBONDATA-3060] Improve the
command for CLI and fix other issues
[CARBONDATA-3060] Improve the command for CLI and fix other issues
improve the syntax for CLI DDL: CarbonCli for table <table_name> options('-cmd summary/benchmark -a -s -v -c <column_name> -m')
This closes #2882
Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/e0963c16
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/e0963c16
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/e0963c16
Branch: refs/heads/branch-1.5
Commit: e0963c16bd67b12e3a82eabb9766275f4d2d9ef1
Parents: 4db5efd
Author: akashrn5 <ak...@gmail.com>
Authored: Mon Oct 29 18:14:24 2018 +0530
Committer: ravipesala <ra...@gmail.com>
Committed: Wed Nov 21 22:39:53 2018 +0530
----------------------------------------------------------------------
.../schema/table/column/ColumnSchema.java | 9 ++
.../examples/sdk/CarbonReaderExample.java | 4 +-
.../TestNonTransactionalCarbonTable.scala | 2 +-
.../spark/sql/catalyst/CarbonDDLSqlParser.scala | 10 +-
.../command/management/CarbonCliCommand.scala | 62 +++++++++++++
.../management/CarbonShowSummaryCommand.scala | 62 -------------
.../sql/parser/CarbonSpark2SqlParser.scala | 12 +--
.../org/apache/carbondata/tool/CarbonCli.java | 21 ++++-
.../org/apache/carbondata/tool/DataFile.java | 15 ++-
.../org/apache/carbondata/tool/DataSummary.java | 98 +++++++++++---------
10 files changed, 172 insertions(+), 123 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/carbondata/blob/e0963c16/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/column/ColumnSchema.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/column/ColumnSchema.java b/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/column/ColumnSchema.java
index 8eba101..cf7ecab 100644
--- a/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/column/ColumnSchema.java
+++ b/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/column/ColumnSchema.java
@@ -571,4 +571,13 @@ public class ColumnSchema implements Serializable, Writable {
}
this.isLocalDictColumn = in.readBoolean();
}
+
+ /**
+ * returns whether column is complex column based on column name for child column
+ * @return
+ */
+ public boolean isComplexColumn() {
+ return this.getColumnName()
+ .contains(".val") || this.getColumnName().contains(".");
+ }
}
http://git-wip-us.apache.org/repos/asf/carbondata/blob/e0963c16/examples/spark2/src/main/java/org/apache/carbondata/examples/sdk/CarbonReaderExample.java
----------------------------------------------------------------------
diff --git a/examples/spark2/src/main/java/org/apache/carbondata/examples/sdk/CarbonReaderExample.java b/examples/spark2/src/main/java/org/apache/carbondata/examples/sdk/CarbonReaderExample.java
index ef4ae7a..a2f2eee 100644
--- a/examples/spark2/src/main/java/org/apache/carbondata/examples/sdk/CarbonReaderExample.java
+++ b/examples/spark2/src/main/java/org/apache/carbondata/examples/sdk/CarbonReaderExample.java
@@ -61,7 +61,9 @@ public class CarbonReaderExample {
CarbonWriter writer = CarbonWriter.builder()
.outputPath(path)
.withLoadOptions(map)
- .withCsvInput(new Schema(fields)).build();
+ .withCsvInput(new Schema(fields))
+ .writtenBy("CarbonReaderExample")
+ .build();
for (int i = 0; i < 10; i++) {
String[] row2 = new String[]{
http://git-wip-us.apache.org/repos/asf/carbondata/blob/e0963c16/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala
----------------------------------------------------------------------
diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala
index a3a3fc3..5b93553 100644
--- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala
+++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala
@@ -389,7 +389,7 @@ class TestNonTransactionalCarbonTable extends QueryTest with BeforeAndAfterAll {
|'carbondata' LOCATION
|'$writerPath' """.stripMargin)
- val output = sql("show summary for table sdkOutputTable options('command'='-cmd,summary,-p,-a,-v,-c,age')").collect()
+ val output = sql("Carboncli for table sdkOutputTable options('-cmd summary -a -v -c age')").collect()
assert(output.toList.contains(Row("written_by Version ")))
http://git-wip-us.apache.org/repos/asf/carbondata/blob/e0963c16/integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala
----------------------------------------------------------------------
diff --git a/integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala b/integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala
index 107a303..4a37a20 100644
--- a/integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala
+++ b/integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala
@@ -188,7 +188,7 @@ abstract class CarbonDDLSqlParser extends AbstractCarbonSparkSQLParser {
protected val STREAM = carbonKeyWord("STREAM")
protected val STREAMS = carbonKeyWord("STREAMS")
protected val STMPROPERTIES = carbonKeyWord("STMPROPERTIES")
- protected val SUMMARY = carbonKeyWord("SUMMARY")
+ protected val CARBONCLI = carbonKeyWord("CARBONCLI")
protected val doubleQuotedString = "\"([^\"]+)\"".r
protected val singleQuotedString = "'([^']+)'".r
@@ -1145,10 +1145,10 @@ abstract class CarbonDDLSqlParser extends AbstractCarbonSparkSQLParser {
case _ => ("", "")
}
- protected lazy val summaryOptions: Parser[(String, String)] =
- (stringLit <~ "=") ~ stringLit ^^ {
- case opt ~ optvalue => (opt.trim.toLowerCase(), optvalue)
- case _ => ("", "")
+ protected lazy val commandOptions: Parser[String] =
+ stringLit ^^ {
+ case optValue => optValue
+ case _ => ""
}
http://git-wip-us.apache.org/repos/asf/carbondata/blob/e0963c16/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/management/CarbonCliCommand.scala
----------------------------------------------------------------------
diff --git a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/management/CarbonCliCommand.scala b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/management/CarbonCliCommand.scala
new file mode 100644
index 0000000..bf5adc3
--- /dev/null
+++ b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/management/CarbonCliCommand.scala
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.command.management
+
+import java.util
+
+import scala.collection.JavaConverters._
+
+import org.apache.spark.sql.{CarbonEnv, Row, SparkSession}
+import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference}
+import org.apache.spark.sql.execution.command.{Checker, DataCommand}
+import org.apache.spark.sql.types.StringType
+
+import org.apache.carbondata.tool.CarbonCli
+
+/**
+ * CarbonCLi command class which is integrated to cli and sql support is provided via this class
+ * @param databaseNameOp
+ * @param tableName
+ * @param commandOptions
+ */
+case class CarbonCliCommand(
+ databaseNameOp: Option[String],
+ tableName: String,
+ commandOptions: String)
+ extends DataCommand {
+
+ override def output: Seq[Attribute] = {
+ Seq(AttributeReference("CarbonCli", StringType, nullable = false)())
+ }
+
+ override def processData(sparkSession: SparkSession): Seq[Row] = {
+ Checker.validateTableExists(databaseNameOp, tableName, sparkSession)
+ val carbonTable = CarbonEnv.getCarbonTable(databaseNameOp, tableName)(sparkSession)
+ val commandArgs: Seq[String] = commandOptions.split("\\s+")
+ val finalCommands = commandArgs.collect {
+ case a if a.trim.equalsIgnoreCase("summary") || a.trim.equalsIgnoreCase("benchmark") =>
+ Seq(a, "-p", carbonTable.getTablePath)
+ case x => Seq(x.trim)
+ }.flatten
+ val summaryOutput = new util.ArrayList[String]()
+ CarbonCli.run(finalCommands.toArray, summaryOutput)
+ summaryOutput.asScala.map(x =>
+ Row(x)
+ )
+ }
+}
http://git-wip-us.apache.org/repos/asf/carbondata/blob/e0963c16/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/management/CarbonShowSummaryCommand.scala
----------------------------------------------------------------------
diff --git a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/management/CarbonShowSummaryCommand.scala b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/management/CarbonShowSummaryCommand.scala
deleted file mode 100644
index 461f31f..0000000
--- a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/management/CarbonShowSummaryCommand.scala
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.execution.command.management
-
-import java.util
-
-import scala.collection.JavaConverters._
-
-import org.apache.spark.sql.{CarbonEnv, Row, SparkSession}
-import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference}
-import org.apache.spark.sql.execution.command.{Checker, DataCommand}
-import org.apache.spark.sql.types.StringType
-
-import org.apache.carbondata.tool.CarbonCli
-
-/**
- * Show summary command class which is integrated to cli and sql support is provided via this class
- * @param databaseNameOp
- * @param tableName
- * @param commandOptions
- */
-case class CarbonShowSummaryCommand(
- databaseNameOp: Option[String],
- tableName: String,
- commandOptions: Map[String, String])
- extends DataCommand {
-
- override def output: Seq[Attribute] = {
- Seq(AttributeReference("Table Summary", StringType, nullable = false)())
- }
-
- override def processData(sparkSession: SparkSession): Seq[Row] = {
- Checker.validateTableExists(databaseNameOp, tableName, sparkSession)
- val carbonTable = CarbonEnv.getCarbonTable(databaseNameOp, tableName)(sparkSession)
- val commandArgs: Seq[String] = commandOptions("command").split(",")
- val finalCommands = commandArgs.collect {
- case a if a.trim.equalsIgnoreCase("-p") =>
- Seq(a, carbonTable.getTablePath)
- case x => Seq(x.trim)
- }.flatten
- val summaryOutput = new util.ArrayList[String]()
- CarbonCli.run(finalCommands.toArray, summaryOutput)
- summaryOutput.asScala.map(x =>
- Row(x)
- )
- }
-}
http://git-wip-us.apache.org/repos/asf/carbondata/blob/e0963c16/integration/spark2/src/main/scala/org/apache/spark/sql/parser/CarbonSpark2SqlParser.scala
----------------------------------------------------------------------
diff --git a/integration/spark2/src/main/scala/org/apache/spark/sql/parser/CarbonSpark2SqlParser.scala b/integration/spark2/src/main/scala/org/apache/spark/sql/parser/CarbonSpark2SqlParser.scala
index 5427168..39e2f30 100644
--- a/integration/spark2/src/main/scala/org/apache/spark/sql/parser/CarbonSpark2SqlParser.scala
+++ b/integration/spark2/src/main/scala/org/apache/spark/sql/parser/CarbonSpark2SqlParser.scala
@@ -497,18 +497,18 @@ class CarbonSpark2SqlParser extends CarbonDDLSqlParser {
protected lazy val cli: Parser[LogicalPlan] =
- (SHOW ~> SUMMARY ~> FOR ~> TABLE) ~> (ident <~ ".").? ~ ident ~
- (OPTIONS ~> "(" ~> repsep(summaryOptions, ",") <~ ")").? <~
+ (CARBONCLI ~> FOR ~> TABLE) ~> (ident <~ ".").? ~ ident ~
+ (OPTIONS ~> "(" ~> commandOptions <~ ")").? <~
opt(";") ^^ {
case databaseName ~ tableName ~ commandList =>
- var commandOptions: Map[String, String] = null
+ var commandOptions: String = null
if (commandList.isDefined) {
- commandOptions = commandList.getOrElse(List.empty[(String, String)]).toMap
+ commandOptions = commandList.get
}
- CarbonShowSummaryCommand(
+ CarbonCliCommand(
convertDbNameToLowerCase(databaseName),
tableName.toLowerCase(),
- commandOptions.map { case (key, value) => key.toLowerCase -> value })
+ commandOptions)
}
http://git-wip-us.apache.org/repos/asf/carbondata/blob/e0963c16/tools/cli/src/main/java/org/apache/carbondata/tool/CarbonCli.java
----------------------------------------------------------------------
diff --git a/tools/cli/src/main/java/org/apache/carbondata/tool/CarbonCli.java b/tools/cli/src/main/java/org/apache/carbondata/tool/CarbonCli.java
index bb2260b..6fc3128 100644
--- a/tools/cli/src/main/java/org/apache/carbondata/tool/CarbonCli.java
+++ b/tools/cli/src/main/java/org/apache/carbondata/tool/CarbonCli.java
@@ -19,6 +19,8 @@ package org.apache.carbondata.tool;
import java.io.IOException;
import java.io.PrintStream;
+import java.io.PrintWriter;
+import java.io.StringWriter;
import java.util.ArrayList;
import java.util.List;
@@ -148,7 +150,10 @@ public class CarbonCli {
outPuts = new ArrayList<>();
}
if (line.hasOption("h")) {
- printHelp(options);
+ collectHelpInfo(options);
+ for (String output : outPuts) {
+ out.println(output);
+ }
return;
}
@@ -167,7 +172,10 @@ public class CarbonCli {
} else {
out.println("command " + cmd + " is not supported");
outPuts.add("command " + cmd + " is not supported");
- printHelp(options);
+ collectHelpInfo(options);
+ for (String output : outPuts) {
+ out.println(output);
+ }
return;
}
@@ -186,9 +194,14 @@ public class CarbonCli {
}
}
- private static void printHelp(Options options) {
+ private static void collectHelpInfo(Options options) {
HelpFormatter formatter = new HelpFormatter();
- formatter.printHelp("CarbonCli", options);
+ StringWriter stringWriter = new StringWriter();
+ PrintWriter printWriter = new PrintWriter(stringWriter);
+ formatter.printHelp(printWriter, formatter.getWidth(), "CarbonCli", null, options,
+ formatter.getLeftPadding(), formatter.getDescPadding(), null, false);
+ printWriter.flush();
+ outPuts.add(stringWriter.toString());
}
}
http://git-wip-us.apache.org/repos/asf/carbondata/blob/e0963c16/tools/cli/src/main/java/org/apache/carbondata/tool/DataFile.java
----------------------------------------------------------------------
diff --git a/tools/cli/src/main/java/org/apache/carbondata/tool/DataFile.java b/tools/cli/src/main/java/org/apache/carbondata/tool/DataFile.java
index 457ef0c..e553a78 100644
--- a/tools/cli/src/main/java/org/apache/carbondata/tool/DataFile.java
+++ b/tools/cli/src/main/java/org/apache/carbondata/tool/DataFile.java
@@ -33,6 +33,7 @@ import org.apache.carbondata.core.datastore.impl.FileFactory;
import org.apache.carbondata.core.memory.MemoryException;
import org.apache.carbondata.core.metadata.datatype.DataType;
import org.apache.carbondata.core.metadata.datatype.DataTypes;
+import org.apache.carbondata.core.metadata.encoder.Encoding;
import org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema;
import org.apache.carbondata.core.reader.CarbonFooterReaderV3;
import org.apache.carbondata.core.reader.CarbonHeaderReader;
@@ -446,7 +447,8 @@ class DataFile {
* @return result
*/
private double computePercentage(byte[] data, byte[] min, byte[] max, ColumnSchema column) {
- if (column.getDataType() == DataTypes.STRING) {
+ if (column.getDataType() == DataTypes.STRING || column.getDataType() == DataTypes.BOOLEAN
+ || column.hasEncoding(Encoding.DICTIONARY) || column.getDataType().isComplexType()) {
// for string, we do not calculate
return 0;
} else if (DataTypes.isDecimal(column.getDataType())) {
@@ -456,7 +458,16 @@ class DataFile {
return dataValue.divide(factorValue).doubleValue();
}
double dataValue, minValue, factorValue;
- if (column.getDataType() == DataTypes.SHORT) {
+ if (columnChunk.column.isDimensionColumn() &&
+ DataTypeUtil.isPrimitiveColumn(columnChunk.column.getDataType())) {
+ minValue = Double.valueOf(String.valueOf(
+ DataTypeUtil.getDataBasedOnDataTypeForNoDictionaryColumn(min, column.getDataType())));
+ dataValue = Double.valueOf(String.valueOf(
+ DataTypeUtil.getDataBasedOnDataTypeForNoDictionaryColumn(data, column.getDataType())));
+ factorValue = Double.valueOf(String.valueOf(
+ DataTypeUtil.getDataBasedOnDataTypeForNoDictionaryColumn(max, column.getDataType())))
+ - minValue;
+ } else if (column.getDataType() == DataTypes.SHORT) {
minValue = ByteUtil.toShort(min, 0);
dataValue = ByteUtil.toShort(data, 0) - minValue;
factorValue = ByteUtil.toShort(max, 0) - ByteUtil.toShort(min, 0);
http://git-wip-us.apache.org/repos/asf/carbondata/blob/e0963c16/tools/cli/src/main/java/org/apache/carbondata/tool/DataSummary.java
----------------------------------------------------------------------
diff --git a/tools/cli/src/main/java/org/apache/carbondata/tool/DataSummary.java b/tools/cli/src/main/java/org/apache/carbondata/tool/DataSummary.java
index cd4c040..1930bf5 100644
--- a/tools/cli/src/main/java/org/apache/carbondata/tool/DataSummary.java
+++ b/tools/cli/src/main/java/org/apache/carbondata/tool/DataSummary.java
@@ -20,12 +20,7 @@ package org.apache.carbondata.tool;
import java.io.File;
import java.io.IOException;
import java.nio.charset.Charset;
-import java.util.Collection;
-import java.util.HashMap;
-import java.util.LinkedHashMap;
-import java.util.LinkedList;
-import java.util.List;
-import java.util.Map;
+import java.util.*;
import org.apache.carbondata.common.Strings;
import org.apache.carbondata.core.constants.CarbonCommonConstants;
@@ -33,11 +28,11 @@ import org.apache.carbondata.core.datastore.filesystem.CarbonFile;
import org.apache.carbondata.core.datastore.impl.FileFactory;
import org.apache.carbondata.core.memory.MemoryException;
import org.apache.carbondata.core.metadata.datatype.DataTypes;
+import org.apache.carbondata.core.metadata.encoder.Encoding;
import org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema;
import org.apache.carbondata.core.reader.CarbonHeaderReader;
import org.apache.carbondata.core.statusmanager.LoadMetadataDetails;
import org.apache.carbondata.core.statusmanager.SegmentStatusManager;
-import org.apache.carbondata.core.util.ByteUtil;
import org.apache.carbondata.core.util.CarbonUtil;
import org.apache.carbondata.core.util.DataTypeUtil;
import org.apache.carbondata.format.BlockletInfo3;
@@ -80,7 +75,9 @@ class DataSummary implements Command {
}
if (line.hasOption("s") || printAll) {
if (dataFiles.size() > 0) {
- collectSchemaDetails(dataFiles.entrySet().iterator().next().getValue());
+ List<String> dataFilesSet = new ArrayList<>(dataFiles.keySet());
+ Collections.reverse(dataFilesSet);
+ collectSchemaDetails(dataFiles.get(dataFilesSet.get(0)));
}
}
if (line.hasOption("m") || printAll) {
@@ -175,8 +172,8 @@ class DataSummary implements Command {
tableFormatter.addRow(new String[]{
segment.getLoadName(),
segment.getSegmentStatus().toString(),
- new java.sql.Date(segment.getLoadStartTime()).toString(),
- new java.sql.Date(segment.getLoadEndTime()).toString(),
+ new java.sql.Timestamp(segment.getLoadStartTime()).toString(),
+ new java.sql.Timestamp(segment.getLoadEndTime()).toString(),
segment.getMergedLoadName() == null ? "NA" : segment.getMergedLoadName(),
segment.getFileFormat().toString(),
dataSize,
@@ -306,9 +303,9 @@ class DataSummary implements Command {
maxPercent = "NA";
// for complex types min max can be given as NA and for varchar where min max is not
// written, can give NA
- if (blocklet.getColumnChunk().column.getColumnName().contains(".val") || blocklet
- .getColumnChunk().column.getColumnName().contains(".") || !blocklet
- .getColumnChunk().isMinMaxPresent) {
+ if (blocklet.getColumnChunk().column.hasEncoding(Encoding.DICTIONARY) ||
+ blocklet.getColumnChunk().column.isComplexColumn() ||
+ !blocklet.getColumnChunk().isMinMaxPresent) {
min = "NA";
max = "NA";
} else {
@@ -316,26 +313,41 @@ class DataSummary implements Command {
max = new String(blockletMax, Charset.forName(DEFAULT_CHARSET));
}
} else {
- minPercent = String.format("%.1f", blocklet.getColumnChunk().getMinPercentage() * 100);
- maxPercent = String.format("%.1f", blocklet.getColumnChunk().getMaxPercentage() * 100);
+ // for column has global dictionary and for complex columns,min and max percentage can be
+ // NA
+ if (blocklet.getColumnChunk().column.hasEncoding(Encoding.DICTIONARY) ||
+ blocklet.getColumnChunk().column.isComplexColumn() ||
+ blocklet.getColumnChunk().column.getDataType().isComplexType()) {
+ minPercent = "NA";
+ maxPercent = "NA";
+ } else {
+ minPercent =
+ String.format("%.1f", Math.abs(blocklet.getColumnChunk().getMinPercentage() * 100));
+ maxPercent =
+ String.format("%.1f", Math.abs(blocklet.getColumnChunk().getMaxPercentage() * 100));
+ }
DataFile.ColumnChunk columnChunk = blocklet.columnChunk;
- if (columnChunk.column.isDimensionColumn() && DataTypeUtil
+ // need to consider dictionary and complex columns
+ if (columnChunk.column.hasEncoding(Encoding.DICTIONARY) ||
+ blocklet.getColumnChunk().column.isComplexColumn() ||
+ blocklet.getColumnChunk().column.getDataType().isComplexType()) {
+ min = "NA";
+ max = "NA";
+ } else if (columnChunk.column.isDimensionColumn() && DataTypeUtil
.isPrimitiveColumn(columnChunk.column.getDataType())) {
min = DataTypeUtil.getDataBasedOnDataTypeForNoDictionaryColumn(blockletMin,
columnChunk.column.getDataType()).toString();
max = DataTypeUtil.getDataBasedOnDataTypeForNoDictionaryColumn(blockletMax,
columnChunk.column.getDataType()).toString();
- } else {
- if (blockletMin.length > 4) {
- min = String.valueOf(ByteUtil.toLong(blockletMin, 0, blockletMin.length));
- } else {
- min = String.valueOf(ByteUtil.toInt(blockletMin, 0, blockletMin.length));
- }
- if (blockletMax.length > 4) {
- max = String.valueOf(ByteUtil.toLong(blockletMax, 0, blockletMax.length));
- } else {
- max = String.valueOf(ByteUtil.toInt(blockletMax, 0, blockletMax.length));
+ if (columnChunk.column.getDataType().equals(DataTypes.TIMESTAMP)) {
+ min = new java.sql.Timestamp(Long.parseLong(min) / 1000).toString();
+ max = new java.sql.Timestamp(Long.parseLong(max) / 1000).toString();
}
+ } else {
+ min = String.valueOf(DataTypeUtil
+ .getMeasureObjectFromDataType(blockletMin, columnChunk.column.getDataType()));
+ max = String.valueOf(DataTypeUtil
+ .getMeasureObjectFromDataType(blockletMax, columnChunk.column.getDataType()));
}
}
printer.addRow(
@@ -370,24 +382,26 @@ class DataSummary implements Command {
}
private void collectColumnChunkMeta(String columnName) throws IOException, MemoryException {
- DataFile file = dataFiles.entrySet().iterator().next().getValue();
- outPuts.add("");
- outPuts.add("## Page Meta for column '" + columnName + "' in file " + file.getFilePath());
- collectStats(columnName);
- for (int i = 0; i < file.getAllBlocklets().size(); i++) {
- DataFile.Blocklet blocklet = file.getAllBlocklets().get(i);
- DataChunk3 dataChunk3 = blocklet.getColumnChunk().getDataChunk3();
- List<DataChunk2> dataChunk2List = dataChunk3.getData_chunk_list();
- outPuts.add(String.format("Blocklet %d:", i));
+ for (Map.Entry<String, DataFile> entry : dataFiles.entrySet()) {
+ DataFile file = entry.getValue();
+ outPuts.add("");
+ outPuts.add("## Page Meta for column '" + columnName + "' in file " + file.getFilePath());
+ collectStats(columnName);
+ for (int i = 0; i < file.getAllBlocklets().size(); i++) {
+ DataFile.Blocklet blocklet = file.getAllBlocklets().get(i);
+ DataChunk3 dataChunk3 = blocklet.getColumnChunk().getDataChunk3();
+ List<DataChunk2> dataChunk2List = dataChunk3.getData_chunk_list();
+ outPuts.add(String.format("Blocklet %d:", i));
- // There will be many pages, for debugging purpose,
- // just print 3 page for each blocklet is enough
- for (int j = 0; j < dataChunk2List.size() && j < 3; j++) {
- outPuts.add(String.format("Page %d (offset %d, length %d): %s",
- j, dataChunk3.page_offset.get(j), dataChunk3.page_length.get(j),
- dataChunk2List.get(j).toString()));
+ // There will be many pages, for debugging purpose,
+ // just print 3 page for each blocklet is enough
+ for (int j = 0; j < dataChunk2List.size() && j < 3; j++) {
+ outPuts.add(String
+ .format("Page %d (offset %d, length %d): %s", j, dataChunk3.page_offset.get(j),
+ dataChunk3.page_length.get(j), dataChunk2List.get(j).toString()));
+ }
+ outPuts.add("");
}
- outPuts.add("");
}
}