You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@carbondata.apache.org by ku...@apache.org on 2019/05/31 05:13:30 UTC
[carbondata] branch master updated: [CARBONDATA-3349] support
carboncli show sort_columns for a segment folder
This is an automated email from the ASF dual-hosted git repository.
kunalkapoor pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/carbondata.git
The following commit(s) were added to refs/heads/master by this push:
new be3e2a8 [CARBONDATA-3349] support carboncli show sort_columns for a segment folder
be3e2a8 is described below
commit be3e2a80685a89ab37881f6400b708e71e5cb206
Author: QiangCai <qi...@qq.com>
AuthorDate: Wed May 15 16:16:54 2019 +0800
[CARBONDATA-3349] support carboncli show sort_columns for a segment folder
carboncli now supports showing the sort_columns of a segment folder:
carboncli -cmd sort_columns -p <segment_folder>
This closes #3183
---
.../blockletindex/SegmentIndexFileStore.java | 36 ++++++++++++++
.../TestAlterTableSortColumnsProperty.scala | 45 ++++++++++++++++-
.../java/org/apache/carbondata/tool/CarbonCli.java | 24 +++++++++-
.../org/apache/carbondata/tool/FileCollector.java | 56 ++++++++++++++++++++++
.../org/apache/carbondata/tool/CarbonCliTest.java | 13 +++++
5 files changed, 170 insertions(+), 4 deletions(-)
diff --git a/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/SegmentIndexFileStore.java b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/SegmentIndexFileStore.java
index ae5be68..4351f3a 100644
--- a/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/SegmentIndexFileStore.java
+++ b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/SegmentIndexFileStore.java
@@ -22,6 +22,7 @@ import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
+import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
@@ -44,6 +45,7 @@ import org.apache.carbondata.core.util.CarbonUtil;
import org.apache.carbondata.core.util.DataFileFooterConverter;
import org.apache.carbondata.core.util.path.CarbonTablePath;
import org.apache.carbondata.format.BlockIndex;
+import org.apache.carbondata.format.IndexHeader;
import org.apache.carbondata.format.MergedBlockIndex;
import org.apache.carbondata.format.MergedBlockIndexHeader;
@@ -515,4 +517,38 @@ public class SegmentIndexFileStore {
public Map<String, List<String>> getCarbonMergeFileToIndexFilesMap() {
return carbonMergeFileToIndexFilesMap;
}
+
+ /**
+  * Reads the {@link IndexHeader} of a carbon index file or of a merge index file.
+  * <p>
+  * When the lower-cased path ends with {@link CarbonTablePath#MERGE_INDEX_FILE_EXT}, the merge
+  * file is read through a {@link SegmentIndexFileStore} and the header is taken from the first
+  * index entry it contains. Any other path is opened directly as an index file.
+  *
+  * @param indexFilePath path of the index file or merge index file
+  * @param configuration configuration handed to the {@link SegmentIndexFileStore} that reads
+  *                      a merge index file
+  * @return the index header, or {@code null} when it could not be read (IO failures are logged)
+  */
+ public static IndexHeader readIndexHeader(String indexFilePath, Configuration configuration) {
+   byte[] indexContent = null;
+   // Locale.ROOT keeps the extension check independent of the JVM default locale
+   // (a Turkish default locale, for instance, lower-cases 'I' to a dotless i).
+   if (indexFilePath.toLowerCase(java.util.Locale.ROOT)
+       .endsWith(CarbonTablePath.MERGE_INDEX_FILE_EXT)) {
+     SegmentIndexFileStore indexFileStore = new SegmentIndexFileStore(configuration);
+     try {
+       indexFileStore.readMergeFile(indexFilePath);
+     } catch (IOException ex) {
+       LOGGER.error(ex);
+       // An unreadable merge file must not be re-parsed below as if it were a plain index file.
+       return null;
+     }
+     Iterator<Map.Entry<String, byte[]>> iterator =
+         indexFileStore.getCarbonIndexMap().entrySet().iterator();
+     if (!iterator.hasNext()) {
+       // The merge file carries no index entry, so there is no header to return.
+       return null;
+     }
+     indexContent = iterator.next().getValue();
+     if (indexContent == null) {
+       return null;
+     }
+   }
+   CarbonIndexFileReader indexReader = new CarbonIndexFileReader();
+   IndexHeader indexHeader = null;
+   try {
+     if (indexContent == null) {
+       // plain index file: read it from its path
+       indexReader.openThriftReader(indexFilePath);
+     } else {
+       // merge index file: read the in-memory content of its first entry
+       indexReader.openThriftReader(indexContent);
+     }
+     // get the index header
+     indexHeader = indexReader.readIndexHeader();
+   } catch (IOException ex) {
+     LOGGER.error(ex);
+   } finally {
+     indexReader.closeThriftReader();
+   }
+   return indexHeader;
+ }
+
}
diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/alterTable/TestAlterTableSortColumnsProperty.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/alterTable/TestAlterTableSortColumnsProperty.scala
index 3e23e91..dab9934 100644
--- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/alterTable/TestAlterTableSortColumnsProperty.scala
+++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/alterTable/TestAlterTableSortColumnsProperty.scala
@@ -17,12 +17,15 @@
package org.apache.carbondata.spark.testsuite.alterTable
-import org.apache.spark.sql.Row
+import java.io.{ByteArrayOutputStream, PrintStream}
+
+import org.apache.spark.sql.{CarbonEnv, Row}
import org.apache.spark.sql.test.util.QueryTest
import org.scalatest.BeforeAndAfterAll
-
import org.apache.carbondata.core.constants.CarbonCommonConstants
import org.apache.carbondata.core.util.CarbonProperties
+import org.apache.carbondata.core.util.path.CarbonTablePath
+import org.apache.carbondata.tool.CarbonCli
class TestAlterTableSortColumnsProperty extends QueryTest with BeforeAndAfterAll {
@@ -134,6 +137,11 @@ class TestAlterTableSortColumnsProperty extends QueryTest with BeforeAndAfterAll
Map("sort_scope"->"local_sort", "sort_columns"->"intField")
)
createAggDataMap("alter_sc_agg_base", "alter_sc_agg_base_dm1")
+
+ createTable(
+ "alter_sc_cli",
+ Map("dictionary_include"->"charField")
+ )
}
private def dropTable(): Unit = {
@@ -153,6 +161,7 @@ class TestAlterTableSortColumnsProperty extends QueryTest with BeforeAndAfterAll
sql(s"drop table if exists alter_sc_bloom_base")
sql(s"drop table if exists alter_sc_agg")
sql(s"drop table if exists alter_sc_agg_base")
+ sql(s"drop table if exists alter_sc_cli")
}
private def createTable(
@@ -570,4 +579,36 @@ class TestAlterTableSortColumnsProperty extends QueryTest with BeforeAndAfterAll
checkExistence(sql(s"EXPLAIN select stringField,max(intField) as sum from $tableName where stringField = 'abc2' group by stringField"), true, "preaggregate", dataMapName)
checkAnswer(sql(s"select stringField,max(intField) as sum from $tableName where stringField = 'abc2' group by stringField"), sql(s"select stringField,max(intField) as sum from $baseTableName where stringField = 'abc2' group by stringField"))
}
+
+ test("carboncli -cmd sort_columns -p <segment folder>") {
+   // Loads three segments under different sort settings, then runs the
+   // "sort_columns" command of CarbonCli against each segment folder.
+   val tableName = "alter_sc_cli"
+   // segment 0: no_sort
+   loadData(tableName)
+   sql(s"alter table $tableName set tblproperties('sort_scope'='global_sort', 'sort_columns'='charField, timestampField')")
+   // segment 1: global_sort
+   loadData(tableName)
+   sql(s"alter table $tableName set tblproperties('sort_scope'='local_sort', 'sort_columns'='intField, stringField')")
+   // segment 2: local_sort
+   loadData(tableName)
+   // update table to generate one more index in each segment
+   sql(s"update $tableName set (smallIntField, intField, bigIntField, floatField, doubleField) = (smallIntField + 3, intField + 3, bigIntField + 3, floatField + 3, doubleField + 3) where smallIntField = 2").collect()
+
+   val carbonTable = CarbonEnv.getCarbonTable(Option("default"), tableName)(sqlContext.sparkSession)
+   val tablePath = carbonTable.getTablePath
+   for (segmentId <- 0 to 2) {
+     val segmentPath = CarbonTablePath.getSegmentPath(tablePath, segmentId.toString)
+     val cliArgs: Array[String] = Array("-cmd", "sort_columns", "-p", segmentPath)
+     val buffer = new ByteArrayOutputStream
+     val printer = new PrintStream(buffer)
+     CarbonCli.run(cliArgs, printer)
+     CarbonCli.cleanOutPuts()
+     val actual = new String(buffer.toByteArray)
+     // only the segment with id 2 is expected to be reported as sorted
+     val expected =
+       if (segmentId == 2) {
+         s"Input Folder: $segmentPath\nsorted by intfield,stringfield\n"
+       } else {
+         s"Input Folder: $segmentPath\nunsorted\n"
+       }
+     assertResult(expected)(actual)
+   }
+ }
+
}
diff --git a/tools/cli/src/main/java/org/apache/carbondata/tool/CarbonCli.java b/tools/cli/src/main/java/org/apache/carbondata/tool/CarbonCli.java
index a122394..d562b8e 100644
--- a/tools/cli/src/main/java/org/apache/carbondata/tool/CarbonCli.java
+++ b/tools/cli/src/main/java/org/apache/carbondata/tool/CarbonCli.java
@@ -51,6 +51,10 @@ public class CarbonCli {
// by default true, and it will be set to false if the cli is triggered via sql command
private static boolean isPrintInConsole = true;
+ /**
+  * Holds the command line {@link Options} shared by the entry points of this class.
+  * The instance is created by {@code buildOptions()} when this holder class is initialized,
+  * so the options are built once instead of on every call.
+  */
+ static class OptionsHolder {
+   // final: the shared instance must not be re-pointed after class initialization
+   static final Options instance = buildOptions();
+ }
+
private static Options buildOptions() {
Option help = new Option("h", "help", false,"print this message");
Option path = OptionBuilder.withArgName("path")
@@ -117,7 +121,7 @@ public class CarbonCli {
// this boolean to check whether to print in console or not
isPrintInConsole = false;
outPuts = e;
- Options options = buildOptions();
+ Options options = OptionsHolder.instance;
CommandLineParser parser = new PosixParser();
CommandLine line;
@@ -131,7 +135,7 @@ public class CarbonCli {
}
public static void run(String[] args, PrintStream out) {
- Options options = buildOptions();
+ Options options = OptionsHolder.instance;
CommandLineParser parser = new PosixParser();
CommandLine line;
@@ -169,6 +173,18 @@ public class CarbonCli {
command = new DataSummary(path, outPuts);
} else if (cmd.equalsIgnoreCase("benchmark")) {
command = new ScanBenchmark(path, outPuts);
+ } else if (cmd.equalsIgnoreCase("sort_columns")) {
+ if (line.hasOption("p")) {
+ try {
+ new FileCollector(outPuts).collectSortColumns(line.getOptionValue("p"));
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+ for (String output : outPuts) {
+ out.println(output);
+ }
+ }
+ return;
} else {
out.println("command " + cmd + " is not supported");
outPuts.add("command " + cmd + " is not supported");
@@ -204,4 +220,8 @@ public class CarbonCli {
outPuts.add(stringWriter.toString());
}
+ /**
+  * Resets the static {@code outPuts} reference of this class to {@code null}.
+  */
+ public static void cleanOutPuts() {
+   CarbonCli.outPuts = null;
+ }
+
}
diff --git a/tools/cli/src/main/java/org/apache/carbondata/tool/FileCollector.java b/tools/cli/src/main/java/org/apache/carbondata/tool/FileCollector.java
index 9cd743a..eff46d1 100644
--- a/tools/cli/src/main/java/org/apache/carbondata/tool/FileCollector.java
+++ b/tools/cli/src/main/java/org/apache/carbondata/tool/FileCollector.java
@@ -21,11 +21,15 @@ import java.io.IOException;
import java.util.*;
import org.apache.carbondata.common.Strings;
+import org.apache.carbondata.core.constants.CarbonCommonConstants;
import org.apache.carbondata.core.datastore.filesystem.CarbonFile;
import org.apache.carbondata.core.datastore.impl.FileFactory;
+import org.apache.carbondata.core.indexstore.blockletindex.SegmentIndexFileStore;
import org.apache.carbondata.core.util.path.CarbonTablePath;
import org.apache.carbondata.format.BlockletInfo3;
+import org.apache.carbondata.format.ColumnSchema;
import org.apache.carbondata.format.FileFooter3;
+import org.apache.carbondata.format.IndexHeader;
/**
* A helper to collect all data files, schema file, table status file in a given folder
@@ -146,6 +150,58 @@ class FileCollector {
outPuts.add(format1);
}
+ /**
+  * Joins the names of the sort columns found in the given column list with ",".
+  * A column is taken when it is a dimension and its column properties hold a non-null
+  * value for {@link CarbonCommonConstants#SORT_COLUMNS}.
+  *
+  * @param columnList column schemas to inspect
+  * @return the comma separated column names in list order, or an empty string when no
+  *         column qualifies
+  */
+ private String makeSortColumnsString(List<ColumnSchema> columnList) {
+   StringBuilder joined = new StringBuilder();
+   boolean first = true;
+   for (ColumnSchema schema : columnList) {
+     if (!schema.isDimension()) {
+       continue;
+     }
+     Map<String, String> props = schema.getColumnProperties();
+     if (props == null || props.get(CarbonCommonConstants.SORT_COLUMNS) == null) {
+       continue;
+     }
+     // write the separator in front of every name except the first one
+     if (!first) {
+       joined.append(",");
+     }
+     joined.append(schema.column_name);
+     first = false;
+   }
+   return joined.toString();
+ }
+
+ /**
+  * Inspects the headers of the index files of a segment folder and appends one line to
+  * {@code outPuts}: "sorted by &lt;columns&gt;" when the readable headers agree on a single
+  * sort column list, otherwise "unsorted".
+  *
+  * @param segmentFolder path of the segment folder whose index files are inspected
+  * @throws IOException declared by this method; raised by the index file lookup it delegates to
+  */
+ public void collectSortColumns(String segmentFolder) throws IOException {
+   CarbonFile[] indexFiles = SegmentIndexFileStore.getCarbonIndexFiles(
+       segmentFolder, FileFactory.getConfiguration());
+   // distinct is_sort flags and distinct sort column lists seen so far
+   Set<Boolean> sortFlags = new HashSet<>();
+   Set<String> sortColumnLists = new HashSet<>();
+   if (indexFiles != null) {
+     for (CarbonFile indexFile : indexFiles) {
+       IndexHeader header = SegmentIndexFileStore.readIndexHeader(
+           indexFile.getCanonicalPath(), FileFactory.getConfiguration());
+       if (header != null) {
+         boolean flagPresent = header.isSetIs_sort();
+         if (flagPresent) {
+           sortFlags.add(header.is_sort);
+         }
+         // if is_sort is not set, it will be old store and consider as local_sort by default.
+         if (!flagPresent || header.is_sort) {
+           sortColumnLists.add(makeSortColumnsString(header.getTable_columns()));
+         }
+       }
+       // two different values already prove the index files disagree; stop reading
+       if (sortFlags.size() > 1 || sortColumnLists.size() > 1) {
+         break;
+       }
+     }
+   }
+   // for all index files, sort_columns should be same
+   boolean consistent = sortFlags.size() <= 1 && sortColumnLists.size() == 1;
+   if (consistent) {
+     outPuts.add("sorted by " + sortColumnLists.iterator().next());
+   } else {
+     outPuts.add("unsorted");
+   }
+ }
+
public void close() throws IOException {
for (DataFile file : dataFiles.values()) {
file.close();
diff --git a/tools/cli/src/test/java/org/apache/carbondata/tool/CarbonCliTest.java b/tools/cli/src/test/java/org/apache/carbondata/tool/CarbonCliTest.java
index 87d2e11..26901f8 100644
--- a/tools/cli/src/test/java/org/apache/carbondata/tool/CarbonCliTest.java
+++ b/tools/cli/src/test/java/org/apache/carbondata/tool/CarbonCliTest.java
@@ -185,6 +185,19 @@ public class CarbonCliTest {
}
@Test
+ public void testSortColumnsOfSegmentFolder() {
+   // run the "sort_columns" command on the test folder and capture what it prints
+   ByteArrayOutputStream captured = new ByteArrayOutputStream();
+   PrintStream printer = new PrintStream(captured);
+   String[] cliArgs = new String[] {"-cmd", "sort_columns", "-p", path};
+   CarbonCli.run(cliArgs, printer);
+   String actualOutput = new String(captured.toByteArray());
+   String expectedOutput = buildLines(
+       "Input Folder: ./CarbonCliTest",
+       "sorted by name");
+   Assert.assertTrue(actualOutput.contains(expectedOutput));
+ }
+
+ @Test
public void testSummaryOutputAll() {
String[] args = {"-cmd", "summary", "-p", path, "-a", "-c", "age"};
ByteArrayOutputStream out = new ByteArrayOutputStream();