Posted to commits@carbondata.apache.org by ku...@apache.org on 2019/05/31 05:13:30 UTC

[carbondata] branch master updated: [CARBONDATA-3349] support carboncli show sort_columns for a segment folder

This is an automated email from the ASF dual-hosted git repository.

kunalkapoor pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/carbondata.git


The following commit(s) were added to refs/heads/master by this push:
     new be3e2a8  [CARBONDATA-3349] support carboncli show sort_columns for a segment folder
be3e2a8 is described below

commit be3e2a80685a89ab37881f6400b708e71e5cb206
Author: QiangCai <qi...@qq.com>
AuthorDate: Wed May 15 16:16:54 2019 +0800

    [CARBONDATA-3349] support carboncli show sort_columns for a segment folder
    
    CarbonCli now supports showing the sort_columns of a segment folder:
    carboncli -cmd sort_columns -p <segment_folder>
    
    This closes #3183
---
 .../blockletindex/SegmentIndexFileStore.java       | 36 ++++++++++++++
 .../TestAlterTableSortColumnsProperty.scala        | 45 ++++++++++++++++-
 .../java/org/apache/carbondata/tool/CarbonCli.java | 24 +++++++++-
 .../org/apache/carbondata/tool/FileCollector.java  | 56 ++++++++++++++++++++++
 .../org/apache/carbondata/tool/CarbonCliTest.java  | 13 +++++
 5 files changed, 170 insertions(+), 4 deletions(-)
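
For quick reference, the new command can also be driven programmatically through CarbonCli.run(String[], PrintStream), the same entry point the added Scala test exercises. Below is a minimal sketch, not part of this patch; the class name and segment path are placeholders:

    import org.apache.carbondata.tool.CarbonCli;

    // Hypothetical driver: prints the sort_columns summary for one segment folder.
    public class SortColumnsCliExample {
      public static void main(String[] args) {
        // Placeholder path; point this at a real segment folder of a CarbonData table.
        String segmentPath = "/path/to/table/Fact/Part0/Segment_0";
        String[] cliArgs = {"-cmd", "sort_columns", "-p", segmentPath};
        CarbonCli.run(cliArgs, System.out);
        // CarbonCli keeps output lines in a static buffer; clear it between
        // invocations, as the new test does.
        CarbonCli.cleanOutPuts();
      }
    }

As the tests below show, the output is "Input Folder: <segment path>" followed by either "sorted by <columns>" or "unsorted".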

diff --git a/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/SegmentIndexFileStore.java b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/SegmentIndexFileStore.java
index ae5be68..4351f3a 100644
--- a/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/SegmentIndexFileStore.java
+++ b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/SegmentIndexFileStore.java
@@ -22,6 +22,7 @@ import java.nio.ByteBuffer;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.HashSet;
+import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
@@ -44,6 +45,7 @@ import org.apache.carbondata.core.util.CarbonUtil;
 import org.apache.carbondata.core.util.DataFileFooterConverter;
 import org.apache.carbondata.core.util.path.CarbonTablePath;
 import org.apache.carbondata.format.BlockIndex;
+import org.apache.carbondata.format.IndexHeader;
 import org.apache.carbondata.format.MergedBlockIndex;
 import org.apache.carbondata.format.MergedBlockIndexHeader;
 
@@ -515,4 +517,38 @@ public class SegmentIndexFileStore {
   public Map<String, List<String>> getCarbonMergeFileToIndexFilesMap() {
     return carbonMergeFileToIndexFilesMap;
   }
+
+  public static IndexHeader readIndexHeader(String indexFilePath, Configuration configuration) {
+    byte[] indexContent = null;
+    if (indexFilePath.toLowerCase().endsWith(CarbonTablePath.MERGE_INDEX_FILE_EXT)) {
+      SegmentIndexFileStore indexFileStore = new SegmentIndexFileStore(configuration);
+      try {
+        indexFileStore.readMergeFile(indexFilePath);
+      } catch (IOException ex) {
+        LOGGER.error(ex);
+      }
+      Iterator<Map.Entry<String, byte[]>> iterator =
+          indexFileStore.getCarbonIndexMap().entrySet().iterator();
+      if (iterator.hasNext()) {
+        indexContent = iterator.next().getValue();
+      }
+    }
+    CarbonIndexFileReader indexReader = new CarbonIndexFileReader();
+    IndexHeader indexHeader = null;
+    try {
+      if (indexContent == null) {
+        indexReader.openThriftReader(indexFilePath);
+      } else {
+        indexReader.openThriftReader(indexContent);
+      }
+      // get the index header
+      indexHeader = indexReader.readIndexHeader();
+    } catch (IOException ex) {
+      LOGGER.error(ex);
+    } finally {
+      indexReader.closeThriftReader();
+    }
+    return indexHeader;
+  }
+
 }
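
For readers skimming the diff: the new static readIndexHeader helper reads the thrift IndexHeader from either a plain carbonindex file or a merge index file (CarbonTablePath.MERGE_INDEX_FILE_EXT), so callers can inspect segment-level metadata such as is_sort and the table columns. A minimal sketch of the intended call pattern, mirroring what FileCollector.collectSortColumns (further down in this commit) does; the wrapper class and method are hypothetical, not part of the patch:

    import java.io.IOException;

    import org.apache.carbondata.core.datastore.filesystem.CarbonFile;
    import org.apache.carbondata.core.datastore.impl.FileFactory;
    import org.apache.carbondata.core.indexstore.blockletindex.SegmentIndexFileStore;
    import org.apache.carbondata.format.IndexHeader;

    // Hypothetical helper: reports whether the first index file of a segment folder
    // describes a sorted layout.
    public class SegmentSortProbe {
      public static boolean isSegmentSorted(String segmentFolder) throws IOException {
        CarbonFile[] indexFiles = SegmentIndexFileStore.getCarbonIndexFiles(
            segmentFolder, FileFactory.getConfiguration());
        if (indexFiles == null || indexFiles.length == 0) {
          return false;
        }
        IndexHeader header = SegmentIndexFileStore.readIndexHeader(
            indexFiles[0].getCanonicalPath(), FileFactory.getConfiguration());
        // Old stores never set is_sort; this commit treats them as local_sort, i.e. sorted.
        return header != null && (!header.isSetIs_sort() || header.is_sort);
      }
    }
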
diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/alterTable/TestAlterTableSortColumnsProperty.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/alterTable/TestAlterTableSortColumnsProperty.scala
index 3e23e91..dab9934 100644
--- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/alterTable/TestAlterTableSortColumnsProperty.scala
+++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/alterTable/TestAlterTableSortColumnsProperty.scala
@@ -17,12 +17,15 @@
 
 package org.apache.carbondata.spark.testsuite.alterTable
 
-import org.apache.spark.sql.Row
+import java.io.{ByteArrayOutputStream, PrintStream}
+
+import org.apache.spark.sql.{CarbonEnv, Row}
 import org.apache.spark.sql.test.util.QueryTest
 import org.scalatest.BeforeAndAfterAll
-
 import org.apache.carbondata.core.constants.CarbonCommonConstants
 import org.apache.carbondata.core.util.CarbonProperties
+import org.apache.carbondata.core.util.path.CarbonTablePath
+import org.apache.carbondata.tool.CarbonCli
 
 class TestAlterTableSortColumnsProperty extends QueryTest with BeforeAndAfterAll {
 
@@ -134,6 +137,11 @@ class TestAlterTableSortColumnsProperty extends QueryTest with BeforeAndAfterAll
       Map("sort_scope"->"local_sort", "sort_columns"->"intField")
     )
     createAggDataMap("alter_sc_agg_base", "alter_sc_agg_base_dm1")
+
+    createTable(
+      "alter_sc_cli",
+      Map("dictionary_include"->"charField")
+    )
   }
 
   private def dropTable(): Unit = {
@@ -153,6 +161,7 @@ class TestAlterTableSortColumnsProperty extends QueryTest with BeforeAndAfterAll
     sql(s"drop table if exists alter_sc_bloom_base")
     sql(s"drop table if exists alter_sc_agg")
     sql(s"drop table if exists alter_sc_agg_base")
+    sql(s"drop table if exists alter_sc_cli")
   }
 
   private def createTable(
@@ -570,4 +579,36 @@ class TestAlterTableSortColumnsProperty extends QueryTest with BeforeAndAfterAll
     checkExistence(sql(s"EXPLAIN select stringField,max(intField) as sum from $tableName where stringField = 'abc2' group by stringField"), true, "preaggregate", dataMapName)
     checkAnswer(sql(s"select stringField,max(intField) as sum from $tableName where stringField = 'abc2' group by stringField"), sql(s"select stringField,max(intField) as sum from $baseTableName where stringField = 'abc2' group by stringField"))
   }
+
+  test("carboncli -cmd sort_columns -p <segment folder>") {
+    val tableName = "alter_sc_cli"
+    // no_sort
+    loadData(tableName)
+    sql(s"alter table $tableName set tblproperties('sort_scope'='global_sort', 'sort_columns'='charField, timestampField')")
+    // global_sort
+    loadData(tableName)
+    sql(s"alter table $tableName set tblproperties('sort_scope'='local_sort', 'sort_columns'='intField, stringField')")
+    // local_sort
+    loadData(tableName)
+    // update table to generate one more index in each segment
+    sql(s"update $tableName set (smallIntField, intField, bigIntField, floatField, doubleField) = (smallIntField + 3, intField + 3, bigIntField + 3, floatField + 3, doubleField + 3) where smallIntField = 2").collect()
+
+    val table = CarbonEnv.getCarbonTable(Option("default"), tableName)(sqlContext.sparkSession)
+    val tablePath = table.getTablePath
+    (0 to 2).foreach { segmentId =>
+      val segmentPath = CarbonTablePath.getSegmentPath(tablePath, segmentId.toString)
+      val args: Array[String] = Array("-cmd", "sort_columns", "-p", segmentPath)
+      val out: ByteArrayOutputStream = new ByteArrayOutputStream
+      val stream: PrintStream = new PrintStream(out)
+      CarbonCli.run(args, stream)
+      CarbonCli.cleanOutPuts()
+      val output: String = new String(out.toByteArray)
+      if (segmentId == 2) {
+        assertResult(s"Input Folder: $segmentPath\nsorted by intfield,stringfield\n")(output)
+      } else {
+        assertResult(s"Input Folder: $segmentPath\nunsorted\n")(output)
+      }
+    }
+  }
+
 }
diff --git a/tools/cli/src/main/java/org/apache/carbondata/tool/CarbonCli.java b/tools/cli/src/main/java/org/apache/carbondata/tool/CarbonCli.java
index a122394..d562b8e 100644
--- a/tools/cli/src/main/java/org/apache/carbondata/tool/CarbonCli.java
+++ b/tools/cli/src/main/java/org/apache/carbondata/tool/CarbonCli.java
@@ -51,6 +51,10 @@ public class CarbonCli {
   // by default true, and it will be set to false if the cli is triggered via sql command
   private static boolean isPrintInConsole = true;
 
+  static class OptionsHolder {
+    static Options instance = buildOptions();
+  }
+
   private static Options buildOptions() {
     Option help = new Option("h", "help", false,"print this message");
     Option path = OptionBuilder.withArgName("path")
@@ -117,7 +121,7 @@ public class CarbonCli {
     // this boolean to check whether to print in console or not
     isPrintInConsole = false;
     outPuts = e;
-    Options options = buildOptions();
+    Options options = OptionsHolder.instance;
     CommandLineParser parser = new PosixParser();
 
     CommandLine line;
@@ -131,7 +135,7 @@ public class CarbonCli {
   }
 
   public static void run(String[] args, PrintStream out) {
-    Options options = buildOptions();
+    Options options = OptionsHolder.instance;
     CommandLineParser parser = new PosixParser();
 
     CommandLine line;
@@ -169,6 +173,18 @@ public class CarbonCli {
       command = new DataSummary(path, outPuts);
     } else if (cmd.equalsIgnoreCase("benchmark")) {
       command = new ScanBenchmark(path, outPuts);
+    } else if (cmd.equalsIgnoreCase("sort_columns")) {
+      if (line.hasOption("p")) {
+        try {
+          new FileCollector(outPuts).collectSortColumns(line.getOptionValue("p"));
+        } catch (IOException e) {
+          e.printStackTrace();
+        }
+        for (String output : outPuts) {
+          out.println(output);
+        }
+      }
+      return;
     } else {
       out.println("command " + cmd + " is not supported");
       outPuts.add("command " + cmd + " is not supported");
@@ -204,4 +220,8 @@ public class CarbonCli {
     outPuts.add(stringWriter.toString());
   }
 
+  public static void cleanOutPuts() {
+    outPuts = null;
+  }
+
 }
diff --git a/tools/cli/src/main/java/org/apache/carbondata/tool/FileCollector.java b/tools/cli/src/main/java/org/apache/carbondata/tool/FileCollector.java
index 9cd743a..eff46d1 100644
--- a/tools/cli/src/main/java/org/apache/carbondata/tool/FileCollector.java
+++ b/tools/cli/src/main/java/org/apache/carbondata/tool/FileCollector.java
@@ -21,11 +21,15 @@ import java.io.IOException;
 import java.util.*;
 
 import org.apache.carbondata.common.Strings;
+import org.apache.carbondata.core.constants.CarbonCommonConstants;
 import org.apache.carbondata.core.datastore.filesystem.CarbonFile;
 import org.apache.carbondata.core.datastore.impl.FileFactory;
+import org.apache.carbondata.core.indexstore.blockletindex.SegmentIndexFileStore;
 import org.apache.carbondata.core.util.path.CarbonTablePath;
 import org.apache.carbondata.format.BlockletInfo3;
+import org.apache.carbondata.format.ColumnSchema;
 import org.apache.carbondata.format.FileFooter3;
+import org.apache.carbondata.format.IndexHeader;
 
 /**
  * A helper to collect all data files, schema file, table status file in a given folder
@@ -146,6 +150,58 @@ class FileCollector {
     outPuts.add(format1);
   }
 
+  private String makeSortColumnsString(List<ColumnSchema> columnList) {
+    StringBuilder builder = new StringBuilder();
+    for (ColumnSchema column : columnList) {
+      if (column.isDimension()) {
+        Map<String, String> properties = column.getColumnProperties();
+        if (properties != null) {
+          if (properties.get(CarbonCommonConstants.SORT_COLUMNS) != null) {
+            builder.append(column.column_name).append(",");
+          }
+        }
+      }
+    }
+    if (builder.length() > 1) {
+      return builder.substring(0, builder.length() - 1);
+    } else {
+      return "";
+    }
+  }
+
+  public void collectSortColumns(String segmentFolder) throws IOException {
+    CarbonFile[] files = SegmentIndexFileStore.getCarbonIndexFiles(
+        segmentFolder, FileFactory.getConfiguration());
+    Set<Boolean> isSortSet = new HashSet<>();
+    Set<String> sortColumnsSet = new HashSet<>();
+    if (files != null) {
+      for (CarbonFile file : files) {
+        IndexHeader indexHeader = SegmentIndexFileStore.readIndexHeader(
+            file.getCanonicalPath(), FileFactory.getConfiguration());
+        if (indexHeader != null) {
+          if (indexHeader.isSetIs_sort()) {
+            isSortSet.add(indexHeader.is_sort);
+            if (indexHeader.is_sort) {
+              sortColumnsSet.add(makeSortColumnsString(indexHeader.getTable_columns()));
+            }
+          } else {
+            // if is_sort is not set, this is an old store and is treated as local_sort by default.
+            sortColumnsSet.add(makeSortColumnsString(indexHeader.getTable_columns()));
+          }
+        }
+        if (isSortSet.size() >= 2 || sortColumnsSet.size() >= 2) {
+          break;
+        }
+      }
+    }
+    // for all index files, sort_columns should be same
+    if (isSortSet.size() <= 1 && sortColumnsSet.size() == 1) {
+      outPuts.add("sorted by " + sortColumnsSet.iterator().next());
+    } else {
+      outPuts.add("unsorted");
+    }
+  }
+
   public void close() throws IOException {
     for (DataFile file : dataFiles.values()) {
       file.close();
diff --git a/tools/cli/src/test/java/org/apache/carbondata/tool/CarbonCliTest.java b/tools/cli/src/test/java/org/apache/carbondata/tool/CarbonCliTest.java
index 87d2e11..26901f8 100644
--- a/tools/cli/src/test/java/org/apache/carbondata/tool/CarbonCliTest.java
+++ b/tools/cli/src/test/java/org/apache/carbondata/tool/CarbonCliTest.java
@@ -185,6 +185,19 @@ public class CarbonCliTest {
   }
 
   @Test
+  public void testSortColumnsOfSegmentFolder() {
+    String[] args = {"-cmd", "sort_columns", "-p", path};
+    ByteArrayOutputStream out = new ByteArrayOutputStream();
+    PrintStream stream = new PrintStream(out);
+    CarbonCli.run(args, stream);
+    String output = new String(out.toByteArray());
+    String expectedOutput = buildLines(
+        "Input Folder: ./CarbonCliTest",
+        "sorted by name");
+    Assert.assertTrue(output.contains(expectedOutput));
+  }
+
+  @Test
   public void testSummaryOutputAll() {
     String[] args = {"-cmd", "summary", "-p", path, "-a", "-c", "age"};
     ByteArrayOutputStream out = new ByteArrayOutputStream();