You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@carbondata.apache.org by ra...@apache.org on 2018/06/05 10:41:57 UTC

[12/26] carbondata git commit: [CARBONDATA-2524] Support create carbonReader with default projection

[CARBONDATA-2524] Support create carbonReader with default projection

This closes #2338


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/60b65691
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/60b65691
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/60b65691

Branch: refs/heads/branch-1.4
Commit: 60b65691fec1ce28dda5187100cf8109796befa8
Parents: f27fe0a
Author: xubo245 <xu...@huawei.com>
Authored: Thu May 24 09:33:23 2018 +0800
Committer: ravipesala <ra...@gmail.com>
Committed: Tue Jun 5 16:04:20 2018 +0530

----------------------------------------------------------------------
 docs/sdk-guide.md                               |  10 ++
 .../sdk/file/CarbonReaderBuilder.java           |  40 +++++++-
 .../carbondata/sdk/file/CarbonReaderTest.java   | 101 +++++++++++++++++++
 3 files changed, 149 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/carbondata/blob/60b65691/docs/sdk-guide.md
----------------------------------------------------------------------
diff --git a/docs/sdk-guide.md b/docs/sdk-guide.md
index 4d258f0..328a845 100644
--- a/docs/sdk-guide.md
+++ b/docs/sdk-guide.md
@@ -493,6 +493,16 @@ Find example code at [CarbonReaderExample](https://github.com/apache/carbondata/
 
 ```
   /**
+   * Project all Columns for carbon reader
+   *
+   * @return CarbonReaderBuilder object
+   * @throws IOException
+   */
+  public CarbonReaderBuilder projectAllColumns();
+```
+
+```
+  /**
    * Configure the transactional status of table
    * If set to false, then reads the carbondata and carbonindex files from a flat folder structure.
    * If set to true, then reads the carbondata and carbonindex files from segment folder structure.

http://git-wip-us.apache.org/repos/asf/carbondata/blob/60b65691/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonReaderBuilder.java
----------------------------------------------------------------------
diff --git a/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonReaderBuilder.java b/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonReaderBuilder.java
index c78cda0..4103c63 100644
--- a/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonReaderBuilder.java
+++ b/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonReaderBuilder.java
@@ -26,6 +26,7 @@ import org.apache.carbondata.common.annotations.InterfaceAudience;
 import org.apache.carbondata.common.annotations.InterfaceStability;
 import org.apache.carbondata.core.datastore.impl.FileFactory;
 import org.apache.carbondata.core.metadata.schema.table.CarbonTable;
+import org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema;
 import org.apache.carbondata.core.scan.expression.Expression;
 import org.apache.carbondata.hadoop.CarbonProjection;
 import org.apache.carbondata.hadoop.api.CarbonFileInputFormat;
@@ -51,6 +52,12 @@ public class CarbonReaderBuilder {
   private boolean isTransactionalTable = true;
 
   /**
+   * It will be true if use the projectAllColumns method,
+   * it will be false if use the projection method
+   */
+  private boolean isProjectAllColumns = true;
+
+  /**
    * Construct a CarbonReaderBuilder with table path and table name
    *
    * @param tablePath table path
@@ -70,6 +77,7 @@ public class CarbonReaderBuilder {
   public CarbonReaderBuilder projection(String[] projectionColumnNames) {
     Objects.requireNonNull(projectionColumnNames);
     this.projectionColumns = projectionColumnNames;
+    isProjectAllColumns = false;
     return this;
   }
 
@@ -89,6 +97,33 @@ public class CarbonReaderBuilder {
   }
 
   /**
+   * Project all Columns for carbon reader
+   *
+   * @return CarbonReaderBuilder object
+   * @throws IOException
+   */
+  public CarbonReaderBuilder projectAllColumns() throws IOException {
+    CarbonTable carbonTable = CarbonTable
+        .buildFromTablePath(tableName, tablePath, isTransactionalTable);
+
+    List<ColumnSchema> colList = carbonTable.getTableInfo().getFactTable().getListOfColumns();
+    List<String> projectColumn = new ArrayList<String>();
+    for (ColumnSchema cols : colList) {
+      if (cols.getSchemaOrdinal() != -1) {
+        projectColumn.add(cols.getColumnUniqueId());
+      }
+    }
+    projectionColumns = new String[projectColumn.size()];
+    int i = 0;
+    for (String columnName : projectColumn) {
+      projectionColumns[i] = columnName;
+      i++;
+    }
+    isProjectAllColumns = true;
+    return this;
+  }
+
+  /**
    * Configure the filter expression for carbon reader
    *
    * @param filterExpression filter expression
@@ -186,9 +221,10 @@ public class CarbonReaderBuilder {
     if (filterExpression != null) {
       format.setFilterPredicates(job.getConfiguration(), filterExpression);
     }
-    if (projectionColumns != null) {
-      format.setColumnProjection(job.getConfiguration(), new CarbonProjection(projectionColumns));
+    if (isProjectAllColumns) {
+      projectAllColumns();
     }
+    format.setColumnProjection(job.getConfiguration(), new CarbonProjection(projectionColumns));
 
     final List<InputSplit> splits =
         format.getSplits(new JobContextImpl(job.getConfiguration(), new JobID()));

http://git-wip-us.apache.org/repos/asf/carbondata/blob/60b65691/store/sdk/src/test/java/org/apache/carbondata/sdk/file/CarbonReaderTest.java
----------------------------------------------------------------------
diff --git a/store/sdk/src/test/java/org/apache/carbondata/sdk/file/CarbonReaderTest.java b/store/sdk/src/test/java/org/apache/carbondata/sdk/file/CarbonReaderTest.java
index 0d2c84e..756dbe4 100644
--- a/store/sdk/src/test/java/org/apache/carbondata/sdk/file/CarbonReaderTest.java
+++ b/store/sdk/src/test/java/org/apache/carbondata/sdk/file/CarbonReaderTest.java
@@ -409,4 +409,105 @@ public class CarbonReaderTest extends TestCase {
         badRecordLoc);
   }
 
+  @Test
+  public void testReadFilesWithProjectAllColumns() throws IOException, InterruptedException {
+    String path = "./testWriteFiles";
+    FileUtils.deleteDirectory(new File(path));
+
+    Field[] fields = new Field[2];
+    fields[0] = new Field("name", DataTypes.STRING);
+    fields[1] = new Field("age", DataTypes.INT);
+
+    TestUtil.writeFilesAndVerify(new Schema(fields), path, true);
+
+    CarbonReader reader = CarbonReader
+        .builder(path, "_temp")
+        .projectAllColumns()
+        .build();
+
+    // expected output after sorting
+    String[] name = new String[100];
+    int[] age = new int[100];
+    for (int i = 0; i < 100; i++) {
+      name[i] = "robot" + (i / 10);
+      age[i] = (i % 10) * 10 + i / 10;
+    }
+
+    int i = 0;
+    while (reader.hasNext()) {
+      Object[] row = (Object[]) reader.readNextRow();
+      // Default sort column is applied for dimensions. So, need  to validate accordingly
+      Assert.assertEquals(name[i], row[0]);
+      Assert.assertEquals(age[i], row[1]);
+      i++;
+    }
+    Assert.assertEquals(i, 100);
+
+    reader.close();
+    FileUtils.deleteDirectory(new File(path));
+  }
+
+  @Test
+  public void testReadFilesWithDefaultProjection() throws IOException, InterruptedException {
+    String path = "./testWriteFiles";
+    FileUtils.deleteDirectory(new File(path));
+
+    Field[] fields = new Field[2];
+    fields[0] = new Field("name", DataTypes.STRING);
+    fields[1] = new Field("age", DataTypes.INT);
+
+    TestUtil.writeFilesAndVerify(new Schema(fields), path, true);
+
+    CarbonReader reader = CarbonReader
+        .builder(path, "_temp")
+        .build();
+
+    // expected output after sorting
+    String[] name = new String[100];
+    int[] age = new int[100];
+    for (int i = 0; i < 100; i++) {
+      name[i] = "robot" + (i / 10);
+      age[i] = (i % 10) * 10 + i / 10;
+    }
+
+    int i = 0;
+    while (reader.hasNext()) {
+      Object[] row = (Object[]) reader.readNextRow();
+      Assert.assertEquals(name[i], row[0]);
+      Assert.assertEquals(age[i], row[1]);
+      i++;
+    }
+    Assert.assertEquals(i, 100);
+  }
+
+  @Test
+  public void testReadFilesWithNullProjection() throws IOException, InterruptedException {
+    String path = "./testWriteFiles";
+    FileUtils.deleteDirectory(new File(path));
+
+    Field[] fields = new Field[2];
+    fields[0] = new Field("name", DataTypes.STRING);
+    fields[1] = new Field("age", DataTypes.INT);
+
+    TestUtil.writeFilesAndVerify(new Schema(fields), path, true);
+
+    CarbonReader reader = CarbonReader
+        .builder(path, "_temp")
+        .projection(new String[]{})
+        .build();
+
+    // expected output after sorting
+    String[] name = new String[100];
+    int[] age = new int[100];
+    for (int i = 0; i < 100; i++) {
+      name[i] = "robot" + (i / 10);
+      age[i] = (i % 10) * 10 + i / 10;
+    }
+    // Default sort column is applied for dimensions. So, need  to validate accordingly
+
+    while (reader.hasNext()) {
+      Object[] row = (Object[]) reader.readNextRow();
+      assert(row.length==0);
+    }
+  }
 }