You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@carbondata.apache.org by gv...@apache.org on 2018/06/08 11:40:54 UTC
[29/50] [abbrv] carbondata git commit: [CARBONDATA-2524] Support
create carbonReader with default projection
[CARBONDATA-2524] Support create carbonReader with default projection
This closes #2338
Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/8b80b12e
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/8b80b12e
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/8b80b12e
Branch: refs/heads/spark-2.3
Commit: 8b80b12eca261016000f0ab132e8558a4e87fc95
Parents: 7f4bd3d
Author: xubo245 <xu...@huawei.com>
Authored: Thu May 24 09:33:23 2018 +0800
Committer: QiangCai <qi...@qq.com>
Committed: Mon May 28 22:10:18 2018 +0800
----------------------------------------------------------------------
docs/sdk-guide.md | 10 ++
.../sdk/file/CarbonReaderBuilder.java | 40 +++++++-
.../carbondata/sdk/file/CarbonReaderTest.java | 101 +++++++++++++++++++
3 files changed, 149 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/carbondata/blob/8b80b12e/docs/sdk-guide.md
----------------------------------------------------------------------
diff --git a/docs/sdk-guide.md b/docs/sdk-guide.md
index 4d258f0..328a845 100644
--- a/docs/sdk-guide.md
+++ b/docs/sdk-guide.md
@@ -493,6 +493,16 @@ Find example code at [CarbonReaderExample](https://github.com/apache/carbondata/
```
/**
+ * Project all Columns for carbon reader
+ *
+ * @return CarbonReaderBuilder object
+ * @throws IOException
+ */
+ public CarbonReaderBuilder projectAllColumns() throws IOException;
+```
+
+```
+ /**
* Configure the transactional status of table
* If set to false, then reads the carbondata and carbonindex files from a flat folder structure.
* If set to true, then reads the carbondata and carbonindex files from segment folder structure.
http://git-wip-us.apache.org/repos/asf/carbondata/blob/8b80b12e/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonReaderBuilder.java
----------------------------------------------------------------------
diff --git a/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonReaderBuilder.java b/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonReaderBuilder.java
index c78cda0..4103c63 100644
--- a/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonReaderBuilder.java
+++ b/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonReaderBuilder.java
@@ -26,6 +26,7 @@ import org.apache.carbondata.common.annotations.InterfaceAudience;
import org.apache.carbondata.common.annotations.InterfaceStability;
import org.apache.carbondata.core.datastore.impl.FileFactory;
import org.apache.carbondata.core.metadata.schema.table.CarbonTable;
+import org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema;
import org.apache.carbondata.core.scan.expression.Expression;
import org.apache.carbondata.hadoop.CarbonProjection;
import org.apache.carbondata.hadoop.api.CarbonFileInputFormat;
@@ -51,6 +52,12 @@ public class CarbonReaderBuilder {
private boolean isTransactionalTable = true;
/**
+ * True when all columns are projected: this is the default, and is set again by projectAllColumns();
+ * false once projection(String[]) has been called.
+ */
+ private boolean isProjectAllColumns = true;
+
+ /**
* Construct a CarbonReaderBuilder with table path and table name
*
* @param tablePath table path
@@ -70,6 +77,7 @@ public class CarbonReaderBuilder {
public CarbonReaderBuilder projection(String[] projectionColumnNames) {
Objects.requireNonNull(projectionColumnNames);
this.projectionColumns = projectionColumnNames;
+ // An explicit projection was supplied, so build() must not replace it with all columns.
+ isProjectAllColumns = false;
return this;
}
@@ -89,6 +97,33 @@ public class CarbonReaderBuilder {
}
/**
+   * Projects all columns of the table for the carbon reader.
+   *
+   * The table is built from the configured table name, table path and transactional flag.
+   * Columns whose schema ordinal is -1 are left out of the projection.
+   *
+   * @return CarbonReaderBuilder object
+   * @throws IOException presumably raised while building the table from the table path
+   */
+  public CarbonReaderBuilder projectAllColumns() throws IOException {
+    CarbonTable carbonTable = CarbonTable
+        .buildFromTablePath(tableName, tablePath, isTransactionalTable);
+
+    List<ColumnSchema> colList = carbonTable.getTableInfo().getFactTable().getListOfColumns();
+    List<String> projectColumn = new ArrayList<String>(colList.size());
+    for (ColumnSchema cols : colList) {
+      // NOTE(review): ordinal -1 presumably marks columns that are not part of the
+      // user-visible schema - confirm against ColumnSchema.
+      if (cols.getSchemaOrdinal() != -1) {
+        projectColumn.add(cols.getColumnUniqueId());
+      }
+    }
+    // List.toArray replaces the hand-written element-by-element copy.
+    projectionColumns = projectColumn.toArray(new String[0]);
+    isProjectAllColumns = true;
+    return this;
+  }
+
+ /**
* Configure the filter expression for carbon reader
*
* @param filterExpression filter expression
@@ -186,9 +221,10 @@ public class CarbonReaderBuilder {
if (filterExpression != null) {
format.setFilterPredicates(job.getConfiguration(), filterExpression);
}
- if (projectionColumns != null) {
- format.setColumnProjection(job.getConfiguration(), new CarbonProjection(projectionColumns));
+ if (isProjectAllColumns) {
+ projectAllColumns();
}
+ format.setColumnProjection(job.getConfiguration(), new CarbonProjection(projectionColumns));
final List<InputSplit> splits =
format.getSplits(new JobContextImpl(job.getConfiguration(), new JobID()));
http://git-wip-us.apache.org/repos/asf/carbondata/blob/8b80b12e/store/sdk/src/test/java/org/apache/carbondata/sdk/file/CarbonReaderTest.java
----------------------------------------------------------------------
diff --git a/store/sdk/src/test/java/org/apache/carbondata/sdk/file/CarbonReaderTest.java b/store/sdk/src/test/java/org/apache/carbondata/sdk/file/CarbonReaderTest.java
index 0d2c84e..756dbe4 100644
--- a/store/sdk/src/test/java/org/apache/carbondata/sdk/file/CarbonReaderTest.java
+++ b/store/sdk/src/test/java/org/apache/carbondata/sdk/file/CarbonReaderTest.java
@@ -409,4 +409,105 @@ public class CarbonReaderTest extends TestCase {
badRecordLoc);
}
+  /**
+   * Writes a two-column (name, age) table, reads it back through a reader built with an
+   * explicit projectAllColumns() call, and checks all 100 rows in sorted order.
+   */
+  @Test
+  public void testReadFilesWithProjectAllColumns() throws IOException, InterruptedException {
+    String path = "./testWriteFiles";
+    FileUtils.deleteDirectory(new File(path));
+
+    Field[] fields = new Field[2];
+    fields[0] = new Field("name", DataTypes.STRING);
+    fields[1] = new Field("age", DataTypes.INT);
+
+    TestUtil.writeFilesAndVerify(new Schema(fields), path, true);
+
+    CarbonReader reader = CarbonReader
+        .builder(path, "_temp")
+        .projectAllColumns()
+        .build();
+
+    try {
+      // expected output after sorting
+      String[] name = new String[100];
+      int[] age = new int[100];
+      for (int i = 0; i < 100; i++) {
+        name[i] = "robot" + (i / 10);
+        age[i] = (i % 10) * 10 + i / 10;
+      }
+
+      int i = 0;
+      while (reader.hasNext()) {
+        Object[] row = (Object[]) reader.readNextRow();
+        // Default sort column is applied for dimensions. So, need to validate accordingly
+        Assert.assertEquals(name[i], row[0]);
+        Assert.assertEquals(age[i], row[1]);
+        i++;
+      }
+      // expected value first, so a failure message reads correctly
+      Assert.assertEquals(100, i);
+    } finally {
+      // release the reader and remove the test data even when an assertion fails
+      reader.close();
+      FileUtils.deleteDirectory(new File(path));
+    }
+  }
+
+  /**
+   * Writes a two-column (name, age) table and reads it back through a reader built without
+   * any projection call, checking that both columns of all 100 rows are returned in sorted
+   * order.
+   */
+  @Test
+  public void testReadFilesWithDefaultProjection() throws IOException, InterruptedException {
+    String path = "./testWriteFiles";
+    FileUtils.deleteDirectory(new File(path));
+
+    Field[] fields = new Field[2];
+    fields[0] = new Field("name", DataTypes.STRING);
+    fields[1] = new Field("age", DataTypes.INT);
+
+    TestUtil.writeFilesAndVerify(new Schema(fields), path, true);
+
+    CarbonReader reader = CarbonReader
+        .builder(path, "_temp")
+        .build();
+
+    try {
+      // expected output after sorting
+      String[] name = new String[100];
+      int[] age = new int[100];
+      for (int i = 0; i < 100; i++) {
+        name[i] = "robot" + (i / 10);
+        age[i] = (i % 10) * 10 + i / 10;
+      }
+
+      int i = 0;
+      while (reader.hasNext()) {
+        Object[] row = (Object[]) reader.readNextRow();
+        Assert.assertEquals(name[i], row[0]);
+        Assert.assertEquals(age[i], row[1]);
+        i++;
+      }
+      // expected value first, so a failure message reads correctly
+      Assert.assertEquals(100, i);
+    } finally {
+      // the original never closed the reader or removed the test data
+      reader.close();
+      FileUtils.deleteDirectory(new File(path));
+    }
+  }
+
+  /**
+   * Reads a two-column (name, age) table through a reader built with an empty
+   * (zero-length) projection array and checks that every returned row has no columns.
+   * No row count is asserted.
+   */
+  @Test
+  public void testReadFilesWithNullProjection() throws IOException, InterruptedException {
+    String path = "./testWriteFiles";
+    FileUtils.deleteDirectory(new File(path));
+
+    Field[] fields = new Field[2];
+    fields[0] = new Field("name", DataTypes.STRING);
+    fields[1] = new Field("age", DataTypes.INT);
+
+    TestUtil.writeFilesAndVerify(new Schema(fields), path, true);
+
+    CarbonReader reader = CarbonReader
+        .builder(path, "_temp")
+        .projection(new String[]{})
+        .build();
+
+    try {
+      while (reader.hasNext()) {
+        Object[] row = (Object[]) reader.readNextRow();
+        // JUnit assertion: the assert keyword is skipped unless the JVM runs with -ea
+        Assert.assertEquals(0, row.length);
+      }
+    } finally {
+      // the original never closed the reader or removed the test data
+      reader.close();
+      FileUtils.deleteDirectory(new File(path));
+    }
+  }
}