You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@carbondata.apache.org by ra...@apache.org on 2018/09/14 14:08:15 UTC
carbondata git commit: [CARBONDATA-2926] fixed
ArrayIndexOutOfBoundException with varchar columns and empty sort_columns
Repository: carbondata
Updated Branches:
refs/heads/master 2b7c8b374 -> f354e0b2b
[CARBONDATA-2926] fixed ArrayIndexOutOfBoundException with varchar columns and empty sort_columns
problem:
ArrayIndexOutOfBoundException if varchar column is present before dictionary columns along with empty sort_columns.
root cause:
CarbonFactDataHandlerColumnar.isVarcharColumnFull() method uses model.getVarcharDimIdxInNoDict()
and index of varchar column in no dictonary array became negative.
currently index was calculated based on ordinal-number of dictionary columns. This can go negative in no_sort column case,
solution:
take the varchar dimension index from no dictionary array from at runtime based on schema.
Also hotfix: removed number validation in table properties of sdk, now we support 7 properties
This closes #2705
Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/f354e0b2
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/f354e0b2
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/f354e0b2
Branch: refs/heads/master
Commit: f354e0b2b5704a3846b622f9256dc898022b9177
Parents: 2b7c8b3
Author: ajantha-bhat <aj...@gmail.com>
Authored: Mon Sep 10 23:08:15 2018 +0530
Committer: ravipesala <ra...@gmail.com>
Committed: Fri Sep 14 19:38:03 2018 +0530
----------------------------------------------------------------------
.../carbondata/core/datastore/TableSpec.java | 17 +++++++++++
.../schema/table/TableSchemaBuilder.java | 9 +++++-
.../sdv/generated/SDKwriterTestCase.scala | 31 ++++++++++++++++++++
.../store/CarbonFactDataHandlerModel.java | 10 ++++---
.../carbondata/processing/store/TablePage.java | 4 ++-
.../sdk/file/CarbonWriterBuilder.java | 6 ----
6 files changed, 65 insertions(+), 12 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/carbondata/blob/f354e0b2/core/src/main/java/org/apache/carbondata/core/datastore/TableSpec.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/TableSpec.java b/core/src/main/java/org/apache/carbondata/core/datastore/TableSpec.java
index 4d30cb0..bded430 100644
--- a/core/src/main/java/org/apache/carbondata/core/datastore/TableSpec.java
+++ b/core/src/main/java/org/apache/carbondata/core/datastore/TableSpec.java
@@ -20,6 +20,7 @@ package org.apache.carbondata.core.datastore;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
+import java.util.ArrayList;
import java.util.List;
import org.apache.carbondata.core.metadata.datatype.DataType;
@@ -36,6 +37,14 @@ public class TableSpec {
private DimensionSpec[] dimensionSpec;
private MeasureSpec[] measureSpec;
+ // Many places we might have to access no-dictionary column spec.
+ // but no-dictionary column spec are not always in below order like,
+ // dictionary + no dictionary + complex + measure
+ // when sort_columns are empty, no columns are selected for sorting.
+ // so, spec will not be in above order.
+ // Hence noDictionaryDimensionSpec will be useful and it will be subset of dimensionSpec.
+ private List<DimensionSpec> noDictionaryDimensionSpec;
+
// number of simple dimensions
private int numSimpleDimensions;
@@ -56,6 +65,7 @@ public class TableSpec {
}
dimensionSpec = new DimensionSpec[dimensions.size()];
measureSpec = new MeasureSpec[measures.size()];
+ noDictionaryDimensionSpec = new ArrayList<>();
addDimensions(dimensions);
addMeasures(measures);
}
@@ -67,10 +77,12 @@ public class TableSpec {
if (dimension.isComplex()) {
DimensionSpec spec = new DimensionSpec(ColumnType.COMPLEX, dimension);
dimensionSpec[dimIndex++] = spec;
+ noDictionaryDimensionSpec.add(spec);
} else if (dimension.getDataType() == DataTypes.TIMESTAMP && !dimension
.isDirectDictionaryEncoding()) {
DimensionSpec spec = new DimensionSpec(ColumnType.PLAIN_VALUE, dimension);
dimensionSpec[dimIndex++] = spec;
+ noDictionaryDimensionSpec.add(spec);
} else if (dimension.isDirectDictionaryEncoding()) {
DimensionSpec spec = new DimensionSpec(ColumnType.DIRECT_DICTIONARY, dimension);
dimensionSpec[dimIndex++] = spec;
@@ -80,6 +92,7 @@ public class TableSpec {
} else {
DimensionSpec spec = new DimensionSpec(ColumnType.PLAIN_VALUE, dimension);
dimensionSpec[dimIndex++] = spec;
+ noDictionaryDimensionSpec.add(spec);
}
}
}
@@ -95,6 +108,10 @@ public class TableSpec {
return dimensionSpec[dimensionIndex];
}
+ public List<DimensionSpec> getNoDictionaryDimensionSpec() {
+ return noDictionaryDimensionSpec;
+ }
+
public MeasureSpec getMeasureSpec(int measureIndex) {
return measureSpec[measureIndex];
}
http://git-wip-us.apache.org/repos/asf/carbondata/blob/f354e0b2/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/TableSchemaBuilder.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/TableSchemaBuilder.java b/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/TableSchemaBuilder.java
index c8eaa16..f1be5ca 100644
--- a/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/TableSchemaBuilder.java
+++ b/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/TableSchemaBuilder.java
@@ -50,6 +50,8 @@ public class TableSchemaBuilder {
private List<ColumnSchema> dimension = new LinkedList<>();
+ private List<ColumnSchema> varCharColumns = new LinkedList<>();
+
private List<ColumnSchema> complex = new LinkedList<>();
private List<ColumnSchema> measures = new LinkedList<>();
@@ -107,6 +109,7 @@ public class TableSchemaBuilder {
schema.setSchemaEvolution(schemaEvol);
List<ColumnSchema> allColumns = new LinkedList<>(sortColumns);
allColumns.addAll(dimension);
+ allColumns.addAll(varCharColumns);
allColumns.addAll(complex);
allColumns.addAll(measures);
schema.setListOfColumns(allColumns);
@@ -214,7 +217,11 @@ public class TableSchemaBuilder {
|| isComplexChild) {
complex.add(newColumn);
} else {
- dimension.add(newColumn);
+ if (field.getDataType() == DataTypes.VARCHAR) {
+ varCharColumns.add(newColumn);
+ } else {
+ dimension.add(newColumn);
+ }
}
}
if (newColumn.isDimensionColumn()) {
http://git-wip-us.apache.org/repos/asf/carbondata/blob/f354e0b2/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/SDKwriterTestCase.scala
----------------------------------------------------------------------
diff --git a/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/SDKwriterTestCase.scala b/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/SDKwriterTestCase.scala
index 40a0a62..267a05c 100644
--- a/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/SDKwriterTestCase.scala
+++ b/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/SDKwriterTestCase.scala
@@ -756,6 +756,37 @@ class SDKwriterTestCase extends QueryTest with BeforeAndAfterEach {
sql(s"CREATE EXTERNAL TABLE sdkTable STORED BY 'carbondata' LOCATION '$writerPath'")
checkAnswer(sql("select count(*) from sdkTable"), Seq(Row(5)))
}
+
+ test("Test sdk with longstring with empty sort column and some direct dictionary columns") {
+ // here we specify the longstring column as varchar
+ val schema = new StringBuilder()
+ .append("[ \n")
+ .append(" {\"address\":\"varchar\"},\n")
+ .append(" {\"date1\":\"date\"},\n")
+ .append(" {\"date2\":\"date\"},\n")
+ .append(" {\"age\":\"int\"}\n")
+ .append("]")
+ .toString()
+ val builder = CarbonWriter.builder()
+ val writer = builder
+ .outputPath(writerPath)
+ .sortBy(Array[String]())
+ .buildWriterForCSVInput(Schema.parseJson(schema))
+
+ for (i <- 0 until 5) {
+ writer
+ .write(Array[String](RandomStringUtils.randomAlphabetic(40000),
+ "1999-12-01",
+ "1998-12-01",
+ i.toString))
+ }
+ writer.close()
+
+ assert(FileFactory.getCarbonFile(writerPath).exists)
+ sql("DROP TABLE IF EXISTS sdkTable")
+ sql(s"CREATE EXTERNAL TABLE sdkTable STORED BY 'carbondata' LOCATION '$writerPath'")
+ checkAnswer(sql("select count(*) from sdkTable"), Seq(Row(5)))
+ }
}
object avroUtil{
http://git-wip-us.apache.org/repos/asf/carbondata/blob/f354e0b2/processing/src/main/java/org/apache/carbondata/processing/store/CarbonFactDataHandlerModel.java
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/apache/carbondata/processing/store/CarbonFactDataHandlerModel.java b/processing/src/main/java/org/apache/carbondata/processing/store/CarbonFactDataHandlerModel.java
index 54dd0aa..1a38de6 100644
--- a/processing/src/main/java/org/apache/carbondata/processing/store/CarbonFactDataHandlerModel.java
+++ b/processing/src/main/java/org/apache/carbondata/processing/store/CarbonFactDataHandlerModel.java
@@ -224,14 +224,16 @@ public class CarbonFactDataHandlerModel {
simpleDimsLen[i] = dimLens[i];
}
+ int noDictionayDimensionIndex = 0;
// for dynamic page size in write step if varchar columns exist
List<Integer> varcharDimIdxInNoDict = new ArrayList<>();
for (DataField dataField : configuration.getDataFields()) {
CarbonColumn column = dataField.getColumn();
- if (!column.isComplex() && !dataField.hasDictionaryEncoding() &&
- column.getDataType() == DataTypes.VARCHAR) {
- // ordinal is set in CarbonTable.fillDimensionsAndMeasuresForTables()
- varcharDimIdxInNoDict.add(column.getOrdinal() - simpleDimsCount);
+ if (!dataField.hasDictionaryEncoding()) {
+ if (!column.isComplex() && column.getDataType() == DataTypes.VARCHAR) {
+ varcharDimIdxInNoDict.add(noDictionayDimensionIndex);
+ }
+ noDictionayDimensionIndex++;
}
}
http://git-wip-us.apache.org/repos/asf/carbondata/blob/f354e0b2/processing/src/main/java/org/apache/carbondata/processing/store/TablePage.java
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/apache/carbondata/processing/store/TablePage.java b/processing/src/main/java/org/apache/carbondata/processing/store/TablePage.java
index 73746d6..a311483 100644
--- a/processing/src/main/java/org/apache/carbondata/processing/store/TablePage.java
+++ b/processing/src/main/java/org/apache/carbondata/processing/store/TablePage.java
@@ -195,8 +195,10 @@ public class TablePage {
if (noDictionaryCount > 0 || complexColumnCount > 0) {
TableSpec tableSpec = model.getTableSpec();
byte[][] noDictAndComplex = WriteStepRowUtil.getNoDictAndComplexDimension(row);
+ List<TableSpec.DimensionSpec> noDictionaryDimensionSpec =
+ tableSpec.getNoDictionaryDimensionSpec();
for (int i = 0; i < noDictAndComplex.length; i++) {
- if (tableSpec.getDimensionSpec(dictDimensionPages.length + i).getSchemaDataType()
+ if (noDictionaryDimensionSpec.get(i).getSchemaDataType()
== DataTypes.VARCHAR) {
byte[] valueWithLength = addIntLengthToByteArray(noDictAndComplex[i]);
noDictDimensionPages[i].putData(rowId, valueWithLength);
http://git-wip-us.apache.org/repos/asf/carbondata/blob/f354e0b2/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonWriterBuilder.java
----------------------------------------------------------------------
diff --git a/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonWriterBuilder.java b/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonWriterBuilder.java
index 5f3e7b8..388870f 100644
--- a/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonWriterBuilder.java
+++ b/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonWriterBuilder.java
@@ -294,12 +294,6 @@ public class CarbonWriterBuilder {
*/
public CarbonWriterBuilder withTableProperties(Map<String, String> options) {
Objects.requireNonNull(options, "Table properties should not be null");
- //validate the options.
- if (options.size() > 5) {
- throw new IllegalArgumentException("Supports only 5 options now. "
- + "Refer method header or documentation");
- }
-
Set<String> supportedOptions = new HashSet<>(Arrays
.asList("table_blocksize", "table_blocklet_size", "local_dictionary_threshold",
"local_dictionary_enable", "sort_columns", "sort_scope", "long_string_columns"));