Posted to commits@carbondata.apache.org by ja...@apache.org on 2017/06/20 07:29:51 UTC
[45/56] [abbrv] carbondata git commit: add unsafe column page
http://git-wip-us.apache.org/repos/asf/carbondata/blob/7359601b/core/src/main/java/org/apache/carbondata/core/memory/UnsafeMemoryManager.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/memory/UnsafeMemoryManager.java b/core/src/main/java/org/apache/carbondata/core/memory/UnsafeMemoryManager.java
index 132a3fa..90cbe75 100644
--- a/core/src/main/java/org/apache/carbondata/core/memory/UnsafeMemoryManager.java
+++ b/core/src/main/java/org/apache/carbondata/core/memory/UnsafeMemoryManager.java
@@ -17,6 +17,9 @@
package org.apache.carbondata.core.memory;
+import java.util.HashSet;
+import java.util.Set;
+
import org.apache.carbondata.common.logging.LogService;
import org.apache.carbondata.common.logging.LogServiceFactory;
import org.apache.carbondata.core.constants.CarbonCommonConstants;
@@ -74,6 +77,9 @@ public class UnsafeMemoryManager {
private long minimumMemory;
+ // for debug purpose
+ private Set<MemoryBlock> set = new HashSet<>();
+
private UnsafeMemoryManager(long totalMemory, MemoryAllocator allocator) {
this.totalMemory = totalMemory;
this.allocator = allocator;
@@ -91,12 +97,15 @@ public class UnsafeMemoryManager {
+ " and minimum reserve memory " + minimumMemory);
}
- public synchronized MemoryBlock allocateMemory(long memoryRequested) {
+ private synchronized MemoryBlock allocateMemory(long memoryRequested) {
if (memoryUsed + memoryRequested <= totalMemory) {
MemoryBlock allocate = allocator.allocate(memoryRequested);
memoryUsed += allocate.size();
- LOGGER.info("Memory block is created with size " + allocate.size() +
- " Total memory used " + memoryUsed + " memory left " + (getAvailableMemory()));
+ if (LOGGER.isDebugEnabled()) {
+ set.add(allocate);
+ LOGGER.error("Memory block (" + allocate + ") is created with size " + allocate.size() +
+ ". Total memory used " + memoryUsed + "Bytes, left " + getAvailableMemory() + "Bytes");
+ }
return allocate;
}
return null;
@@ -106,11 +115,14 @@ public class UnsafeMemoryManager {
allocator.free(memoryBlock);
memoryUsed -= memoryBlock.size();
memoryUsed = memoryUsed < 0 ? 0 : memoryUsed;
- LOGGER.info(
- "Memory released, memory used " + memoryUsed + " memory left " + (getAvailableMemory()));
+ if (LOGGER.isDebugEnabled()) {
+ set.remove(memoryBlock);
+ LOGGER.error("Memory block (" + memoryBlock + ") released. Total memory used " + memoryUsed +
+ "Bytes, left " + getAvailableMemory() + "Bytes. Total allocated block: " + set.size());
+ }
}
- public synchronized long getAvailableMemory() {
+ private synchronized long getAvailableMemory() {
return totalMemory - memoryUsed;
}
@@ -142,8 +154,9 @@ public class UnsafeMemoryManager {
tries++;
}
if (baseBlock == null) {
- throw new MemoryException("Not enough memory to create page");
+ throw new MemoryException("Not enough memory");
}
return baseBlock;
}
+
}
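
The memory manager hunks above make allocateMemory/getAvailableMemory private and, when debug logging is enabled, track every live MemoryBlock in a HashSet so leaked blocks show up as a non-empty set. A standalone sketch of that accounting pattern follows; the class and member names are illustrative, not CarbonData APIs.

import java.util.HashSet;
import java.util.Set;

// Minimal sketch of synchronized allocate/free accounting with optional
// debug-only tracking of live blocks (illustrative, not CarbonData code).
public class SimpleMemoryTracker {

  /** Stand-in for a MemoryBlock: it only remembers its size. */
  static final class Block {
    final long size;
    Block(long size) { this.size = size; }
  }

  private final long totalMemory;
  private long memoryUsed;
  private final boolean debug;
  // mirrors the debug-only Set<MemoryBlock> in the patch: holds live blocks
  private final Set<Block> liveBlocks = new HashSet<>();

  SimpleMemoryTracker(long totalMemory, boolean debug) {
    this.totalMemory = totalMemory;
    this.debug = debug;
  }

  synchronized Block allocate(long requested) {
    if (memoryUsed + requested > totalMemory) {
      return null;               // caller retries or fails, as in the patch
    }
    Block block = new Block(requested);
    memoryUsed += block.size;
    if (debug) {
      liveBlocks.add(block);     // tracking cost is paid only when debugging
    }
    return block;
  }

  synchronized void free(Block block) {
    memoryUsed = Math.max(0L, memoryUsed - block.size);
    if (debug) {
      liveBlocks.remove(block);  // a non-empty set at shutdown hints at a leak
    }
  }

  synchronized long available() {
    return totalMemory - memoryUsed;
  }

  public static void main(String[] args) {
    SimpleMemoryTracker tracker = new SimpleMemoryTracker(1024, true);
    Block block = tracker.allocate(256);
    System.out.println("available after allocate: " + tracker.available()); // 768
    tracker.free(block);
    System.out.println("available after free: " + tracker.available());     // 1024
  }
}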
http://git-wip-us.apache.org/repos/asf/carbondata/blob/7359601b/core/src/main/java/org/apache/carbondata/core/metadata/datatype/DataType.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/metadata/datatype/DataType.java b/core/src/main/java/org/apache/carbondata/core/metadata/datatype/DataType.java
index b5d175d..b258810 100644
--- a/core/src/main/java/org/apache/carbondata/core/metadata/datatype/DataType.java
+++ b/core/src/main/java/org/apache/carbondata/core/metadata/datatype/DataType.java
@@ -66,4 +66,7 @@ public enum DataType {
return sizeInBytes;
}
+ public int getSizeBits() {
+ return (int) (Math.log(getSizeInBytes()) / Math.log(2));
+ }
}
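
getSizeBits() returns the base-2 logarithm of a type's fixed size in bytes, presumably so that a byte offset into a fixed-width page can be computed as rowId << sizeBits instead of rowId * sizeInBytes. A small standalone illustration of that equivalence, using Integer.numberOfTrailingZeros for an exact log2 of a power-of-two size (names below are illustrative, not CarbonData code):

// For power-of-two element sizes, multiplying by the size equals shifting
// left by log2(size), which is what a "size in bits" makes cheap.
public class SizeBitsDemo {

  // exact log2 for a power-of-two size (2, 4, 8, ...)
  static int sizeBits(int sizeInBytes) {
    return Integer.numberOfTrailingZeros(sizeInBytes);
  }

  public static void main(String[] args) {
    int[] sizes = {2, 4, 8};   // e.g. SHORT, INT and LONG element widths
    int rowId = 5;
    for (int size : sizes) {
      int bits = sizeBits(size);
      long byMultiply = (long) rowId * size;
      long byShift = (long) rowId << bits;
      System.out.println(size + " bytes -> " + bits + " bits: "
          + byMultiply + " == " + byShift);
    }
  }
}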
http://git-wip-us.apache.org/repos/asf/carbondata/blob/7359601b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelFilterExecuterImpl.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelFilterExecuterImpl.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelFilterExecuterImpl.java
index 0e7f365..1f97e9b 100644
--- a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelFilterExecuterImpl.java
+++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelFilterExecuterImpl.java
@@ -194,7 +194,6 @@ public class RowLevelFilterExecuterImpl implements FilterExecuter {
for (int i = 0; i < pageNumbers; i++) {
BitSet set = new BitSet(numberOfRows[i]);
RowIntf row = new RowImpl();
- boolean invalidRowsPresent = false;
for (int index = 0; index < numberOfRows[i]; index++) {
createRow(blockChunkHolder, row ,i, index);
Boolean rslt = false;
@@ -205,7 +204,7 @@ public class RowLevelFilterExecuterImpl implements FilterExecuter {
// error only once since the evaluation happens for all rows; in order to avoid
// too much log information, the log will be printed only once.
catch (FilterIllegalMemberException e) {
- FilterUtil.logError(e, invalidRowsPresent);
+ FilterUtil.logError(e, false);
}
if (null != rslt && rslt) {
set.set(index);
@@ -213,7 +212,6 @@ public class RowLevelFilterExecuterImpl implements FilterExecuter {
}
bitSetGroup.setBitSet(set, i);
}
-
return bitSetGroup;
}
@@ -333,33 +331,28 @@ public class RowLevelFilterExecuterImpl implements FilterExecuter {
.convertToMeasureColDataChunk(pageIndex);
switch (msrType) {
case SHORT:
- msrValue = (short) measureColumnDataChunk.getColumnPage()
- .getLong(index);
+ msrValue = (short) measureColumnDataChunk.getColumnPage().getLong(index);
break;
case INT:
- msrValue =
- (int)measureColumnDataChunk.getColumnPage().getLong(index);
+ msrValue = (int) measureColumnDataChunk.getColumnPage().getLong(index);
break;
case LONG:
- msrValue =
- measureColumnDataChunk.getColumnPage().getLong(index);
+ msrValue = measureColumnDataChunk.getColumnPage().getLong(index);
break;
case DECIMAL:
- BigDecimal bigDecimalValue =
- measureColumnDataChunk.getColumnPage()
- .getDecimal(index);
- if (null != bigDecimalValue
- && msrColumnEvalutorInfo.getCarbonColumn().getColumnSchema().getScale()
- > bigDecimalValue.scale()) {
- bigDecimalValue = bigDecimalValue
- .setScale(msrColumnEvalutorInfo.getCarbonColumn().getColumnSchema().getScale(),
+ BigDecimal bigDecimalValue = measureColumnDataChunk.getColumnPage().getDecimal(index);
+ if (null != bigDecimalValue &&
+ msrColumnEvalutorInfo.getCarbonColumn().getColumnSchema().getScale() >
+ bigDecimalValue.scale()) {
+ bigDecimalValue =
+ bigDecimalValue.setScale(
+ msrColumnEvalutorInfo.getCarbonColumn().getColumnSchema().getScale(),
RoundingMode.HALF_UP);
}
msrValue = bigDecimalValue;
break;
default:
- msrValue =
- measureColumnDataChunk.getColumnPage().getDouble(index);
+ msrValue = measureColumnDataChunk.getColumnPage().getDouble(index);
}
record[msrColumnEvalutorInfo.getRowIndex()] =
measureColumnDataChunk.getNullValueIndexHolder().getBitSet().get(index) ? null : msrValue;
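
In the DECIMAL branch above, the stored value is rescaled up to the column schema's scale. Because the target scale is strictly greater than the value's current scale, HALF_UP never discards digits here; the operation only pads trailing zeros. A tiny standalone illustration of that BigDecimal behaviour (the schema scale of 3 is just an example value):

import java.math.BigDecimal;
import java.math.RoundingMode;

public class DecimalScaleDemo {
  public static void main(String[] args) {
    BigDecimal stored = new BigDecimal("12.5");    // scale 1
    int schemaScale = 3;                           // e.g. a decimal(17,3) column
    if (schemaScale > stored.scale()) {
      // raising the scale pads zeros; nothing is rounded away
      stored = stored.setScale(schemaScale, RoundingMode.HALF_UP);
    }
    System.out.println(stored);                    // prints 12.500
  }
}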
http://git-wip-us.apache.org/repos/asf/carbondata/blob/7359601b/core/src/main/java/org/apache/carbondata/core/scan/result/AbstractScannedResult.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/scan/result/AbstractScannedResult.java b/core/src/main/java/org/apache/carbondata/core/scan/result/AbstractScannedResult.java
index 783c29e..132b4f5 100644
--- a/core/src/main/java/org/apache/carbondata/core/scan/result/AbstractScannedResult.java
+++ b/core/src/main/java/org/apache/carbondata/core/scan/result/AbstractScannedResult.java
@@ -78,7 +78,7 @@ public abstract class AbstractScannedResult {
/**
* dimension column data chunk
*/
- protected DimensionColumnDataChunk[][] dataChunks;
+ protected DimensionColumnDataChunk[][] dimensionDataChunks;
/**
* Raw dimension chunks;
@@ -160,7 +160,7 @@ public abstract class AbstractScannedResult {
* @param dataChunks dimension chunks used in query
*/
public void setDimensionChunks(DimensionColumnDataChunk[][] dataChunks) {
- this.dataChunks = dataChunks;
+ this.dimensionDataChunks = dataChunks;
}
/**
@@ -197,7 +197,7 @@ public abstract class AbstractScannedResult {
byte[] completeKey = new byte[fixedLengthKeySize];
int offset = 0;
for (int i = 0; i < this.dictionaryColumnBlockIndexes.length; i++) {
- offset += dataChunks[dictionaryColumnBlockIndexes[i]][pageCounter]
+ offset += dimensionDataChunks[dictionaryColumnBlockIndexes[i]][pageCounter]
.fillChunkData(completeKey, offset, rowId,
columnGroupKeyStructureInfo.get(dictionaryColumnBlockIndexes[i]));
}
@@ -216,7 +216,7 @@ public abstract class AbstractScannedResult {
int[] completeKey = new int[totalDimensionsSize];
int column = 0;
for (int i = 0; i < this.dictionaryColumnBlockIndexes.length; i++) {
- column = dataChunks[dictionaryColumnBlockIndexes[i]][pageCounter]
+ column = dimensionDataChunks[dictionaryColumnBlockIndexes[i]][pageCounter]
.fillConvertedChunkData(rowId, column, completeKey,
columnGroupKeyStructureInfo.get(dictionaryColumnBlockIndexes[i]));
}
@@ -230,7 +230,7 @@ public abstract class AbstractScannedResult {
public void fillColumnarDictionaryBatch(ColumnVectorInfo[] vectorInfo) {
int column = 0;
for (int i = 0; i < this.dictionaryColumnBlockIndexes.length; i++) {
- column = dataChunks[dictionaryColumnBlockIndexes[i]][pageCounter]
+ column = dimensionDataChunks[dictionaryColumnBlockIndexes[i]][pageCounter]
.fillConvertedChunkData(vectorInfo, column,
columnGroupKeyStructureInfo.get(dictionaryColumnBlockIndexes[i]));
}
@@ -242,7 +242,7 @@ public abstract class AbstractScannedResult {
public void fillColumnarNoDictionaryBatch(ColumnVectorInfo[] vectorInfo) {
int column = 0;
for (int i = 0; i < this.noDictionaryColumnBlockIndexes.length; i++) {
- column = dataChunks[noDictionaryColumnBlockIndexes[i]][pageCounter]
+ column = dimensionDataChunks[noDictionaryColumnBlockIndexes[i]][pageCounter]
.fillConvertedChunkData(vectorInfo, column,
columnGroupKeyStructureInfo.get(noDictionaryColumnBlockIndexes[i]));
}
@@ -360,7 +360,7 @@ public abstract class AbstractScannedResult {
* @return dimension data based on row id
*/
protected byte[] getDimensionData(int dimOrdinal, int rowId) {
- return dataChunks[dimOrdinal][pageCounter].getChunkData(rowId);
+ return dimensionDataChunks[dimOrdinal][pageCounter].getChunkData(rowId);
}
/**
@@ -375,7 +375,7 @@ public abstract class AbstractScannedResult {
int position = 0;
for (int i = 0; i < this.noDictionaryColumnBlockIndexes.length; i++) {
noDictionaryColumnsKeys[position++] =
- dataChunks[noDictionaryColumnBlockIndexes[i]][pageCounter].getChunkData(rowId);
+ dimensionDataChunks[noDictionaryColumnBlockIndexes[i]][pageCounter].getChunkData(rowId);
}
return noDictionaryColumnsKeys;
}
@@ -392,7 +392,7 @@ public abstract class AbstractScannedResult {
int position = 0;
for (int i = 0; i < this.noDictionaryColumnBlockIndexes.length; i++) {
noDictionaryColumnsKeys[position++] = new String(
- dataChunks[noDictionaryColumnBlockIndexes[i]][pageCounter].getChunkData(rowId));
+ dimensionDataChunks[noDictionaryColumnBlockIndexes[i]][pageCounter].getChunkData(rowId));
}
return noDictionaryColumnsKeys;
}
@@ -492,12 +492,12 @@ public abstract class AbstractScannedResult {
*/
public void freeMemory() {
// first free the dimension chunks
- if (null != dataChunks) {
- for (int i = 0; i < dataChunks.length; i++) {
- if (null != dataChunks[i]) {
- for (int j = 0; j < dataChunks[i].length; j++) {
- if (null != dataChunks[i][j]) {
- dataChunks[i][j].freeMemory();
+ if (null != dimensionDataChunks) {
+ for (int i = 0; i < dimensionDataChunks.length; i++) {
+ if (null != dimensionDataChunks[i]) {
+ for (int j = 0; j < dimensionDataChunks[i].length; j++) {
+ if (null != dimensionDataChunks[i][j]) {
+ dimensionDataChunks[i][j].freeMemory();
}
}
}
http://git-wip-us.apache.org/repos/asf/carbondata/blob/7359601b/core/src/main/java/org/apache/carbondata/core/scan/result/impl/FilterQueryScannedResult.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/scan/result/impl/FilterQueryScannedResult.java b/core/src/main/java/org/apache/carbondata/core/scan/result/impl/FilterQueryScannedResult.java
index d9eda6a..8120310 100644
--- a/core/src/main/java/org/apache/carbondata/core/scan/result/impl/FilterQueryScannedResult.java
+++ b/core/src/main/java/org/apache/carbondata/core/scan/result/impl/FilterQueryScannedResult.java
@@ -93,7 +93,7 @@ public class FilterQueryScannedResult extends AbstractScannedResult {
public void fillColumnarDictionaryBatch(ColumnVectorInfo[] vectorInfo) {
int column = 0;
for (int i = 0; i < this.dictionaryColumnBlockIndexes.length; i++) {
- column = dataChunks[dictionaryColumnBlockIndexes[i]][pageCounter]
+ column = dimensionDataChunks[dictionaryColumnBlockIndexes[i]][pageCounter]
.fillConvertedChunkData(rowMapping[pageCounter], vectorInfo, column,
columnGroupKeyStructureInfo.get(dictionaryColumnBlockIndexes[i]));
}
@@ -105,7 +105,7 @@ public class FilterQueryScannedResult extends AbstractScannedResult {
public void fillColumnarNoDictionaryBatch(ColumnVectorInfo[] vectorInfo) {
int column = 0;
for (int i = 0; i < this.noDictionaryColumnBlockIndexes.length; i++) {
- column = dataChunks[noDictionaryColumnBlockIndexes[i]][pageCounter]
+ column = dimensionDataChunks[noDictionaryColumnBlockIndexes[i]][pageCounter]
.fillConvertedChunkData(rowMapping[pageCounter], vectorInfo, column,
columnGroupKeyStructureInfo.get(noDictionaryColumnBlockIndexes[i]));
}
http://git-wip-us.apache.org/repos/asf/carbondata/blob/7359601b/core/src/main/java/org/apache/carbondata/core/scan/scanner/impl/FilterScanner.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/scan/scanner/impl/FilterScanner.java b/core/src/main/java/org/apache/carbondata/core/scan/scanner/impl/FilterScanner.java
index e710e40..55483d2 100644
--- a/core/src/main/java/org/apache/carbondata/core/scan/scanner/impl/FilterScanner.java
+++ b/core/src/main/java/org/apache/carbondata/core/scan/scanner/impl/FilterScanner.java
@@ -87,7 +87,8 @@ public class FilterScanner extends AbstractBlockletScanner {
*/
@Override public AbstractScannedResult scanBlocklet(BlocksChunkHolder blocksChunkHolder)
throws IOException, FilterUnsupportedException {
- return fillScannedResult(blocksChunkHolder);
+ AbstractScannedResult result = fillScannedResult(blocksChunkHolder);
+ return result;
}
@Override public boolean isScanRequired(BlocksChunkHolder blocksChunkHolder) throws IOException {
http://git-wip-us.apache.org/repos/asf/carbondata/blob/7359601b/core/src/main/java/org/apache/carbondata/core/util/ByteUtil.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/util/ByteUtil.java b/core/src/main/java/org/apache/carbondata/core/util/ByteUtil.java
index aee4f54..fb1cba9 100644
--- a/core/src/main/java/org/apache/carbondata/core/util/ByteUtil.java
+++ b/core/src/main/java/org/apache/carbondata/core/util/ByteUtil.java
@@ -493,6 +493,18 @@ public final class ByteUtil {
return n ^ Integer.MIN_VALUE;
}
+ public static int toInt(byte[] bytes, int offset) {
+ return (((int)bytes[offset]) << 24) + (((int)bytes[offset + 1]) << 16) +
+ (((int)bytes[offset + 2]) << 8) + bytes[offset + 3];
+ }
+
+ public static void setInt(byte[] data, int offset, int value) {
+ data[offset] = (byte) (value >> 24);
+ data[offset + 1] = (byte) (value >> 16);
+ data[offset + 2] = (byte) (value >> 8);
+ data[offset + 3] = (byte) value;
+ }
+
/**
* float => byte[]
*
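
One detail worth noting in the new ByteUtil helpers: toInt adds the promoted bytes without & 0xFF masks, so a byte of 0x80 or above in any of the three lower positions sign-extends during the byte-to-int promotion and perturbs the result. The conventional big-endian decode masks each byte, as in this standalone round-trip sketch (illustrative only, not part of the patch):

// Big-endian int encode/decode with explicit masks; the & 0xFF prevents
// sign extension when bytes >= 0x80 are promoted to int.
public class BigEndianIntCodec {

  static void setInt(byte[] data, int offset, int value) {
    data[offset]     = (byte) (value >> 24);
    data[offset + 1] = (byte) (value >> 16);
    data[offset + 2] = (byte) (value >> 8);
    data[offset + 3] = (byte) value;
  }

  static int toInt(byte[] bytes, int offset) {
    return ((bytes[offset] & 0xFF) << 24)
        | ((bytes[offset + 1] & 0xFF) << 16)
        | ((bytes[offset + 2] & 0xFF) << 8)
        | (bytes[offset + 3] & 0xFF);
  }

  public static void main(String[] args) {
    byte[] buffer = new byte[4];
    int original = 0x12FF80FE;   // low three bytes are all >= 0x80
    setInt(buffer, 0, original);
    int decoded = toInt(buffer, 0);
    System.out.println(Integer.toHexString(decoded)); // 12ff80fe
  }
}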
http://git-wip-us.apache.org/repos/asf/carbondata/blob/7359601b/examples/spark2/src/main/scala/org/apache/carbondata/examples/CarbonSessionExample.scala
----------------------------------------------------------------------
diff --git a/examples/spark2/src/main/scala/org/apache/carbondata/examples/CarbonSessionExample.scala b/examples/spark2/src/main/scala/org/apache/carbondata/examples/CarbonSessionExample.scala
index 99cbd74..7432fe9 100644
--- a/examples/spark2/src/main/scala/org/apache/carbondata/examples/CarbonSessionExample.scala
+++ b/examples/spark2/src/main/scala/org/apache/carbondata/examples/CarbonSessionExample.scala
@@ -36,9 +36,9 @@ object CarbonSessionExample {
CarbonProperties.getInstance()
.addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, "yyyy/MM/dd HH:mm:ss")
.addProperty(CarbonCommonConstants.CARBON_DATE_FORMAT, "yyyy/MM/dd")
+ .addProperty(CarbonCommonConstants.ENABLE_UNSAFE_COLUMN_PAGE_LOADING, "true")
import org.apache.spark.sql.CarbonSession._
-
val spark = SparkSession
.builder()
.master("local")
http://git-wip-us.apache.org/repos/asf/carbondata/blob/7359601b/examples/spark2/src/main/scala/org/apache/carbondata/examples/CompareTest.scala
----------------------------------------------------------------------
diff --git a/examples/spark2/src/main/scala/org/apache/carbondata/examples/CompareTest.scala b/examples/spark2/src/main/scala/org/apache/carbondata/examples/CompareTest.scala
index 1d24e36..ee53c31 100644
--- a/examples/spark2/src/main/scala/org/apache/carbondata/examples/CompareTest.scala
+++ b/examples/spark2/src/main/scala/org/apache/carbondata/examples/CompareTest.scala
@@ -314,7 +314,7 @@ object CompareTest {
// check result by comparing output from parquet and carbon
parquetResult.zipWithIndex.foreach { case (result, index) =>
if (result._2 != carbonResult(index)._2) {
- sys.error(s"result not matching for query ${index + 1}: " +
+ sys.error(s"result not matching for query ${index + 1} (${queries(index).desc}): " +
s"${result._2} and ${carbonResult(index)._2}")
}
}
@@ -338,6 +338,7 @@ object CompareTest {
.addProperty("carbon.enable.vector.reader", "true")
.addProperty("enable.unsafe.sort", "true")
.addProperty("carbon.blockletgroup.size.in.mb", "32")
+ .addProperty(CarbonCommonConstants.ENABLE_UNSAFE_COLUMN_PAGE_LOADING, "true")
import org.apache.spark.sql.CarbonSession._
val rootPath = new File(this.getClass.getResource("/").getPath
+ "../../../..").getCanonicalPath
@@ -346,6 +347,7 @@ object CompareTest {
.builder()
.master("local")
.enableHiveSupport()
+ .config("spark.driver.host", "127.0.0.1")
.getOrCreateCarbonSession(storeLocation)
spark.sparkContext.setLogLevel("warn")
http://git-wip-us.apache.org/repos/asf/carbondata/blob/7359601b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/bigdecimal/TestNullAndEmptyFieldsUnsafe.scala
----------------------------------------------------------------------
diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/bigdecimal/TestNullAndEmptyFieldsUnsafe.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/bigdecimal/TestNullAndEmptyFieldsUnsafe.scala
new file mode 100644
index 0000000..b97fb3d
--- /dev/null
+++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/bigdecimal/TestNullAndEmptyFieldsUnsafe.scala
@@ -0,0 +1,119 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.carbondata.spark.testsuite.bigdecimal
+
+import org.apache.spark.sql.common.util.QueryTest
+import org.scalatest.BeforeAndAfterAll
+
+import org.apache.carbondata.core.constants.CarbonCommonConstants
+import org.apache.carbondata.core.util.CarbonProperties
+
+/**
+ * Test cases for testing columns having null value
+ */
+class TestNullAndEmptyFieldsUnsafe extends QueryTest with BeforeAndAfterAll {
+
+ override def beforeAll {
+ sql("drop table if exists carbonTable")
+ sql("drop table if exists hiveTable")
+ CarbonProperties.getInstance()
+ .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT,
+ CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT)
+ .addProperty(CarbonCommonConstants.ENABLE_UNSAFE_COLUMN_PAGE_LOADING, "true")
+ val csvFilePath = s"$resourcesPath/nullandnonparsableValue.csv"
+ sql(
+ "CREATE TABLE IF NOT EXISTS carbonTable (ID String, date Timestamp, country String, name " +
+ "String, phonetype String, serialname String, salary Decimal(17,2))STORED BY 'org.apache" +
+ ".carbondata.format'"
+ )
+ sql(
+ "create table if not exists hiveTable(ID String, date Timestamp, country String, name " +
+ "String, " +
+ "phonetype String, serialname String, salary Decimal(17,2))row format delimited fields " +
+ "terminated by ','"
+ )
+ sql(
+ "LOAD DATA LOCAL INPATH '" + csvFilePath + "' into table carbonTable OPTIONS " +
+ "('FILEHEADER'='ID,date," +
+ "country,name,phonetype,serialname,salary')"
+ )
+ sql(
+ "LOAD DATA local inpath '" + csvFilePath + "' INTO table hiveTable"
+ )
+ }
+
+ test("test detail query on column having null values") {
+ checkAnswer(
+ sql("select * from carbonTable"),
+ sql("select * from hiveTable")
+ )
+ }
+
+ test("test filter query on column is null") {
+ checkAnswer(
+ sql("select * from carbonTable where salary is null"),
+ sql("select * from hiveTable where salary is null")
+ )
+ }
+
+ test("test filter query on column is not null") {
+ checkAnswer(
+ sql("select * from carbonTable where salary is not null"),
+ sql("select * from hiveTable where salary is not null")
+ )
+ }
+
+ test("test filter query on columnValue=null") {
+ checkAnswer(
+ sql("select * from carbonTable where salary=null"),
+ sql("select * from hiveTable where salary=null")
+ )
+ }
+
+ test("test filter query where date is null") {
+ checkAnswer(
+ sql("select * from carbonTable where date is null"),
+ sql("select * from hiveTable where date is null")
+ )
+ }
+
+ test("test subquery on column having null values") {
+ checkAnswer(
+ sql("select * from (select if(country='china','c', country) test from carbonTable)qq where test is null"),
+ sql("select * from (select if(country='china','c', country) test from hiveTable)qq where test is null")
+ )
+ }
+
+ test("test subquery on column having not null values") {
+ checkAnswer(
+ sql("select * from (select if(country='china','c', country) test from carbonTable)qq where test is not null"),
+ sql("select * from (select if(country='china','c', country) test from hiveTable)qq where test is not null")
+ )
+ }
+
+ override def afterAll {
+ sql("drop table if exists carbonTable")
+ sql("drop table if exists hiveTable")
+ CarbonProperties.getInstance()
+ .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, "dd-MM-yyyy")
+ .addProperty(CarbonCommonConstants.ENABLE_UNSAFE_COLUMN_PAGE_LOADING,
+ CarbonCommonConstants.ENABLE_UNSAFE_COLUMN_PAGE_LOADING_DEFAULT)
+ }
+}
+
+
http://git-wip-us.apache.org/repos/asf/carbondata/blob/7359601b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestLoadDataWithHiveSyntaxUnsafe.scala
----------------------------------------------------------------------
diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestLoadDataWithHiveSyntaxUnsafe.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestLoadDataWithHiveSyntaxUnsafe.scala
new file mode 100644
index 0000000..2a9d1d9
--- /dev/null
+++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestLoadDataWithHiveSyntaxUnsafe.scala
@@ -0,0 +1,709 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.carbondata.spark.testsuite.dataload
+
+import java.io.File
+
+import org.apache.spark.sql.Row
+import org.apache.spark.sql.common.util.QueryTest
+import org.scalatest.BeforeAndAfterAll
+
+import org.apache.carbondata.core.constants.CarbonCommonConstants
+import org.apache.carbondata.core.util.CarbonProperties
+
+/**
+ * Test Class for data loading with Unsafe ColumnPage
+ *
+ */
+class TestLoadDataWithHiveSyntaxUnsafe extends QueryTest with BeforeAndAfterAll {
+
+ override def beforeAll {
+ CarbonProperties.getInstance().addProperty(
+ CarbonCommonConstants.ENABLE_UNSAFE_COLUMN_PAGE_LOADING,
+ "true"
+ )
+ sql("drop table if exists escapechar1")
+ sql("drop table if exists escapechar2")
+ sql("drop table if exists escapechar3")
+ sql("drop table if exists specialcharacter1")
+ sql("drop table if exists specialcharacter2")
+ sql("drop table if exists collessthanschema")
+ sql("drop table if exists decimalarray")
+ sql("drop table if exists decimalstruct")
+ sql("drop table if exists carbontable")
+ sql("drop table if exists hivetable")
+ sql("drop table if exists testtable")
+ sql("drop table if exists testhivetable")
+ sql("drop table if exists testtable1")
+ sql("drop table if exists testhivetable1")
+ sql("drop table if exists complexcarbontable")
+ sql("drop table if exists complex_t3")
+ sql("drop table if exists complex_hive_t3")
+ sql("drop table if exists header_test")
+ sql("drop table if exists duplicateColTest")
+ sql("drop table if exists mixed_header_test")
+ sql("drop table if exists primitivecarbontable")
+ sql("drop table if exists UPPERCASEcube")
+ sql("drop table if exists lowercaseCUBE")
+ sql("drop table if exists carbontable1")
+ sql("drop table if exists hivetable1")
+ sql("drop table if exists comment_test")
+ sql("drop table if exists smallinttable")
+ sql("drop table if exists smallinthivetable")
+ sql(
+ "CREATE table carbontable (empno int, empname String, designation String, doj String, " +
+ "workgroupcategory int, workgroupcategoryname String, deptno int, deptname String, " +
+ "projectcode int, projectjoindate String, projectenddate String, attendance int," +
+ "utilization int,salary int) STORED BY 'org.apache.carbondata.format'"
+ )
+ sql(
+ "create table hivetable(empno int, empname String, designation string, doj String, " +
+ "workgroupcategory int, workgroupcategoryname String,deptno int, deptname String, " +
+ "projectcode int, projectjoindate String,projectenddate String, attendance String," +
+ "utilization String,salary String)row format delimited fields terminated by ','"
+ )
+
+ }
+
+ test("create table with smallint type and query smallint table") {
+ sql("drop table if exists smallinttable")
+ sql("drop table if exists smallinthivetable")
+ sql(
+ "create table smallinttable(empno smallint, empname String, designation string, " +
+ "doj String, workgroupcategory int, workgroupcategoryname String,deptno int, " +
+ "deptname String, projectcode int, projectjoindate String,projectenddate String, " +
+ "attendance String, utilization String,salary String)" +
+ "STORED BY 'org.apache.carbondata.format'"
+ )
+
+ sql(
+ "create table smallinthivetable(empno smallint, empname String, designation string, " +
+ "doj String, workgroupcategory int, workgroupcategoryname String,deptno int, " +
+ "deptname String, projectcode int, projectjoindate String,projectenddate String, " +
+ "attendance String, utilization String,salary String)" +
+ "row format delimited fields terminated by ','"
+ )
+
+ sql(s"LOAD DATA local inpath '$resourcesPath/data.csv' INTO table smallinttable ")
+ sql(s"LOAD DATA local inpath '$resourcesPath/datawithoutheader.csv' overwrite " +
+ "INTO table smallinthivetable")
+
+ checkAnswer(
+ sql("select empno from smallinttable"),
+ sql("select empno from smallinthivetable")
+ )
+
+ sql("drop table if exists smallinttable")
+ sql("drop table if exists smallinthivetable")
+ }
+
+ test("test data loading and validate query output") {
+ sql("drop table if exists testtable")
+ sql("drop table if exists testhivetable")
+ //Create test cube and hive table
+ sql(
+ "CREATE table testtable (empno string, empname String, designation String, doj String, " +
+ "workgroupcategory string, workgroupcategoryname String, deptno string, deptname String, " +
+ "projectcode string, projectjoindate String, projectenddate String,attendance double," +
+ "utilization double,salary double) STORED BY 'org.apache.carbondata.format' TBLPROPERTIES" +
+ "('DICTIONARY_EXCLUDE'='empno,empname,designation,doj,workgroupcategory," +
+ "workgroupcategoryname,deptno,deptname,projectcode,projectjoindate,projectenddate')"
+ )
+ sql(
+ "create table testhivetable(empno string, empname String, designation string, doj String, " +
+ "workgroupcategory string, workgroupcategoryname String,deptno string, deptname String, " +
+ "projectcode string, projectjoindate String,projectenddate String, attendance double," +
+ "utilization double,salary double)row format delimited fields terminated by ','"
+ )
+ //load data into test cube and hive table and validate query result
+ sql(s"LOAD DATA local inpath '$resourcesPath/data.csv' INTO table testtable")
+ sql(
+ s"LOAD DATA local inpath '$resourcesPath/datawithoutheader.csv' overwrite INTO table " +
+ "testhivetable"
+ )
+ checkAnswer(sql("select * from testtable"), sql("select * from testhivetable"))
+ //load data incrementally and validate query result
+ sql(
+ s"LOAD DATA local inpath '$resourcesPath/data.csv' INTO TABLE testtable OPTIONS" +
+ "('DELIMITER'= ',', 'QUOTECHAR'= '\"')"
+ )
+ sql(
+ s"LOAD DATA local inpath '$resourcesPath/datawithoutheader.csv' INTO table testhivetable"
+ )
+ checkAnswer(sql("select * from testtable"), sql("select * from testhivetable"))
+ //drop test cube and table
+ sql("drop table if exists testtable")
+ sql("drop table if exists testhivetable")
+ }
+
+ /**
+ * TODO: temporarily changing cube names to different names,
+ * however deletion and creation of cube with same name
+ */
+ test("test data loading with different case file header and validate query output") {
+ sql("drop table if exists testtable1")
+ sql("drop table if exists testhivetable1")
+ //Create test cube and hive table
+ sql(
+ "CREATE table testtable1 (empno string, empname String, designation String, doj String, " +
+ "workgroupcategory string, workgroupcategoryname String, deptno string, deptname String, " +
+ "projectcode string, projectjoindate String, projectenddate String,attendance double," +
+ "utilization double,salary double) STORED BY 'org.apache.carbondata.format' TBLPROPERTIES" +
+ "('DICTIONARY_EXCLUDE'='empno,empname,designation,doj,workgroupcategory," +
+ "workgroupcategoryname,deptno,deptname,projectcode,projectjoindate,projectenddate')"
+ )
+ sql(
+ "create table testhivetable1(empno string, empname String, designation string, doj String, " +
+ "workgroupcategory string, workgroupcategoryname String,deptno string, deptname String, " +
+ "projectcode string, projectjoindate String,projectenddate String, attendance double," +
+ "utilization double,salary double)row format delimited fields terminated by ','"
+ )
+ //load data into test cube and hive table and validate query result
+ sql(
+ s"LOAD DATA local inpath '$resourcesPath/datawithoutheader.csv' INTO table testtable1 " +
+ "options('DELIMITER'=',', 'QUOTECHAR'='\"', 'FILEHEADER'='EMPno, empname,designation,doj," +
+ "workgroupcategory,workgroupcategoryname, deptno,deptname,projectcode,projectjoindate," +
+ "projectenddate, attendance, utilization,SALARY')"
+ )
+ sql(
+ s"LOAD DATA local inpath '$resourcesPath/datawithoutheader.csv' overwrite INTO table " +
+ "testhivetable1"
+ )
+ checkAnswer(sql("select * from testtable1"), sql("select * from testhivetable1"))
+ //drop test cube and table
+ sql("drop table if exists testtable1")
+ sql("drop table if exists testhivetable1")
+ }
+
+ test("test hive table data loading") {
+ sql(
+ s"LOAD DATA local inpath '$resourcesPath/datawithoutheader.csv' overwrite INTO table " +
+ "hivetable"
+ )
+ sql(s"LOAD DATA local inpath '$resourcesPath/datawithoutheader.csv' INTO table hivetable")
+ }
+
+ test("test carbon table data loading using old syntax") {
+ sql(
+ s"LOAD DATA local inpath '$resourcesPath/data.csv' INTO TABLE carbontable OPTIONS" +
+ "('DELIMITER'= ',', 'QUOTECHAR'= '\"')"
+ )
+ }
+
+ test("test carbon table data loading using new syntax compatible with hive") {
+ sql(s"LOAD DATA local inpath '$resourcesPath/data.csv' INTO table carbontable")
+ sql(
+ s"LOAD DATA local inpath '$resourcesPath/data.csv' INTO table carbontable options" +
+ "('DELIMITER'=',', 'QUOTECHAR'='\"')"
+ )
+ }
+
+ test("test carbon table data loading using new syntax with overwrite option compatible with hive")
+ {
+ try {
+ sql(s"LOAD DATA local inpath '$resourcesPath/data.csv' overwrite INTO table carbontable")
+ } catch {
+ case e: Throwable => {
+ assert(e.getMessage
+ .equals("Overwrite is not supported for carbon table with default.carbontable")
+ )
+ }
+ }
+ }
+
+ test("complex types data loading") {
+ sql("drop table if exists complexcarbontable")
+ sql("create table complexcarbontable(deviceInformationId int, channelsId string," +
+ "ROMSize string, purchasedate string, mobile struct<imei:string, imsi:string>," +
+ "MAC array<string>, locationinfo array<struct<ActiveAreaId:int, ActiveCountry:string, " +
+ "ActiveProvince:string, Activecity:string, ActiveDistrict:string, ActiveStreet:string>>," +
+ "proddate struct<productionDate:string,activeDeactivedate:array<string>>, gamePointId " +
+ "double,contractNumber double) " +
+ "STORED BY 'org.apache.carbondata.format' " +
+ "TBLPROPERTIES ('DICTIONARY_INCLUDE'='deviceInformationId')"
+ )
+ sql(
+ s"LOAD DATA local inpath '$resourcesPath/complexdata.csv' INTO table " +
+ "complexcarbontable " +
+ "OPTIONS('DELIMITER'=',', 'QUOTECHAR'='\"', 'FILEHEADER'='deviceInformationId,channelsId," +
+ "ROMSize,purchasedate,mobile,MAC,locationinfo,proddate,gamePointId,contractNumber'," +
+ "'COMPLEX_DELIMITER_LEVEL_1'='$', 'COMPLEX_DELIMITER_LEVEL_2'=':')"
+ )
+ sql("drop table if exists complexcarbontable")
+ }
+
+ test(
+ "complex types data loading with more unused columns and different order of complex columns " +
+ "in csv and create table"
+ ) {
+ sql("drop table if exists complexcarbontable")
+ sql("create table complexcarbontable(deviceInformationId int, channelsId string," +
+ "mobile struct<imei:string, imsi:string>, ROMSize string, purchasedate string," +
+ "MAC array<string>, locationinfo array<struct<ActiveAreaId:int, ActiveCountry:string, " +
+ "ActiveProvince:string, Activecity:string, ActiveDistrict:string, ActiveStreet:string>>," +
+ "proddate struct<productionDate:string,activeDeactivedate:array<string>>, gamePointId " +
+ "double,contractNumber double) " +
+ "STORED BY 'org.apache.carbondata.format' " +
+ "TBLPROPERTIES ('DICTIONARY_INCLUDE'='deviceInformationId','DICTIONARY_EXCLUDE'='channelsId')"
+ )
+ sql(
+ s"LOAD DATA local inpath '$resourcesPath/complextypediffentcolheaderorder.csv' INTO " +
+ "table complexcarbontable " +
+ "OPTIONS('DELIMITER'=',', 'QUOTECHAR'='\"', 'FILEHEADER'='deviceInformationId,channelsId," +
+ "ROMSize,purchasedate,MAC,abc,mobile,locationinfo,proddate,gamePointId,contractNumber'," +
+ "'COMPLEX_DELIMITER_LEVEL_1'='$', 'COMPLEX_DELIMITER_LEVEL_2'=':')"
+ )
+ sql("select count(*) from complexcarbontable")
+ sql("drop table if exists complexcarbontable")
+ }
+
+ test("test carbon table data loading with csv file Header in caps") {
+ sql("drop table if exists header_test")
+ sql(
+ "create table header_test(empno int, empname String, designation string, doj String, " +
+ "workgroupcategory int, workgroupcategoryname String,deptno int, deptname String, " +
+ "projectcode int, projectjoindate String,projectenddate String, attendance String," +
+ "utilization String,salary String) STORED BY 'org.apache.carbondata.format'"
+ )
+ val csvFilePath = s"$resourcesPath/data_withCAPSHeader.csv"
+ sql("LOAD DATA local inpath '" + csvFilePath + "' INTO table header_test OPTIONS " +
+ "('DELIMITER'=',', 'QUOTECHAR'='\"')");
+ checkAnswer(sql("select empno from header_test"),
+ Seq(Row(11), Row(12))
+ )
+ }
+
+ test("test duplicate column validation") {
+ try {
+ sql("create table duplicateColTest(col1 string, Col1 string)")
+ }
+ catch {
+ case e: Exception => {
+ assert(e.getMessage.contains("Duplicate column name") ||
+ e.getMessage.contains("Found duplicate column"))
+ }
+ }
+ }
+
+ test(
+ "test carbon table data loading with csv file Header in Mixed Case and create table columns " +
+ "in mixed case"
+ ) {
+ sql("drop table if exists mixed_header_test")
+ sql(
+ "create table mixed_header_test(empno int, empname String, Designation string, doj String, " +
+ "Workgroupcategory int, workgroupcategoryname String,deptno int, deptname String, " +
+ "projectcode int, projectjoindate String,projectenddate String, attendance String," +
+ "utilization String,salary String) STORED BY 'org.apache.carbondata.format'"
+ )
+ val csvFilePath = s"$resourcesPath/data_withMixedHeader.csv"
+ sql("LOAD DATA local inpath '" + csvFilePath + "' INTO table mixed_header_test OPTIONS " +
+ "('DELIMITER'=',', 'QUOTECHAR'='\"')");
+ checkAnswer(sql("select empno from mixed_header_test"),
+ Seq(Row(11), Row(12))
+ )
+ }
+
+
+ test("complex types data loading with hive column having more than required column values") {
+ sql("drop table if exists complexcarbontable")
+ sql("create table complexcarbontable(deviceInformationId int, channelsId string," +
+ "ROMSize string, purchasedate string, mobile struct<imei:string, imsi:string>," +
+ "MAC array<string>, locationinfo array<struct<ActiveAreaId:int, ActiveCountry:string, " +
+ "ActiveProvince:string, Activecity:string, ActiveDistrict:string, ActiveStreet:string>>," +
+ "proddate struct<productionDate:string,activeDeactivedate:array<string>>, gamePointId " +
+ "double,contractNumber double) " +
+ "STORED BY 'org.apache.carbondata.format' " +
+ "TBLPROPERTIES ('DICTIONARY_INCLUDE'='deviceInformationId')"
+ )
+ sql(
+ s"LOAD DATA local inpath '$resourcesPath/complexdatastructextra.csv' INTO table " +
+ "complexcarbontable " +
+ "OPTIONS('DELIMITER'=',', 'QUOTECHAR'='\"', 'FILEHEADER'='deviceInformationId,channelsId," +
+ "ROMSize,purchasedate,mobile,MAC,locationinfo,proddate,gamePointId,contractNumber'," +
+ "'COMPLEX_DELIMITER_LEVEL_1'='$', 'COMPLEX_DELIMITER_LEVEL_2'=':')"
+ )
+ sql("drop table if exists complexcarbontable")
+ }
+
+ test("complex types & no dictionary columns data loading") {
+ sql("drop table if exists complexcarbontable")
+ sql("create table complexcarbontable(deviceInformationId int, channelsId string," +
+ "ROMSize string, purchasedate string, mobile struct<imei:string, imsi:string>," +
+ "MAC array<string>, locationinfo array<struct<ActiveAreaId:int, ActiveCountry:string, " +
+ "ActiveProvince:string, Activecity:string, ActiveDistrict:string, ActiveStreet:string>>," +
+ "proddate struct<productionDate:string,activeDeactivedate:array<string>>, gamePointId " +
+ "double,contractNumber double) " +
+ "STORED BY 'org.apache.carbondata.format' " +
+ "TBLPROPERTIES ('DICTIONARY_INCLUDE'='deviceInformationId', 'DICTIONARY_EXCLUDE'='ROMSize," +
+ "purchasedate')"
+ )
+ sql(
+ s"LOAD DATA local inpath '$resourcesPath/complexdata.csv' INTO table " +
+ "complexcarbontable " +
+ "OPTIONS('DELIMITER'=',', 'QUOTECHAR'='\"', 'FILEHEADER'='deviceInformationId,channelsId," +
+ "ROMSize,purchasedate,mobile,MAC,locationinfo,proddate,gamePointId,contractNumber'," +
+ "'COMPLEX_DELIMITER_LEVEL_1'='$', 'COMPLEX_DELIMITER_LEVEL_2'=':')"
+ );
+ sql("drop table if exists complexcarbontable")
+ }
+
+ test("array<string> and string datatype for same column is not working properly") {
+ sql("drop table if exists complexcarbontable")
+ sql("create table complexcarbontable(deviceInformationId int, MAC array<string>, channelsId string, "+
+ "ROMSize string, purchasedate string, gamePointId double,contractNumber double) STORED BY 'org.apache.carbondata.format' "+
+ "TBLPROPERTIES ('DICTIONARY_INCLUDE'='deviceInformationId')")
+ sql(s"LOAD DATA local inpath '$resourcesPath/complexdatareordered.csv' INTO table complexcarbontable "+
+ "OPTIONS('DELIMITER'=',', 'QUOTECHAR'='\"', 'FILEHEADER'='deviceInformationId,MAC,channelsId,ROMSize,purchasedate,gamePointId,contractNumber',"+
+ "'COMPLEX_DELIMITER_LEVEL_1'='$', 'COMPLEX_DELIMITER_LEVEL_2'=':')")
+ sql("drop table if exists complexcarbontable")
+ sql("create table primitivecarbontable(deviceInformationId int, MAC string, channelsId string, "+
+ "ROMSize string, purchasedate string, gamePointId double,contractNumber double) STORED BY 'org.apache.carbondata.format' "+
+ "TBLPROPERTIES ('DICTIONARY_INCLUDE'='deviceInformationId')")
+ sql(s"LOAD DATA local inpath '$resourcesPath/complexdatareordered.csv' INTO table primitivecarbontable "+
+ "OPTIONS('DELIMITER'=',', 'QUOTECHAR'='\"', 'FILEHEADER'='deviceInformationId,MAC,channelsId,ROMSize,purchasedate,gamePointId,contractNumber',"+
+ "'COMPLEX_DELIMITER_LEVEL_1'='$', 'COMPLEX_DELIMITER_LEVEL_2'=':')")
+ sql("drop table if exists primitivecarbontable")
+ }
+
+ test(
+ "test carbon table data loading when table name is in different case with create table, for " +
+ "UpperCase"
+ ) {
+ sql("drop table if exists UPPERCASEcube")
+ sql("create table UPPERCASEcube(empno Int, empname String, designation String, " +
+ "doj String, workgroupcategory Int, workgroupcategoryname String, deptno Int, " +
+ "deptname String, projectcode Int, projectjoindate String, projectenddate String, " +
+ "attendance Int,utilization Double,salary Double) STORED BY 'org.apache.carbondata.format'"
+ )
+ sql(
+ s"LOAD DATA local inpath '$resourcesPath/data.csv' INTO table uppercasecube OPTIONS" +
+ "('DELIMITER'=',', 'QUOTECHAR'='\"')"
+ )
+ sql("drop table if exists UpperCaseCube")
+ }
+
+ test(
+ "test carbon table data loading when table name is in different case with create table ,for " +
+ "LowerCase"
+ ) {
+ sql("drop table if exists lowercaseCUBE")
+ sql("create table lowercaseCUBE(empno Int, empname String, designation String, " +
+ "doj String, workgroupcategory Int, workgroupcategoryname String, deptno Int, " +
+ "deptname String, projectcode Int, projectjoindate String, projectenddate String, " +
+ "attendance Int,utilization Double,salary Double) STORED BY 'org.apache.carbondata.format'"
+ )
+ sql(
+ s"LOAD DATA local inpath '$resourcesPath/data.csv' INTO table LOWERCASECUBE OPTIONS" +
+ "('DELIMITER'=',', 'QUOTECHAR'='\"')"
+ )
+ sql("drop table if exists LowErcasEcube")
+ }
+
+ test("test carbon table data loading using escape char 1") {
+ sql("DROP TABLE IF EXISTS escapechar1")
+
+ sql(
+ """
+ CREATE TABLE IF NOT EXISTS escapechar1
+ (ID Int, date Timestamp, country String,
+ name String, phonetype String, serialname String, salary Int)
+ STORED BY 'org.apache.carbondata.format'
+ """
+ )
+ CarbonProperties.getInstance()
+ .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, "yyyy/MM/dd")
+ sql(
+ s"""
+ LOAD DATA LOCAL INPATH '$resourcesPath/datawithbackslash.csv' into table escapechar1
+ OPTIONS('ESCAPECHAR'='@')
+ """
+ )
+ checkAnswer(sql("select count(*) from escapechar1"), Seq(Row(10)))
+ CarbonProperties.getInstance()
+ .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, "dd-MM-yyyy")
+ sql("DROP TABLE IF EXISTS escapechar1")
+ }
+
+ test("test carbon table data loading using escape char 2") {
+ sql("DROP TABLE IF EXISTS escapechar2")
+
+ sql(
+ """
+ CREATE TABLE escapechar2(imei string,specialchar string)
+ STORED BY 'org.apache.carbondata.format'
+ """
+ )
+
+ sql(
+ s"""
+ LOAD DATA LOCAL INPATH '$resourcesPath/datawithescapecharacter.csv' into table escapechar2
+ options ('DELIMITER'=',', 'QUOTECHAR'='"','ESCAPECHAR'='\')
+ """
+ )
+ checkAnswer(sql("select count(*) from escapechar2"), Seq(Row(21)))
+ checkAnswer(sql("select specialchar from escapechar2 where imei = '1AA44'"), Seq(Row("escapeesc")))
+ sql("DROP TABLE IF EXISTS escapechar2")
+ }
+
+ test("test carbon table data loading using escape char 3") {
+ sql("DROP TABLE IF EXISTS escapechar3")
+
+ sql(
+ """
+ CREATE TABLE escapechar3(imei string,specialchar string)
+ STORED BY 'org.apache.carbondata.format'
+ """
+ )
+
+ sql(
+ s"""
+ LOAD DATA LOCAL INPATH '$resourcesPath/datawithescapecharacter.csv' into table escapechar3
+ options ('DELIMITER'=',', 'QUOTECHAR'='"','ESCAPECHAR'='@')
+ """
+ )
+ checkAnswer(sql("select count(*) from escapechar3"), Seq(Row(21)))
+ checkAnswer(sql("select specialchar from escapechar3 where imei in ('1232','12323')"), Seq(Row
+ ("ayush@b.com"), Row("ayushb.com")
+ )
+ )
+ sql("DROP TABLE IF EXISTS escapechar3")
+ }
+
+ test("test carbon table data loading with special character 1") {
+ sql("DROP TABLE IF EXISTS specialcharacter1")
+
+ sql(
+ """
+ CREATE TABLE specialcharacter1(imei string,specialchar string)
+ STORED BY 'org.apache.carbondata.format'
+ """
+ )
+
+ sql(
+ s"""
+ LOAD DATA LOCAL INPATH '$resourcesPath/datawithspecialcharacter.csv' into table specialcharacter1
+ options ('DELIMITER'=',', 'QUOTECHAR'='"')
+ """
+ )
+ checkAnswer(sql("select count(*) from specialcharacter1"), Seq(Row(37)))
+ checkAnswer(sql("select specialchar from specialcharacter1 where imei='1AA36'"), Seq(Row("\"i\"")))
+ sql("DROP TABLE IF EXISTS specialcharacter1")
+ }
+
+ test("test carbon table data loading with special character 2") {
+ sql("DROP TABLE IF EXISTS specialcharacter2")
+
+ sql(
+ """
+ CREATE table specialcharacter2(customer_id int, 124_string_level_province String, date_level String,
+ Time_level String, lname String, fname String, mi String, address1 String, address2
+ String, address3 String, address4 String, city String, country String, phone1 String,
+ phone2 String, marital_status String, yearly_income String, gender String, education
+ String, member_card String, occupation String, houseowner String, fullname String,
+ numeric_level double, account_num double, customer_region_id int, total_children int,
+ num_children_at_home int, num_cars_owned int)
+ STORED BY 'org.apache.carbondata.format'
+ """
+ )
+
+ sql(
+ s"""
+ LOAD DATA LOCAL INPATH '$resourcesPath/datawithcomplexspecialchar.csv' into
+ table specialcharacter2 options ('DELIMITER'=',', 'QUOTECHAR'='"','ESCAPECHAR'='"')
+ """
+ )
+ checkAnswer(sql("select count(*) from specialcharacter2"), Seq(Row(150)))
+ checkAnswer(sql("select 124_string_level_province from specialcharacter2 where customer_id=103"),
+ Seq(Row("\"state province # 124\""))
+ )
+ sql("DROP TABLE IF EXISTS specialcharacter2")
+ }
+
+ test("test data which contain column less than schema"){
+ sql("DROP TABLE IF EXISTS collessthanschema")
+
+ sql(
+ """
+ CREATE TABLE IF NOT EXISTS collessthanschema
+ (ID Int, date Timestamp, country String,
+ name String, phonetype String, serialname String, salary Int)
+ STORED BY 'org.apache.carbondata.format'
+ """)
+
+ CarbonProperties.getInstance()
+ .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, "yyyy/MM/dd")
+ sql(s"""
+ LOAD DATA LOCAL INPATH '$resourcesPath/lessthandatacolumndata.csv' into table collessthanschema
+ """)
+ checkAnswer(sql("select count(*) from collessthanschema"),Seq(Row(10)))
+ sql("DROP TABLE IF EXISTS collessthanschema")
+ }
+
+ test("test data which contain column with decimal data type in array."){
+ sql("DROP TABLE IF EXISTS decimalarray")
+
+ sql(
+ """
+ CREATE TABLE IF NOT EXISTS decimalarray
+ (ID decimal(5,5), date Timestamp, country String,
+ name String, phonetype String, serialname String, salary Int, complex
+ array<decimal(4,2)>)
+ STORED BY 'org.apache.carbondata.format'
+ """
+ )
+
+ CarbonProperties.getInstance()
+ .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, "yyyy/MM/dd")
+ sql(s"""
+ LOAD DATA LOCAL INPATH '$resourcesPath/complexTypeDecimal.csv' into table decimalarray
+ """)
+ checkAnswer(sql("select count(*) from decimalarray"),Seq(Row(8)))
+ sql("DROP TABLE IF EXISTS decimalarray")
+ }
+
+ test("test data which contain column with decimal data type in struct."){
+ sql("DROP TABLE IF EXISTS decimalstruct")
+
+ sql(
+ """
+ CREATE TABLE IF NOT EXISTS decimalstruct
+ (ID decimal(5,5), date Timestamp, country String,
+ name String, phonetype String, serialname String, salary Int, complex
+ struct<a:decimal(4,2)>)
+ STORED BY 'org.apache.carbondata.format'
+ """
+ )
+
+ CarbonProperties.getInstance()
+ .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, "yyyy/MM/dd")
+ sql(s"""
+ LOAD DATA LOCAL INPATH '$resourcesPath/complexTypeDecimal.csv' into table decimalstruct
+ """)
+ checkAnswer(sql("select count(*) from decimalstruct"),Seq(Row(8)))
+ sql("DROP TABLE IF EXISTS decimalstruct")
+ }
+
+ test("test data which contain column with decimal data type in array of struct."){
+ sql("DROP TABLE IF EXISTS complex_t3")
+ sql("DROP TABLE IF EXISTS complex_hive_t3")
+
+ sql(
+ """
+ CREATE TABLE complex_t3
+ (ID decimal, date Timestamp, country String,
+ name String, phonetype String, serialname String, salary Int, complex
+ array<struct<a:decimal(4,2),str:string>>)
+ STORED BY 'org.apache.carbondata.format'
+ """
+ )
+ sql(
+ """
+ CREATE TABLE complex_hive_t3
+ (ID decimal, date Timestamp, country String,
+ name String, phonetype String, serialname String, salary Int, complex
+ array<struct<a:decimal(4,2),str:string>>)
+ row format delimited fields terminated by ','
+ """
+ )
+
+ CarbonProperties.getInstance()
+ .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, "yyyy/MM/dd")
+ sql(s"""
+ LOAD DATA LOCAL INPATH '$resourcesPath/complexTypeDecimalNested.csv' into table complex_t3
+ """)
+ sql(s"""
+ LOAD DATA LOCAL INPATH '$resourcesPath/complexTypeDecimalNestedHive.csv' into table complex_hive_t3
+ """)
+ checkAnswer(sql("select count(*) from complex_t3"),sql("select count(*) from complex_hive_t3"))
+ checkAnswer(sql("select id from complex_t3 where salary = 15000"),sql("select id from complex_hive_t3 where salary = 15000"))
+ }
+
+ test("test data loading when delimiter is '|' and data with header") {
+ sql(
+ "CREATE table carbontable1 (empno string, empname String, designation String, doj String, " +
+ "workgroupcategory string, workgroupcategoryname String, deptno string, deptname String, " +
+ "projectcode string, projectjoindate String, projectenddate String,attendance double," +
+ "utilization double,salary double) STORED BY 'org.apache.carbondata.format' TBLPROPERTIES" +
+ "('DICTIONARY_EXCLUDE'='empno,empname,designation,doj,workgroupcategory," +
+ "workgroupcategoryname,deptno,deptname,projectcode,projectjoindate,projectenddate')"
+ )
+ sql(
+ "create table hivetable1 (empno string, empname String, designation string, doj String, " +
+ "workgroupcategory string, workgroupcategoryname String,deptno string, deptname String, " +
+ "projectcode string, projectjoindate String,projectenddate String, attendance double," +
+ "utilization double,salary double)row format delimited fields terminated by ','"
+ )
+
+ sql(
+ s"LOAD DATA local inpath '$resourcesPath/datadelimiter.csv' INTO TABLE carbontable1 OPTIONS" +
+ "('DELIMITER'= '|', 'QUOTECHAR'= '\"')"
+ )
+
+ sql(s"LOAD DATA local inpath '$resourcesPath/datawithoutheader.csv' INTO table hivetable1")
+
+ checkAnswer(sql("select * from carbontable1"), sql("select * from hivetable1"))
+ }
+
+ test("test data loading with comment option") {
+ sql("drop table if exists comment_test")
+ sql(
+ "create table comment_test(imei string, age int, task bigint, num double, level decimal(10," +
+ "3), productdate timestamp, mark int, name string) STORED BY 'org.apache.carbondata.format'"
+ )
+ sql(
+ s"LOAD DATA local inpath '$resourcesPath/comment.csv' INTO TABLE comment_test " +
+ "options('DELIMITER' = ',', 'QUOTECHAR' = '.', 'COMMENTCHAR' = '?','FILEHEADER'='imei,age,task,num,level,productdate,mark,name', 'maxcolumns'='180')"
+ )
+ checkAnswer(sql("select imei from comment_test"),Seq(Row("\".carbon"),Row("#?carbon"), Row(""),
+ Row("~carbon,")))
+ }
+
+ override def afterAll {
+ sql("drop table if exists escapechar1")
+ sql("drop table if exists escapechar2")
+ sql("drop table if exists escapechar3")
+ sql("drop table if exists specialcharacter1")
+ sql("drop table if exists specialcharacter2")
+ sql("drop table if exists collessthanschema")
+ sql("drop table if exists decimalarray")
+ sql("drop table if exists decimalstruct")
+ sql("drop table if exists carbontable")
+ sql("drop table if exists hivetable")
+ sql("drop table if exists testtable")
+ sql("drop table if exists testhivetable")
+ sql("drop table if exists testtable1")
+ sql("drop table if exists testhivetable1")
+ sql("drop table if exists complexcarbontable")
+ sql("drop table if exists complex_t3")
+ sql("drop table if exists complex_hive_t3")
+ sql("drop table if exists header_test")
+ sql("drop table if exists duplicateColTest")
+ sql("drop table if exists mixed_header_test")
+ sql("drop table if exists primitivecarbontable")
+ sql("drop table if exists UPPERCASEcube")
+ sql("drop table if exists lowercaseCUBE")
+ sql("drop table if exists carbontable1")
+ sql("drop table if exists hivetable1")
+ sql("drop table if exists comment_test")
+ CarbonProperties.getInstance().addProperty(
+ CarbonCommonConstants.ENABLE_UNSAFE_COLUMN_PAGE_LOADING,
+ CarbonCommonConstants.ENABLE_UNSAFE_COLUMN_PAGE_LOADING_DEFAULT
+ )
+ }
+}
http://git-wip-us.apache.org/repos/asf/carbondata/blob/7359601b/integration/spark2/src/test/scala/org/apache/spark/carbondata/CarbonDataSourceSuite.scala
----------------------------------------------------------------------
diff --git a/integration/spark2/src/test/scala/org/apache/spark/carbondata/CarbonDataSourceSuite.scala b/integration/spark2/src/test/scala/org/apache/spark/carbondata/CarbonDataSourceSuite.scala
index 52e3764..2995e60 100644
--- a/integration/spark2/src/test/scala/org/apache/spark/carbondata/CarbonDataSourceSuite.scala
+++ b/integration/spark2/src/test/scala/org/apache/spark/carbondata/CarbonDataSourceSuite.scala
@@ -87,7 +87,7 @@ class CarbonDataSourceSuite extends QueryTest with BeforeAndAfterAll {
CarbonProperties.getInstance()
.addProperty("carbon.blockletgroup.size.in.mb", "16")
.addProperty("carbon.enable.vector.reader", "true")
- .addProperty("enable.unsafe.sort", "true")
+ //.addProperty("enable.unsafe.sort", "true")
val rdd = sqlContext.sparkContext
.parallelize(1 to 1200000, 4)
http://git-wip-us.apache.org/repos/asf/carbondata/blob/7359601b/processing/src/main/java/org/apache/carbondata/processing/store/CarbonFactDataHandlerColumnar.java
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/apache/carbondata/processing/store/CarbonFactDataHandlerColumnar.java b/processing/src/main/java/org/apache/carbondata/processing/store/CarbonFactDataHandlerColumnar.java
index be86808..b37c1e4 100644
--- a/processing/src/main/java/org/apache/carbondata/processing/store/CarbonFactDataHandlerColumnar.java
+++ b/processing/src/main/java/org/apache/carbondata/processing/store/CarbonFactDataHandlerColumnar.java
@@ -42,6 +42,7 @@ import org.apache.carbondata.core.datastore.row.CarbonRow;
import org.apache.carbondata.core.keygenerator.KeyGenException;
import org.apache.carbondata.core.keygenerator.columnar.ColumnarSplitter;
import org.apache.carbondata.core.keygenerator.columnar.impl.MultiDimKeyVarLengthEquiSplitGenerator;
+import org.apache.carbondata.core.memory.MemoryException;
import org.apache.carbondata.core.metadata.CarbonMetadata;
import org.apache.carbondata.core.metadata.ColumnarFormatVersion;
import org.apache.carbondata.core.metadata.datatype.DataType;
@@ -343,7 +344,7 @@ public class CarbonFactDataHandlerColumnar implements CarbonFactHandler {
* generate the NodeHolder from the input rows (one page in case of V3 format)
*/
private NodeHolder processDataRows(List<CarbonRow> dataRows)
- throws CarbonDataWriterException, KeyGenException {
+ throws CarbonDataWriterException, KeyGenException, MemoryException {
if (dataRows.size() == 0) {
return new NodeHolder();
}
@@ -364,10 +365,11 @@ public class CarbonFactDataHandlerColumnar implements CarbonFactHandler {
TablePageStatistics tablePageStatistics = new TablePageStatistics(
model.getTableSpec(), tablePage, encodedData, tablePage.getMeasureStats());
- LOGGER.info("Number Of records processed: " + dataRows.size());
+ NodeHolder nodeHolder = dataWriter.buildDataNodeHolder(encodedData, tablePageStatistics, keys);
+ tablePage.freeMemory();
- // TODO: writer interface should be modified to use TablePage
- return dataWriter.buildDataNodeHolder(encodedData, tablePageStatistics, keys);
+ LOGGER.info("Number Of records processed: " + dataRows.size());
+ return nodeHolder;
}
/**
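Note on the reordering above: buildDataNodeHolder must copy the encoded bytes out of the table page before tablePage.freeMemory() releases the (possibly off-heap) memory backing them. A minimal sketch of that build-then-free ordering, assuming nothing beyond what the hunk shows (the interface and names below are illustrative stand-ins, not the real classes):

final class BuildThenFree {
  /** Stand-in for a table page whose data may live in unsafe (off-heap) memory. */
  interface Page {
    byte[] encode();     // materializes the data into ordinary heap byte arrays
    void freeMemory();   // releases the backing memory; the page must not be read afterwards
  }

  static byte[] buildHolder(Page page) {
    byte[] holder = page.encode();  // 1. build the result while the page is still valid
    page.freeMemory();              // 2. only then release the page memory
    return holder;                  // 3. the holder no longer references the freed memory
  }
}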
http://git-wip-us.apache.org/repos/asf/carbondata/blob/7359601b/processing/src/main/java/org/apache/carbondata/processing/store/TablePage.java
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/apache/carbondata/processing/store/TablePage.java b/processing/src/main/java/org/apache/carbondata/processing/store/TablePage.java
index 65504cd..f068400 100644
--- a/processing/src/main/java/org/apache/carbondata/processing/store/TablePage.java
+++ b/processing/src/main/java/org/apache/carbondata/processing/store/TablePage.java
@@ -33,11 +33,13 @@ import org.apache.carbondata.core.datastore.page.statistics.MeasurePageStatsVO;
import org.apache.carbondata.core.datastore.row.CarbonRow;
import org.apache.carbondata.core.datastore.row.WriteStepRowUtil;
import org.apache.carbondata.core.keygenerator.KeyGenException;
+import org.apache.carbondata.core.memory.MemoryException;
import org.apache.carbondata.core.metadata.datatype.DataType;
import org.apache.carbondata.core.util.DataTypeUtil;
import org.apache.spark.sql.types.Decimal;
+
/**
* Represent a page data for all columns, we store its data in columnar layout, so that
* all processing apply to TablePage can be done in vectorized fashion.
@@ -61,17 +63,17 @@ public class TablePage {
private CarbonFactDataHandlerModel model;
- public TablePage(CarbonFactDataHandlerModel model, int pageSize) {
+ TablePage(CarbonFactDataHandlerModel model, int pageSize) throws MemoryException {
this.model = model;
this.pageSize = pageSize;
int numDictDimension = model.getMDKeyGenerator().getDimCount();
dictDimensionPage = new ColumnPage[numDictDimension];
for (int i = 0; i < dictDimensionPage.length; i++) {
- dictDimensionPage[i] = ColumnPage.newPage(DataType.BYTE_ARRAY, pageSize);
+ dictDimensionPage[i] = ColumnPage.newVarLengthPage(DataType.BYTE_ARRAY, pageSize);
}
noDictDimensionPage = new ColumnPage[model.getNoDictionaryCount()];
for (int i = 0; i < noDictDimensionPage.length; i++) {
- noDictDimensionPage[i] = ColumnPage.newPage(DataType.BYTE_ARRAY, pageSize);
+ noDictDimensionPage[i] = ColumnPage.newVarLengthPage(DataType.BYTE_ARRAY, pageSize);
}
complexDimensionPage = new ComplexColumnPage[model.getComplexColumnCount()];
for (int i = 0; i < complexDimensionPage.length; i++) {
@@ -190,38 +192,51 @@ public class TablePage {
}
}
+ void freeMemory() {
+ for (ColumnPage page : dictDimensionPage) {
+ page.freeMemory();
+ }
+ for (ColumnPage page : noDictDimensionPage) {
+ page.freeMemory();
+ }
+ for (ColumnPage page : measurePage) {
+ page.freeMemory();
+ }
+ }
+
// Adds length as a short element (first 2 bytes) to the head of the input byte array
private byte[] addLengthToByteArray(byte[] input) {
+ if (input.length > Short.MAX_VALUE) {
+ throw new RuntimeException("input data length " + input.length +
+ " bytes is too long, maximum supported length is " + Short.MAX_VALUE + " bytes");
+ }
byte[] output = new byte[input.length + 2];
ByteBuffer buffer = ByteBuffer.wrap(output);
- buffer.putShort((short) input.length);
+ buffer.putShort((short)input.length);
buffer.put(input, 0, input.length);
return output;
}
- public ColumnPage[] getDictDimensionPage() {
+ ColumnPage[] getDictDimensionPage() {
return dictDimensionPage;
}
- public ColumnPage[] getNoDictDimensionPage() {
+ ColumnPage[] getNoDictDimensionPage() {
return noDictDimensionPage;
}
- public ComplexColumnPage[] getComplexDimensionPage() {
+ ComplexColumnPage[] getComplexDimensionPage() {
return complexDimensionPage;
}
- public ColumnPage[] getMeasurePage() {
+ ColumnPage[] getMeasurePage() {
return measurePage;
}
- public MeasurePageStatsVO getMeasureStats() {
+ MeasurePageStatsVO getMeasureStats() {
return measurePageStatistics;
}
- public int getPageSize() {
- return pageSize;
- }
}
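Note on the new guard in addLengthToByteArray above: the length is written as a 2-byte short prefix, so any input longer than Short.MAX_VALUE bytes would silently wrap when cast. For reference, a minimal sketch of the matching read side (not part of the patch; the class and method names are illustrative):

import java.nio.ByteBuffer;

final class LengthPrefixedBytes {
  /** Reads one [2-byte length][payload] record starting at offset and returns the payload. */
  static byte[] read(byte[] data, int offset) {
    ByteBuffer buffer = ByteBuffer.wrap(data, offset, data.length - offset);
    int length = buffer.getShort();   // same 2-byte prefix the writer emits
    byte[] value = new byte[length];
    buffer.get(value);                // payload immediately follows the prefix
    return value;
  }
}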
http://git-wip-us.apache.org/repos/asf/carbondata/blob/7359601b/processing/src/main/java/org/apache/carbondata/processing/store/TablePageEncoder.java
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/apache/carbondata/processing/store/TablePageEncoder.java b/processing/src/main/java/org/apache/carbondata/processing/store/TablePageEncoder.java
index 18c2297..956a87f 100644
--- a/processing/src/main/java/org/apache/carbondata/processing/store/TablePageEncoder.java
+++ b/processing/src/main/java/org/apache/carbondata/processing/store/TablePageEncoder.java
@@ -33,6 +33,7 @@ import org.apache.carbondata.core.datastore.page.encoding.DefaultEncodingStrateg
import org.apache.carbondata.core.datastore.page.encoding.EncodedData;
import org.apache.carbondata.core.datastore.page.encoding.EncodingStrategy;
import org.apache.carbondata.core.keygenerator.KeyGenException;
+import org.apache.carbondata.core.memory.MemoryException;
import org.apache.carbondata.core.metadata.ColumnarFormatVersion;
import org.apache.carbondata.core.util.ByteUtil;
import org.apache.carbondata.core.util.CarbonProperties;
@@ -54,7 +55,7 @@ public class TablePageEncoder {
}
// function to apply all columns in one table page
- public EncodedData encode(TablePage tablePage) throws KeyGenException {
+ public EncodedData encode(TablePage tablePage) throws KeyGenException, MemoryException {
EncodedData encodedData = new EncodedData();
encodeAndCompressDimensions(tablePage, encodedData);
encodeAndCompressMeasures(tablePage, encodedData);
@@ -62,7 +63,8 @@ public class TablePageEncoder {
}
// apply measure and set encodedData in `encodedData`
- private void encodeAndCompressMeasures(TablePage tablePage, EncodedData encodedData) {
+ private void encodeAndCompressMeasures(TablePage tablePage, EncodedData encodedData)
+ throws MemoryException {
ColumnPage[] measurePage = tablePage.getMeasurePage();
byte[][] encodedMeasures = new byte[measurePage.length][];
for (int i = 0; i < measurePage.length; i++) {
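Note on the signature changes above: because measure encoding may now allocate unsafe memory, MemoryException is a checked exception declared all the way up the chain (encodeAndCompressMeasures, encode, processDataRows) rather than caught and hidden. A minimal sketch of that shape, using illustrative stand-in names rather than the real classes:

final class MemoryExceptionFlow {
  /** Stand-in for org.apache.carbondata.core.memory.MemoryException. */
  static final class MemoryException extends Exception {
    MemoryException(String message) { super(message); }
  }

  static void encodeMeasures(boolean enoughMemory) throws MemoryException {
    if (!enoughMemory) {
      throw new MemoryException("Not enough memory");  // raised when allocation fails
    }
  }

  static void encode(boolean enoughMemory) throws MemoryException {
    encodeMeasures(enoughMemory);  // each layer re-declares the exception instead of swallowing it
  }
}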
http://git-wip-us.apache.org/repos/asf/carbondata/blob/7359601b/processing/src/main/java/org/apache/carbondata/processing/store/writer/v3/CarbonFactDataWriterImplV3.java
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/apache/carbondata/processing/store/writer/v3/CarbonFactDataWriterImplV3.java b/processing/src/main/java/org/apache/carbondata/processing/store/writer/v3/CarbonFactDataWriterImplV3.java
index a63e902..dd731a8 100644
--- a/processing/src/main/java/org/apache/carbondata/processing/store/writer/v3/CarbonFactDataWriterImplV3.java
+++ b/processing/src/main/java/org/apache/carbondata/processing/store/writer/v3/CarbonFactDataWriterImplV3.java
@@ -395,9 +395,11 @@ public class CarbonFactDataWriterImplV3 extends AbstractFactDataWriter<short[]>
long dimensionOffset = 0;
long measureOffset = 0;
int numberOfRows = 0;
+ long totalSize = 0;
// calculate the number of rows in each blocklet
for (int j = 0; j < nodeHolderList.size(); j++) {
numberOfRows += nodeHolderList.get(j).getEntryCount();
+ totalSize += nodeHolderList.get(j).getHolderSize();
}
try {
for (int i = 0; i < numberOfDimension; i++) {