You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@carbondata.apache.org by ja...@apache.org on 2018/06/22 01:34:14 UTC
[13/50] [abbrv] carbondata git commit: [CARBONDATA-2577]
[CARBONDATA-2579] Fixed issue in Avro logical type for nested Array and
document update
[CARBONDATA-2577] [CARBONDATA-2579] Fixed issue in Avro logical type for nested Array and document update
Problem: The logical types date, timestamp-millis and timestamp-micros do not work when they are nested inside an Array.
Root cause: While preparing the carbon schema from the avro schema, logical types were not handled for nested array types.
Solution: Handle the logical types for nested array types during carbon schema preparation.
This closes #2361
Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/041603dc
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/041603dc
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/041603dc
Branch: refs/heads/carbonstore
Commit: 041603dccf1d98348db36c4bf8e2e60d50a5bcc8
Parents: d401e06
Author: ajantha-bhat <aj...@gmail.com>
Authored: Mon Jun 4 16:12:48 2018 +0530
Committer: kumarvishal09 <ku...@gmail.com>
Committed: Tue Jun 5 19:21:07 2018 +0530
----------------------------------------------------------------------
docs/data-management-on-carbondata.md | 7 ++++-
docs/sdk-guide.md | 15 +++++++++++
.../TestNonTransactionalCarbonTable.scala | 28 +++++++++-----------
.../carbondata/sdk/file/AvroCarbonWriter.java | 26 ++++++++++++++++--
4 files changed, 58 insertions(+), 18 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/carbondata/blob/041603dc/docs/data-management-on-carbondata.md
----------------------------------------------------------------------
diff --git a/docs/data-management-on-carbondata.md b/docs/data-management-on-carbondata.md
index 706209c..3326e9b 100644
--- a/docs/data-management-on-carbondata.md
+++ b/docs/data-management-on-carbondata.md
@@ -216,7 +216,12 @@ This tutorial is going to introduce all commands and data operations on CarbonDa
This can be SDK output. Refer [SDK Writer Guide](https://github.com/apache/carbondata/blob/master/docs/sdk-writer-guide.md).
**Note:**
- Dropping of the external table should not delete the files present in the location.
+ 1. Dropping of the external table should not delete the files present in the location.
+ 2. When an external table is created on non-transactional table data,
+ the external table is registered with the schema of the carbondata files.
+ If multiple files with different schemas are present, an exception will be thrown.
+ So, if the table is registered with one schema and the files have a different schema,
+ drop the external table and create it again to register the table with the new schema.
## CREATE DATABASE
http://git-wip-us.apache.org/repos/asf/carbondata/blob/041603dc/docs/sdk-guide.md
----------------------------------------------------------------------
diff --git a/docs/sdk-guide.md b/docs/sdk-guide.md
index 0f20dc3..e04698d 100644
--- a/docs/sdk-guide.md
+++ b/docs/sdk-guide.md
@@ -128,6 +128,21 @@ Each of SQL data types are mapped into data types of SDK. Following are the mapp
| STRING | DataTypes.STRING |
| DECIMAL | DataTypes.createDecimalType(precision, scale) |
+**NOTE:**
+ Carbon supports the following AVRO logical types.
+ a. Date
+ The date logical type represents a date within the calendar, with no reference to a particular time zone or time of day.
+ A date logical type annotates an Avro int, where the int stores the number of days from the unix epoch, 1 January 1970 (ISO calendar).
+ b. Timestamp (millisecond precision)
+ The timestamp-millis logical type represents an instant on the global timeline, independent of a particular time zone or calendar, with a precision of one millisecond.
+ A timestamp-millis logical type annotates an Avro long, where the long stores the number of milliseconds from the unix epoch, 1 January 1970 00:00:00.000 UTC.
+ c. Timestamp (microsecond precision)
+ The timestamp-micros logical type represents an instant on the global timeline, independent of a particular time zone or calendar, with a precision of one microsecond.
+ A timestamp-micros logical type annotates an Avro long, where the long stores the number of microseconds from the unix epoch, 1 January 1970 00:00:00.000000 UTC.
+
+ Currently, the values of logical types are not validated by carbon.
+ Carbon expects that the avro record passed by the user has already been validated by avro record generator tools.
+
## Run SQL on files directly
Instead of creating table and query it, you can also query that file directly with SQL.
http://git-wip-us.apache.org/repos/asf/carbondata/blob/041603dc/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala
----------------------------------------------------------------------
diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala
index 14a63ca..b275bb8 100644
--- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala
+++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala
@@ -1825,6 +1825,7 @@ class TestNonTransactionalCarbonTable extends QueryTest with BeforeAndAfterAll {
| "items": {
| "name": "EachdoorNums",
| "type": "int",
+ | "logicalType": "date",
| "default": -1
| }
| }
@@ -1849,8 +1850,8 @@ class TestNonTransactionalCarbonTable extends QueryTest with BeforeAndAfterAll {
buildAvroTestDataMultiLevel3_2(3, null)
}
- // test multi level -- 3 levels [array of array of array of int]
- test("test multi level support : array of array of array of int") {
+ // test multi level -- 3 levels [array of array of array of int with logical type]
+ test("test multi level support : array of array of array of int with logical type") {
buildAvroTestDataMultiLevel3_2Type()
assert(new File(writerPath).exists())
sql("DROP TABLE IF EXISTS sdkOutputTable")
@@ -1858,22 +1859,19 @@ class TestNonTransactionalCarbonTable extends QueryTest with BeforeAndAfterAll {
s"""CREATE EXTERNAL TABLE sdkOutputTable STORED BY 'carbondata' LOCATION
|'$writerPath' """.stripMargin)
- sql("select * from sdkOutputTable").show(false)
+ sql("select * from sdkOutputTable limit 1").show(false)
// TODO: Add a validation
/*
- +----+---+---------------------------------------------------------------------------+
- |name|age|BuildNum
- |
- +----+---+---------------------------------------------------------------------------+
- |bob |10 |[WrappedArray(WrappedArray(1, 2, 3), WrappedArray(4, 5, 6)), WrappedArray
- (WrappedArray(10, 20, 30), WrappedArray(40, 50, 60))]|
- |bob |10 |[WrappedArray(WrappedArray(1, 2, 3), WrappedArray(4, 5, 6)), WrappedArray
- (WrappedArray(10, 20, 30), WrappedArray(40, 50, 60))]|
- |bob |10 |[WrappedArray(WrappedArray(1, 2, 3), WrappedArray(4, 5, 6)), WrappedArray
- (WrappedArray(10, 20, 30), WrappedArray(40, 50, 60))]|
- +----+---+---------------------------------------------------------------------------+
- */
+ +----+---+------------------------------------------------------------------+
+ |name|age|BuildNum |
+ +----+---+------------------------------------------------------------------+
+ |bob |10 |[WrappedArray(WrappedArray(1970-01-02, 1970-01-03, 1970-01-04), |
+ | WrappedArray(1970-01-05, 1970-01-06, 1970-01-07)), |
+ | WrappedArray(WrappedArray(1970-01-11, 1970-01-21, 1970-01-31), |
+ | WrappedArray(1970-02-10, 1970-02-20, 1970-03-02))] |
+ +----+---+------------------------------------------------------------------+
+ */
sql("DROP TABLE sdkOutputTable")
// drop table should not delete the files
http://git-wip-us.apache.org/repos/asf/carbondata/blob/041603dc/store/sdk/src/main/java/org/apache/carbondata/sdk/file/AvroCarbonWriter.java
----------------------------------------------------------------------
diff --git a/store/sdk/src/main/java/org/apache/carbondata/sdk/file/AvroCarbonWriter.java b/store/sdk/src/main/java/org/apache/carbondata/sdk/file/AvroCarbonWriter.java
index edecd6b..fdd1f5a 100644
--- a/store/sdk/src/main/java/org/apache/carbondata/sdk/file/AvroCarbonWriter.java
+++ b/store/sdk/src/main/java/org/apache/carbondata/sdk/file/AvroCarbonWriter.java
@@ -323,13 +323,35 @@ public class AvroCarbonWriter extends CarbonWriter {
}
private static DataType getMappingDataTypeForArrayRecord(Schema childSchema) {
+ LogicalType logicalType = childSchema.getLogicalType();
switch (childSchema.getType()) {
case BOOLEAN:
return DataTypes.BOOLEAN;
case INT:
- return DataTypes.INT;
+ if (logicalType != null) {
+ if (logicalType instanceof LogicalTypes.Date) {
+ return DataTypes.DATE;
+ } else {
+ LOGGER.warn("Unsupported logical type. Considering Data Type as INT for " + childSchema
+ .getName());
+ return DataTypes.INT;
+ }
+ } else {
+ return DataTypes.INT;
+ }
case LONG:
- return DataTypes.LONG;
+ if (logicalType != null) {
+ if (logicalType instanceof LogicalTypes.TimestampMillis
+ || logicalType instanceof LogicalTypes.TimestampMicros) {
+ return DataTypes.TIMESTAMP;
+ } else {
+ LOGGER.warn("Unsupported logical type. Considering Data Type as LONG for " + childSchema
+ .getName());
+ return DataTypes.LONG;
+ }
+ } else {
+ return DataTypes.LONG;
+ }
case DOUBLE:
return DataTypes.DOUBLE;
case STRING: