You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@carbondata.apache.org by gv...@apache.org on 2018/06/08 11:40:46 UTC

[21/50] [abbrv] carbondata git commit: [CARBONDATA-2514] Added condition to check for duplicate column names

[CARBONDATA-2514] Added condition to check for duplicate column names

1. Duplicate columns check was not present.
2. IndexFileReader was not being closed due to which index file could not be deleted.

This closes #2332


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/cf666c17
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/cf666c17
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/cf666c17

Branch: refs/heads/spark-2.3
Commit: cf666c17b8be9f11dd9c0b51503ca194162ee782
Parents: 16ed99a
Author: kunal642 <ku...@gmail.com>
Authored: Tue May 22 15:16:32 2018 +0530
Committer: manishgupta88 <to...@gmail.com>
Committed: Thu May 24 10:19:47 2018 +0530

----------------------------------------------------------------------
 .../apache/carbondata/core/util/CarbonUtil.java | 44 +++++++++++---------
 .../carbondata/core/util/DataTypeUtil.java      |  2 +
 .../sdk/file/CarbonWriterBuilder.java           |  7 ++++
 .../sdk/file/AvroCarbonWriterTest.java          | 40 ++++++++++++++++++
 4 files changed, 73 insertions(+), 20 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/carbondata/blob/cf666c17/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java b/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java
index 9dc4aa2..23d02ef 100644
--- a/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java
+++ b/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java
@@ -2380,27 +2380,31 @@ public final class CarbonUtil {
   public static org.apache.carbondata.format.TableInfo inferSchemaFromIndexFile(
       String indexFilePath, String tableName) throws IOException {
     CarbonIndexFileReader indexFileReader = new CarbonIndexFileReader();
-    indexFileReader.openThriftReader(indexFilePath);
-    org.apache.carbondata.format.IndexHeader readIndexHeader = indexFileReader.readIndexHeader();
-    List<ColumnSchema> columnSchemaList = new ArrayList<ColumnSchema>();
-    List<org.apache.carbondata.format.ColumnSchema> table_columns =
-        readIndexHeader.getTable_columns();
-    for (int i = 0; i < table_columns.size(); i++) {
-      columnSchemaList.add(thriftColumnSchmeaToWrapperColumnSchema(table_columns.get(i)));
+    try {
+      indexFileReader.openThriftReader(indexFilePath);
+      org.apache.carbondata.format.IndexHeader readIndexHeader = indexFileReader.readIndexHeader();
+      List<ColumnSchema> columnSchemaList = new ArrayList<ColumnSchema>();
+      List<org.apache.carbondata.format.ColumnSchema> table_columns =
+          readIndexHeader.getTable_columns();
+      for (int i = 0; i < table_columns.size(); i++) {
+        columnSchemaList.add(thriftColumnSchmeaToWrapperColumnSchema(table_columns.get(i)));
+      }
+      // only columnSchema is the valid entry, reset all dummy entries.
+      TableSchema tableSchema = getDummyTableSchema(tableName, columnSchemaList);
+
+      ThriftWrapperSchemaConverterImpl thriftWrapperSchemaConverter =
+          new ThriftWrapperSchemaConverterImpl();
+      org.apache.carbondata.format.TableSchema thriftFactTable =
+          thriftWrapperSchemaConverter.fromWrapperToExternalTableSchema(tableSchema);
+      org.apache.carbondata.format.TableInfo tableInfo =
+          new org.apache.carbondata.format.TableInfo(thriftFactTable,
+              new ArrayList<org.apache.carbondata.format.TableSchema>());
+
+      tableInfo.setDataMapSchemas(null);
+      return tableInfo;
+    } finally {
+      indexFileReader.closeThriftReader();
     }
-    // only columnSchema is the valid entry, reset all dummy entries.
-    TableSchema tableSchema = getDummyTableSchema(tableName, columnSchemaList);
-
-    ThriftWrapperSchemaConverterImpl thriftWrapperSchemaConverter =
-        new ThriftWrapperSchemaConverterImpl();
-    org.apache.carbondata.format.TableSchema thriftFactTable =
-        thriftWrapperSchemaConverter.fromWrapperToExternalTableSchema(tableSchema);
-    org.apache.carbondata.format.TableInfo tableInfo =
-        new org.apache.carbondata.format.TableInfo(thriftFactTable,
-            new ArrayList<org.apache.carbondata.format.TableSchema>());
-
-    tableInfo.setDataMapSchemas(null);
-    return tableInfo;
   }
 
   private static TableSchema getDummyTableSchema(String tableName,

http://git-wip-us.apache.org/repos/asf/carbondata/blob/cf666c17/core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java b/core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java
index f7f71b3..e06c82e 100644
--- a/core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java
+++ b/core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java
@@ -341,6 +341,7 @@ public final class DataTypeUtil {
       try {
         if (null != dateFormat && !dateFormat.trim().isEmpty()) {
           dateFormatter = new SimpleDateFormat(dateFormat);
+          dateFormatter.setLenient(false);
         } else {
           dateFormatter = timeStampformatter.get();
         }
@@ -376,6 +377,7 @@ public final class DataTypeUtil {
       try {
         if (null != dateFormat && !dateFormat.trim().isEmpty()) {
           dateFormatter = new SimpleDateFormat(dateFormat);
+          dateFormatter.setLenient(false);
         } else {
           dateFormatter = timeStampformatter.get();
         }

http://git-wip-us.apache.org/repos/asf/carbondata/blob/cf666c17/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonWriterBuilder.java
----------------------------------------------------------------------
diff --git a/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonWriterBuilder.java b/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonWriterBuilder.java
index e846da4..2277ab0 100644
--- a/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonWriterBuilder.java
+++ b/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonWriterBuilder.java
@@ -21,9 +21,11 @@ import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Objects;
+import java.util.Set;
 import java.util.TreeMap;
 import java.util.concurrent.atomic.AtomicInteger;
 
@@ -421,6 +423,7 @@ public class CarbonWriterBuilder {
 
   private void buildTableSchema(Field[] fields, TableSchemaBuilder tableSchemaBuilder,
       List<String> sortColumnsList, ColumnSchema[] sortColumnsSchemaList) {
+    Set<String> uniqueFields = new HashSet<>();
     // a counter which will be used in case of complex array type. This valIndex will be assigned
     // to child of complex array type in the order val1, val2 so that each array type child is
     // differentiated to any level
@@ -442,6 +445,10 @@ public class CarbonWriterBuilder {
     int i = 0;
     for (Field field : fields) {
       if (null != field) {
+        if (!uniqueFields.add(field.getFieldName())) {
+          throw new RuntimeException(
+              "Duplicate column " + field.getFieldName() + " found in table schema");
+        }
         int isSortColumn = sortColumnsList.indexOf(field.getFieldName());
         if (isSortColumn > -1) {
           // unsupported types for ("array", "struct", "double", "float", "decimal")

http://git-wip-us.apache.org/repos/asf/carbondata/blob/cf666c17/store/sdk/src/test/java/org/apache/carbondata/sdk/file/AvroCarbonWriterTest.java
----------------------------------------------------------------------
diff --git a/store/sdk/src/test/java/org/apache/carbondata/sdk/file/AvroCarbonWriterTest.java b/store/sdk/src/test/java/org/apache/carbondata/sdk/file/AvroCarbonWriterTest.java
index b70e74d..03a4f47 100644
--- a/store/sdk/src/test/java/org/apache/carbondata/sdk/file/AvroCarbonWriterTest.java
+++ b/store/sdk/src/test/java/org/apache/carbondata/sdk/file/AvroCarbonWriterTest.java
@@ -21,9 +21,12 @@ import java.io.File;
 import java.io.FileFilter;
 import java.io.IOException;
 import java.io.UnsupportedEncodingException;
+import java.util.HashMap;
+import java.util.Map;
 
 import org.apache.carbondata.common.exceptions.sql.InvalidLoadOptionException;
 import org.apache.carbondata.core.constants.CarbonCommonConstants;
+import org.apache.carbondata.core.metadata.datatype.DataTypes;
 import org.apache.carbondata.core.util.path.CarbonTablePath;
 
 import org.apache.avro.generic.GenericData;
@@ -450,6 +453,43 @@ public class AvroCarbonWriterTest {
     FileUtils.deleteDirectory(new File(path));
   }
 
+  @Test
+  public void testExceptionForDuplicateColumns() throws IOException, InvalidLoadOptionException {
+    Field[] field = new Field[2];
+    field[0] = new Field("name", DataTypes.STRING);
+    field[1] = new Field("name", DataTypes.STRING);
+    CarbonWriterBuilder writer = CarbonWriter.builder().isTransactionalTable(false)
+        .uniqueIdentifier(System.currentTimeMillis()).outputPath(path);
+
+    try {
+      writer.buildWriterForCSVInput(new org.apache.carbondata.sdk.file.Schema(field));
+      Assert.fail();
+    } catch (Exception e) {
+      assert(e.getMessage().contains("Duplicate column name found in table schema"));
+    }
+    FileUtils.deleteDirectory(new File(path));
+  }
 
+  @Test
+  public void testExceptionForInvalidDate() throws IOException, InvalidLoadOptionException {
+    Field[] field = new Field[2];
+    field[0] = new Field("name", DataTypes.STRING);
+    field[1] = new Field("date", DataTypes.DATE);
+    CarbonWriterBuilder writer = CarbonWriter.builder().isTransactionalTable(false)
+        .uniqueIdentifier(System.currentTimeMillis()).outputPath(path);
+
+    try {
+      Map<String, String> loadOptions = new HashMap<String, String>();
+      loadOptions.put("bad_records_action", "fail");
+      CarbonWriter carbonWriter =
+          writer.isTransactionalTable(false).withLoadOptions(loadOptions).buildWriterForCSVInput(new org.apache.carbondata.sdk.file.Schema(field));
+      carbonWriter.write(new String[] { "k", "20-02-2233" });
+      carbonWriter.close();
+      Assert.fail();
+    } catch (Exception e) {
+      assert(e.getMessage().contains("Data load failed due to bad record"));
+    }
+    FileUtils.deleteDirectory(new File(path));
+  }
 
 }