You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@carbondata.apache.org by ra...@apache.org on 2018/06/05 10:41:50 UTC

[05/26] carbondata git commit: [CARBONDATA-2514] Added condition to check for duplicate column names

[CARBONDATA-2514] Added condition to check for duplicate column names

1. Duplicate columns check was not present.
2. IndexFileReader was not being closed due to which index file could not be deleted.

This closes #2332


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/d4f9c340
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/d4f9c340
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/d4f9c340

Branch: refs/heads/branch-1.4
Commit: d4f9c3401740091e098e9e2fb3a888e5755d6dc6
Parents: b401a9f
Author: kunal642 <ku...@gmail.com>
Authored: Tue May 22 15:16:32 2018 +0530
Committer: ravipesala <ra...@gmail.com>
Committed: Tue Jun 5 16:04:20 2018 +0530

----------------------------------------------------------------------
 .../apache/carbondata/core/util/CarbonUtil.java | 44 +++++++++++---------
 .../carbondata/core/util/DataTypeUtil.java      |  2 +
 .../sdk/file/CarbonWriterBuilder.java           |  7 ++++
 .../sdk/file/AvroCarbonWriterTest.java          | 40 ++++++++++++++++++
 4 files changed, 73 insertions(+), 20 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/carbondata/blob/d4f9c340/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java b/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java
index 9dc4aa2..23d02ef 100644
--- a/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java
+++ b/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java
@@ -2380,27 +2380,31 @@ public final class CarbonUtil {
   public static org.apache.carbondata.format.TableInfo inferSchemaFromIndexFile(
       String indexFilePath, String tableName) throws IOException {
     CarbonIndexFileReader indexFileReader = new CarbonIndexFileReader();
-    indexFileReader.openThriftReader(indexFilePath);
-    org.apache.carbondata.format.IndexHeader readIndexHeader = indexFileReader.readIndexHeader();
-    List<ColumnSchema> columnSchemaList = new ArrayList<ColumnSchema>();
-    List<org.apache.carbondata.format.ColumnSchema> table_columns =
-        readIndexHeader.getTable_columns();
-    for (int i = 0; i < table_columns.size(); i++) {
-      columnSchemaList.add(thriftColumnSchmeaToWrapperColumnSchema(table_columns.get(i)));
+    try {
+      indexFileReader.openThriftReader(indexFilePath);
+      org.apache.carbondata.format.IndexHeader readIndexHeader = indexFileReader.readIndexHeader();
+      List<ColumnSchema> columnSchemaList = new ArrayList<ColumnSchema>();
+      List<org.apache.carbondata.format.ColumnSchema> table_columns =
+          readIndexHeader.getTable_columns();
+      for (int i = 0; i < table_columns.size(); i++) {
+        columnSchemaList.add(thriftColumnSchmeaToWrapperColumnSchema(table_columns.get(i)));
+      }
+      // only columnSchema is the valid entry, reset all dummy entries.
+      TableSchema tableSchema = getDummyTableSchema(tableName, columnSchemaList);
+
+      ThriftWrapperSchemaConverterImpl thriftWrapperSchemaConverter =
+          new ThriftWrapperSchemaConverterImpl();
+      org.apache.carbondata.format.TableSchema thriftFactTable =
+          thriftWrapperSchemaConverter.fromWrapperToExternalTableSchema(tableSchema);
+      org.apache.carbondata.format.TableInfo tableInfo =
+          new org.apache.carbondata.format.TableInfo(thriftFactTable,
+              new ArrayList<org.apache.carbondata.format.TableSchema>());
+
+      tableInfo.setDataMapSchemas(null);
+      return tableInfo;
+    } finally {
+      indexFileReader.closeThriftReader();
     }
-    // only columnSchema is the valid entry, reset all dummy entries.
-    TableSchema tableSchema = getDummyTableSchema(tableName, columnSchemaList);
-
-    ThriftWrapperSchemaConverterImpl thriftWrapperSchemaConverter =
-        new ThriftWrapperSchemaConverterImpl();
-    org.apache.carbondata.format.TableSchema thriftFactTable =
-        thriftWrapperSchemaConverter.fromWrapperToExternalTableSchema(tableSchema);
-    org.apache.carbondata.format.TableInfo tableInfo =
-        new org.apache.carbondata.format.TableInfo(thriftFactTable,
-            new ArrayList<org.apache.carbondata.format.TableSchema>());
-
-    tableInfo.setDataMapSchemas(null);
-    return tableInfo;
   }
 
   private static TableSchema getDummyTableSchema(String tableName,

http://git-wip-us.apache.org/repos/asf/carbondata/blob/d4f9c340/core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java b/core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java
index f7f71b3..e06c82e 100644
--- a/core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java
+++ b/core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java
@@ -341,6 +341,7 @@ public final class DataTypeUtil {
       try {
         if (null != dateFormat && !dateFormat.trim().isEmpty()) {
           dateFormatter = new SimpleDateFormat(dateFormat);
+          dateFormatter.setLenient(false);
         } else {
           dateFormatter = timeStampformatter.get();
         }
@@ -376,6 +377,7 @@ public final class DataTypeUtil {
       try {
         if (null != dateFormat && !dateFormat.trim().isEmpty()) {
           dateFormatter = new SimpleDateFormat(dateFormat);
+          dateFormatter.setLenient(false);
         } else {
           dateFormatter = timeStampformatter.get();
         }

http://git-wip-us.apache.org/repos/asf/carbondata/blob/d4f9c340/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonWriterBuilder.java
----------------------------------------------------------------------
diff --git a/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonWriterBuilder.java b/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonWriterBuilder.java
index e846da4..2277ab0 100644
--- a/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonWriterBuilder.java
+++ b/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonWriterBuilder.java
@@ -21,9 +21,11 @@ import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Objects;
+import java.util.Set;
 import java.util.TreeMap;
 import java.util.concurrent.atomic.AtomicInteger;
 
@@ -421,6 +423,7 @@ public class CarbonWriterBuilder {
 
   private void buildTableSchema(Field[] fields, TableSchemaBuilder tableSchemaBuilder,
       List<String> sortColumnsList, ColumnSchema[] sortColumnsSchemaList) {
+    Set<String> uniqueFields = new HashSet<>();
     // a counter which will be used in case of complex array type. This valIndex will be assigned
     // to child of complex array type in the order val1, val2 so that each array type child is
     // differentiated to any level
@@ -442,6 +445,10 @@ public class CarbonWriterBuilder {
     int i = 0;
     for (Field field : fields) {
       if (null != field) {
+        if (!uniqueFields.add(field.getFieldName())) {
+          throw new RuntimeException(
+              "Duplicate column " + field.getFieldName() + " found in table schema");
+        }
         int isSortColumn = sortColumnsList.indexOf(field.getFieldName());
         if (isSortColumn > -1) {
           // unsupported types for ("array", "struct", "double", "float", "decimal")

http://git-wip-us.apache.org/repos/asf/carbondata/blob/d4f9c340/store/sdk/src/test/java/org/apache/carbondata/sdk/file/AvroCarbonWriterTest.java
----------------------------------------------------------------------
diff --git a/store/sdk/src/test/java/org/apache/carbondata/sdk/file/AvroCarbonWriterTest.java b/store/sdk/src/test/java/org/apache/carbondata/sdk/file/AvroCarbonWriterTest.java
index b70e74d..03a4f47 100644
--- a/store/sdk/src/test/java/org/apache/carbondata/sdk/file/AvroCarbonWriterTest.java
+++ b/store/sdk/src/test/java/org/apache/carbondata/sdk/file/AvroCarbonWriterTest.java
@@ -21,9 +21,12 @@ import java.io.File;
 import java.io.FileFilter;
 import java.io.IOException;
 import java.io.UnsupportedEncodingException;
+import java.util.HashMap;
+import java.util.Map;
 
 import org.apache.carbondata.common.exceptions.sql.InvalidLoadOptionException;
 import org.apache.carbondata.core.constants.CarbonCommonConstants;
+import org.apache.carbondata.core.metadata.datatype.DataTypes;
 import org.apache.carbondata.core.util.path.CarbonTablePath;
 
 import org.apache.avro.generic.GenericData;
@@ -450,6 +453,43 @@ public class AvroCarbonWriterTest {
     FileUtils.deleteDirectory(new File(path));
   }
 
+  @Test
+  public void testExceptionForDuplicateColumns() throws IOException, InvalidLoadOptionException {
+    Field[] field = new Field[2];
+    field[0] = new Field("name", DataTypes.STRING);
+    field[1] = new Field("name", DataTypes.STRING);
+    CarbonWriterBuilder writer = CarbonWriter.builder().isTransactionalTable(false)
+        .uniqueIdentifier(System.currentTimeMillis()).outputPath(path);
+
+    try {
+      writer.buildWriterForCSVInput(new org.apache.carbondata.sdk.file.Schema(field));
+      Assert.fail();
+    } catch (Exception e) {
+      assert(e.getMessage().contains("Duplicate column name found in table schema"));
+    }
+    FileUtils.deleteDirectory(new File(path));
+  }
 
+  @Test
+  public void testExceptionForInvalidDate() throws IOException, InvalidLoadOptionException {
+    Field[] field = new Field[2];
+    field[0] = new Field("name", DataTypes.STRING);
+    field[1] = new Field("date", DataTypes.DATE);
+    CarbonWriterBuilder writer = CarbonWriter.builder().isTransactionalTable(false)
+        .uniqueIdentifier(System.currentTimeMillis()).outputPath(path);
+
+    try {
+      Map<String, String> loadOptions = new HashMap<String, String>();
+      loadOptions.put("bad_records_action", "fail");
+      CarbonWriter carbonWriter =
+          writer.isTransactionalTable(false).withLoadOptions(loadOptions).buildWriterForCSVInput(new org.apache.carbondata.sdk.file.Schema(field));
+      carbonWriter.write(new String[] { "k", "20-02-2233" });
+      carbonWriter.close();
+      Assert.fail();
+    } catch (Exception e) {
+      assert(e.getMessage().contains("Data load failed due to bad record"));
+    }
+    FileUtils.deleteDirectory(new File(path));
+  }
 
 }