You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@carbondata.apache.org by aj...@apache.org on 2020/09/18 14:43:14 UTC

[carbondata] branch master updated: [CARBONDATA-3914] Fixed exception on reading data from carbon-hive empty table.

This is an automated email from the ASF dual-hosted git repository.

ajantha pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/carbondata.git


The following commit(s) were added to refs/heads/master by this push:
     new 131aaf5  [CARBONDATA-3914] Fixed exception on reading data from carbon-hive empty table.
131aaf5 is described below

commit 131aaf5fd4a8fd2ce41c365a9d1d2f1804b50c2a
Author: Nihal ojha <ni...@gmail.com>
AuthorDate: Thu Sep 17 20:03:08 2020 +0530

    [CARBONDATA-3914] Fixed exception on reading data from carbon-hive empty table.
    
    Why is this PR needed?
    Reading data from an empty carbon table through the Hive Beeline was giving an 'Unable read Carbon Schema' exception when no data is present in the carbon table. In the case of an empty table, some additional fields are present in the columns. Additionally, the schema is comma-separated, whereas we expect it to be colon-separated.
    
    What changes were proposed in this PR?
    Return empty data if no data is present. Validated the columns and column types, and removed any extra fields that had been added. Also parsed the schema in such a way that it can handle comma-separated values as well.
    
    Does this PR introduce any user interface change?
    No
    
    Is any new testcase added?
    Yes
    
    This closes #3936
---
 .../carbondata/hive/MapredCarbonInputFormat.java   | 12 +++++--
 .../carbondata/hive/util/HiveCarbonUtil.java       | 41 ++++++++++++++++++----
 .../org/apache/carbondata/hive/HiveCarbonTest.java | 13 +++++++
 .../org/apache/carbondata/hive/HiveTestUtils.java  |  3 --
 4 files changed, 57 insertions(+), 12 deletions(-)

diff --git a/integration/hive/src/main/java/org/apache/carbondata/hive/MapredCarbonInputFormat.java b/integration/hive/src/main/java/org/apache/carbondata/hive/MapredCarbonInputFormat.java
index 66f7c80..8ef2529 100644
--- a/integration/hive/src/main/java/org/apache/carbondata/hive/MapredCarbonInputFormat.java
+++ b/integration/hive/src/main/java/org/apache/carbondata/hive/MapredCarbonInputFormat.java
@@ -154,8 +154,16 @@ public class MapredCarbonInputFormat extends CarbonTableInputFormat<ArrayWritabl
     } else {
       carbonInputFormat = new CarbonFileInputFormat<>();
     }
-    List<org.apache.hadoop.mapreduce.InputSplit> splitList =
-        carbonInputFormat.getSplits(jobContext);
+    List<org.apache.hadoop.mapreduce.InputSplit> splitList;
+    try {
+      splitList = carbonInputFormat.getSplits(jobContext);
+    } catch (IOException ex) {
+      if (ex.getMessage().contains("No Index files are present in the table location :")) {
+        splitList = new ArrayList<>();
+      } else {
+        throw ex;
+      }
+    }
     InputSplit[] splits = new InputSplit[splitList.size()];
     CarbonInputSplit split;
     for (int i = 0; i < splitList.size(); i++) {
diff --git a/integration/hive/src/main/java/org/apache/carbondata/hive/util/HiveCarbonUtil.java b/integration/hive/src/main/java/org/apache/carbondata/hive/util/HiveCarbonUtil.java
index ae2bceb..973afe8 100644
--- a/integration/hive/src/main/java/org/apache/carbondata/hive/util/HiveCarbonUtil.java
+++ b/integration/hive/src/main/java/org/apache/carbondata/hive/util/HiveCarbonUtil.java
@@ -171,15 +171,35 @@ public class HiveCarbonUtil {
       columns = columns + "," + partitionColumns;
       columnTypes = columnTypes + ":" + partitionColumnTypes;
     }
-    String[] columnTypeArray = HiveCarbonUtil.splitSchemaStringToArray(columnTypes);
-
+    String[][] validatedColumnsAndTypes = validateColumnsAndTypes(columns, columnTypes);
     CarbonTable carbonTable = CarbonTable.buildFromTableInfo(
         HiveCarbonUtil.getTableInfo(tableName, databaseName, tablePath,
-            sortColumns, columns.split(","), columnTypeArray, new ArrayList<>()));
+            sortColumns, validatedColumnsAndTypes[0],
+            validatedColumnsAndTypes[1], new ArrayList<>()));
     carbonTable.setTransactionalTable(false);
     return carbonTable;
   }
 
+  // In case of empty table some extra field is getting added in the columns and columntypes
+  // which should be removed after validation.
+  private static String[][] validateColumnsAndTypes(String columns, String columnTypes) {
+    String[] columnTypeArray = HiveCarbonUtil.splitSchemaStringToArray(columnTypes);
+    String[] columnArray = columns.split(",");
+    String[] validatedColumnArray;
+    String[] validatedColumnTypeArray;
+    int length = columnArray.length;
+    if (columnArray[length - 3].equalsIgnoreCase("BLOCK__OFFSET__INSIDE__FILE")) {
+      validatedColumnArray = new String[length - 3];
+      validatedColumnTypeArray = new String[length - 3];
+      System.arraycopy(columnArray, 0, validatedColumnArray, 0, length - 3);
+      System.arraycopy(columnTypeArray, 0, validatedColumnTypeArray, 0, length - 3);
+    } else {
+      validatedColumnArray = columnArray;
+      validatedColumnTypeArray = columnTypeArray;
+    }
+    return new String[][]{validatedColumnArray, validatedColumnTypeArray};
+  }
+
   private static TableInfo getTableInfo(String tableName, String databaseName, String location,
       String sortColumnsString, String[] columns, String[] columnTypes,
       List<String> partitionColumns) throws SQLException {
@@ -303,8 +323,15 @@ public class HiveCarbonUtil {
     List<String> tokens = new ArrayList();
     StringBuilder stack = new StringBuilder();
     int openingCount = 0;
-    for (int i = 0; i < schema.length(); i++) {
-      if (schema.charAt(i) == '<') {
+    int length = schema.length();
+    for (int i = 0; i < length; i++) {
+      if (schema.charAt(i) == '(') {
+        stack.append(schema.charAt(i));
+        while (++i < length && schema.charAt(i) != ')') {
+          stack.append(schema.charAt(i));
+        }
+        stack.append(schema.charAt(i));
+      } else if (schema.charAt(i) == '<') {
         openingCount++;
         stack.append(schema.charAt(i));
       } else if (schema.charAt(i) == '>') {
@@ -317,9 +344,9 @@ public class HiveCarbonUtil {
         } else {
           stack.append(schema.charAt(i));
         }
-      } else if (schema.charAt(i) == ':' && openingCount > 0) {
+      } else if ((schema.charAt(i) == ':' || schema.charAt(i) == ',') && openingCount > 0) {
         stack.append(schema.charAt(i));
-      } else if (schema.charAt(i) == ':' && openingCount == 0) {
+      } else if ((schema.charAt(i) == ':' || schema.charAt(i) == ',') && openingCount == 0) {
         tokens.add(stack.toString());
         stack = new StringBuilder();
         openingCount = 0;
diff --git a/integration/hive/src/test/java/org/apache/carbondata/hive/HiveCarbonTest.java b/integration/hive/src/test/java/org/apache/carbondata/hive/HiveCarbonTest.java
index 1eac80a..b495933 100644
--- a/integration/hive/src/test/java/org/apache/carbondata/hive/HiveCarbonTest.java
+++ b/integration/hive/src/test/java/org/apache/carbondata/hive/HiveCarbonTest.java
@@ -86,6 +86,19 @@ public class HiveCarbonTest extends HiveTestUtils {
   }
 
   @Test
+  public void verifyEmptyTableSelectQuery() throws Exception {
+    statement.execute("drop table if exists hive_carbon_table1");
+    statement.execute("CREATE TABLE hive_carbon_table1(id INT, name STRING, scale DECIMAL, country STRING, salary DOUBLE) stored by 'org.apache.carbondata.hive.CarbonStorageHandler'");
+    statement.execute("drop table if exists hive_carbon_table2");
+    statement.execute("CREATE TABLE hive_carbon_table2(id INT, name STRING, scale DECIMAL, country STRING, salary DOUBLE) stored by 'org.apache.carbondata.hive.CarbonStorageHandler'");
+    statement.execute("INSERT into hive_carbon_table2 SELECT * FROM hive_carbon_table1");
+    checkAnswer(statement.executeQuery("SELECT * FROM hive_carbon_table2"),
+        connection.createStatement().executeQuery("select * from hive_carbon_table1"));
+    statement.execute("drop table if exists hive_carbon_table1");
+    statement.execute("drop table if exists hive_carbon_table2");
+  }
+
+  @Test
   public void verifyDataAfterLoadUsingSortColumns() throws Exception {
     statement.execute("drop table if exists hive_carbon_table5");
     statement.execute(
diff --git a/integration/hive/src/test/java/org/apache/carbondata/hive/HiveTestUtils.java b/integration/hive/src/test/java/org/apache/carbondata/hive/HiveTestUtils.java
index 44ec330..951ac06 100644
--- a/integration/hive/src/test/java/org/apache/carbondata/hive/HiveTestUtils.java
+++ b/integration/hive/src/test/java/org/apache/carbondata/hive/HiveTestUtils.java
@@ -66,11 +66,9 @@ public abstract class HiveTestUtils {
 
   public boolean checkAnswer(ResultSet actual, ResultSet expected) throws SQLException {
     Assert.assertEquals("Row Count Mismatch: ", expected.getFetchSize(), actual.getFetchSize());
-    int rowCountExpected = 0;
     List<String> expectedValuesList = new ArrayList<>();
     List<String> actualValuesList = new ArrayList<>();
     while (expected.next()) {
-      rowCountExpected ++;
       if (!actual.next()) {
         return false;
       }
@@ -88,7 +86,6 @@ public abstract class HiveTestUtils {
     }
     Collections.sort(expectedValuesList);Collections.sort(actualValuesList);
     Assert.assertArrayEquals(expectedValuesList.toArray(), actualValuesList.toArray());
-    Assert.assertTrue(rowCountExpected > 0);
     return true;
   }