You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by jo...@apache.org on 2021/07/07 18:30:54 UTC

[impala] 04/04: IMPALA-9822: Add warnings when row format delimiters are ignored on non-text/sequence tables

This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 59d32853ee42886ae683aac95a8be7f9c89b8eb7
Author: ShikhaAsrani <sh...@gmail.com>
AuthorDate: Fri Jun 11 20:55:10 2021 -0700

    IMPALA-9822: Add warnings when row format delimiters are ignored on
    non-text/sequence tables
    
    When users create a table whose file format is neither text nor sequence
    file, the row format is silently ignored. To avoid surprising results in
    subsequent queries, users should be warned that the row format is ignored,
    so that they can decide whether to change the file format instead.
    
    Fix: This patch adds a warning message when analyzing a CREATE TABLE
    statement whose ROW FORMAT DELIMITED clauses are non-empty and whose file
    format is not TEXTFILE/SEQUENCEFILE.
    
    Tests: Added a FE test to verify the warning message.
    
    Change-Id: I6fff8f26556ff3854403a418891a41d2a27f01a9
    Reviewed-on: http://gerrit.cloudera.org:8080/17589
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 .../apache/impala/analysis/CreateTableStmt.java    | 20 +++++++++++++++++
 .../org/apache/impala/analysis/AnalyzeDDLTest.java | 25 ++++++++++++++++++++++
 2 files changed, 45 insertions(+)

diff --git a/fe/src/main/java/org/apache/impala/analysis/CreateTableStmt.java b/fe/src/main/java/org/apache/impala/analysis/CreateTableStmt.java
index 752a226..a4f4ec0 100644
--- a/fe/src/main/java/org/apache/impala/analysis/CreateTableStmt.java
+++ b/fe/src/main/java/org/apache/impala/analysis/CreateTableStmt.java
@@ -262,6 +262,26 @@ public class CreateTableStmt extends StatementBase {
         THdfsFileFormat.ICEBERG) {
       throw new AnalysisException("Table requires at least 1 column");
     }
+    if (getRowFormat() != null) {  // Delimiters only apply to TEXT/SEQUENCE_FILE.
+      String fieldDelimiter = getRowFormat().getFieldDelimiter();
+      String lineDelimiter = getRowFormat().getLineDelimiter();
+      String escapeChar = getRowFormat().getEscapeChar();
+      if (getFileFormat() != THdfsFileFormat.TEXT
+          && getFileFormat() != THdfsFileFormat.SEQUENCE_FILE) {
+        if (fieldDelimiter != null) {
+          analyzer.addWarning("'ROW FORMAT DELIMITED FIELDS TERMINATED BY '"
+              + fieldDelimiter + "'' is ignored.");
+        }
+        if (lineDelimiter != null) {
+          analyzer.addWarning("'ROW FORMAT DELIMITED LINES TERMINATED BY '"
+              + lineDelimiter + "'' is ignored.");
+        }
+        if (escapeChar != null) {
+          analyzer.addWarning(
+              "'ROW FORMAT DELIMITED ESCAPED BY '" + escapeChar + "'' is ignored.");
+        }
+      }
+    }
     if (getFileFormat() == THdfsFileFormat.AVRO) {
       setColumnDefs(analyzeAvroSchema(analyzer));
       if (getColumnDefs().isEmpty()) {
diff --git a/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java b/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java
index 6f86188..ea8a9dd 100644
--- a/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java
@@ -2317,6 +2317,20 @@ public class AnalyzeDDLTest extends FrontendTestBase {
     AnalysisError("create table t as select null as new_col",
         "Unable to infer the column type for column 'new_col'. Use cast() to " +
         "explicitly specify the column type for column 'new_col'.");
+
+    // IMPALA-9822 Row Format Delimited is valid only for text and sequence files
+    String[] fileFormats = {"PARQUET", "ICEBERG"};
+    for (String format : fileFormats) {
+      for (String rowFormat : ImmutableList.of(
+               "FIELDS TERMINATED BY ','", "LINES TERMINATED BY ','", "ESCAPED BY ','")) {
+        AnalyzesOk(
+            String.format(
+                "create table new_table row format delimited %s stored as %s as select *"
+                    + " from functional.child_table",
+                rowFormat, format),
+            "'ROW FORMAT DELIMITED " + rowFormat + "' is ignored.");
+      }
+    }
   }
 
   @Test
@@ -2574,6 +2588,17 @@ public class AnalyzeDDLTest extends FrontendTestBase {
       formatIndx++;
     }
 
+    for (formatIndx = 2; formatIndx < fileFormats.length; formatIndx++) {
+      for (String rowFormat : ImmutableList.of(
+               "FIELDS TERMINATED BY ','", "LINES TERMINATED BY ','", "ESCAPED BY ','")) {
+        AnalyzesOk(
+            String.format(
+                "create table new_table (i int) row format delimited %s stored as %s",
+                rowFormat, fileFormats[formatIndx]),
+            "'ROW FORMAT DELIMITED " + rowFormat + "' is ignored");
+      }
+    }
+
     // Note: Backslashes need to be escaped twice - once for Java and once for Impala.
     // For example, if this were a real query the value '\' would be stored in the
     // metastore for the ESCAPED BY field.