You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by br...@apache.org on 2014/10/14 23:29:25 UTC

svn commit: r1631892 - in /hive/trunk: metastore/src/java/org/apache/hadoop/hive/metastore/ ql/src/java/org/apache/hadoop/hive/ql/exec/ ql/src/java/org/apache/hadoop/hive/ql/metadata/ ql/src/test/queries/clientpositive/ ql/src/test/results/beelineposit...

Author: brock
Date: Tue Oct 14 21:29:24 2014
New Revision: 1631892

URL: http://svn.apache.org/r1631892
Log:
HIVE-7868 - AvroSerDe error handling could be improved (Ferdinand Xu via Brock)

Removed:
    hive/trunk/ql/src/test/queries/clientpositive/avro_schema_error_message.q
    hive/trunk/ql/src/test/results/beelinepositive/avro_schema_error_message.q.out
    hive/trunk/ql/src/test/results/clientpositive/avro_schema_error_message.q.out
Modified:
    hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
    hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java
    hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/AbstractSerDe.java
    hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java
    hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerDe.java
    hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerdeUtils.java
    hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroSerde.java

Modified: hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
URL: http://svn.apache.org/viewvc/hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java?rev=1631892&r1=1631891&r2=1631892&view=diff
==============================================================================
--- hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java (original)
+++ hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java Tue Oct 14 21:29:24 2014
@@ -3355,7 +3355,7 @@ public class HiveMetaStore extends Thrif
           ret = tbl.getSd().getCols();
         } else {
           try {
-            Deserializer s = MetaStoreUtils.getDeserializer(hiveConf, tbl);
+            Deserializer s = MetaStoreUtils.getDeserializer(hiveConf, tbl, false);
             ret = MetaStoreUtils.getFieldsFromDeserializer(tableName, s);
           } catch (SerDeException e) {
             StringUtils.stringifyException(e);

Modified: hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
URL: http://svn.apache.org/viewvc/hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java?rev=1631892&r1=1631891&r2=1631892&view=diff
==============================================================================
--- hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java (original)
+++ hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java Tue Oct 14 21:29:24 2014
@@ -357,15 +357,21 @@ public class MetaStoreUtils {
    *
    */
   static public Deserializer getDeserializer(Configuration conf,
-      org.apache.hadoop.hive.metastore.api.Table table) throws MetaException {
+      org.apache.hadoop.hive.metastore.api.Table table, boolean skipConfError) throws
+          MetaException {
     String lib = table.getSd().getSerdeInfo().getSerializationLib();
     if (lib == null) {
       return null;
     }
     try {
       Deserializer deserializer = ReflectionUtils.newInstance(conf.getClassByName(lib).
-        asSubclass(Deserializer.class), conf);
-      SerDeUtils.initializeSerDe(deserializer, conf, MetaStoreUtils.getTableMetadata(table), null);
+              asSubclass(Deserializer.class), conf);
+      if (skipConfError) {
+        SerDeUtils.initializeSerDeWithoutErrorCheck(deserializer, conf,
+                MetaStoreUtils.getTableMetadata(table), null);
+      } else {
+        SerDeUtils.initializeSerDe(deserializer, conf, MetaStoreUtils.getTableMetadata(table), null);
+      }
       return deserializer;
     } catch (RuntimeException e) {
       throw e;

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java?rev=1631892&r1=1631891&r2=1631892&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java Tue Oct 14 21:29:24 2014
@@ -153,8 +153,10 @@ import org.apache.hadoop.hive.ql.securit
 import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveV1Authorizer;
 import org.apache.hadoop.hive.ql.session.SessionState;
 import org.apache.hadoop.hive.serde.serdeConstants;
+import org.apache.hadoop.hive.serde2.AbstractSerDe;
 import org.apache.hadoop.hive.serde2.Deserializer;
 import org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe;
+import org.apache.hadoop.hive.serde2.SerDeException;
 import org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe;
 import org.apache.hadoop.hive.serde2.dynamic_type.DynamicSerDe;
 import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
@@ -183,6 +185,7 @@ import java.io.Serializable;
 import java.io.Writer;
 import java.net.URI;
 import java.net.URISyntaxException;
+import java.sql.SQLException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
@@ -2807,7 +2810,7 @@ public class DDLTask extends Task<DDLWor
    *          is the function we are describing
    * @throws HiveException
    */
-  private int describeFunction(DescFunctionDesc descFunc) throws HiveException {
+  private int describeFunction(DescFunctionDesc descFunc) throws HiveException, SQLException {
     String funcName = descFunc.getName();
 
     // write the results in the file
@@ -3088,6 +3091,15 @@ public class DDLTask extends Task<DDLWor
 
       List<FieldSchema> cols = null;
       List<ColumnStatisticsObj> colStats = null;
+
+      Deserializer deserializer = tbl.getDeserializer(true);
+      if (deserializer instanceof AbstractSerDe) {
+        String errorMsgs = ((AbstractSerDe) deserializer).getConfigurationErrors();
+        if (errorMsgs != null && !errorMsgs.isEmpty()) {
+          throw new SQLException(errorMsgs);
+        }
+      }
+
       if (colPath.equals(tableName)) {
         cols = (part == null || tbl.getTableType() == TableType.VIRTUAL_VIEW) ?
             tbl.getCols() : part.getCols();
@@ -3096,7 +3108,7 @@ public class DDLTask extends Task<DDLWor
           cols.addAll(tbl.getPartCols());
         }
       } else {
-        cols = Hive.getFieldsFromDeserializer(colPath, tbl.getDeserializer());
+        cols = Hive.getFieldsFromDeserializer(colPath, deserializer);
         if (descTbl.isFormatted()) {
           // when column name is specified in describe table DDL, colPath will
           // will be table_name.column_name
@@ -3126,6 +3138,8 @@ public class DDLTask extends Task<DDLWor
       outStream.close();
       outStream = null;
 
+    } catch (SQLException e) {
+      throw new HiveException(e, ErrorMsg.GENERIC_ERROR, tableName);
     } catch (IOException e) {
       throw new HiveException(e, ErrorMsg.GENERIC_ERROR, tableName);
     } finally {

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java?rev=1631892&r1=1631891&r2=1631892&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java Tue Oct 14 21:29:24 2014
@@ -239,7 +239,7 @@ public class FetchOperator implements Se
 
   private StructObjectInspector getRowInspectorFromTable(TableDesc table) throws Exception {
     Deserializer serde = table.getDeserializerClass().newInstance();
-    SerDeUtils.initializeSerDe(serde, job, table.getProperties(), null);
+    SerDeUtils.initializeSerDeWithoutErrorCheck(serde, job, table.getProperties(), null);
     return createRowInspector(getStructOIFrom(serde.getObjectInspector()));
   }
 

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java?rev=1631892&r1=1631891&r2=1631892&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java Tue Oct 14 21:29:24 2014
@@ -192,7 +192,7 @@ public class Table implements Serializab
           "at least one column must be specified for the table");
     }
     if (!isView()) {
-      if (null == getDeserializerFromMetaStore()) {
+      if (null == getDeserializerFromMetaStore(false)) {
         throw new HiveException("must specify a non-null serDe");
       }
       if (null == getInputFormatClass()) {
@@ -253,14 +253,21 @@ public class Table implements Serializab
 
   final public Deserializer getDeserializer() {
     if (deserializer == null) {
-      deserializer = getDeserializerFromMetaStore();
+      deserializer = getDeserializerFromMetaStore(false);
     }
     return deserializer;
   }
 
-  private Deserializer getDeserializerFromMetaStore() {
+  final public Deserializer getDeserializer(boolean skipConfError) {
+    if (deserializer == null) {
+      deserializer = getDeserializerFromMetaStore(skipConfError);
+    }
+    return deserializer;
+  }
+
+  final public Deserializer getDeserializerFromMetaStore(boolean skipConfError) {
     try {
-      return MetaStoreUtils.getDeserializer(Hive.get().getConf(), tTable);
+      return MetaStoreUtils.getDeserializer(Hive.get().getConf(), tTable, skipConfError);
     } catch (MetaException e) {
       throw new RuntimeException(e);
     } catch (HiveException e) {

Modified: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/AbstractSerDe.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/AbstractSerDe.java?rev=1631892&r1=1631891&r2=1631892&view=diff
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/AbstractSerDe.java (original)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/AbstractSerDe.java Tue Oct 14 21:29:24 2014
@@ -31,6 +31,8 @@ import org.apache.hadoop.io.Writable;
  */
 public abstract class AbstractSerDe implements SerDe {
 
+  protected String configErrors;
+
   /**
    * Initialize the SerDe. By default, this will use one set of properties, either the
    * table properties or the partition properties. If a SerDe needs access to both sets,
@@ -101,4 +103,17 @@ public abstract class AbstractSerDe impl
    * structure of the Object returned from deserialize(...).
    */
   public abstract ObjectInspector getObjectInspector() throws SerDeException;
+
+  /**
+   * Get the error messages during the Serde configuration
+   *
+   * @return The error messages in the configuration which are empty if no error occurred
+   */
+  public String getConfigurationErrors() {
+    if (configErrors == null || configErrors.isEmpty()) {
+      return "";
+    } else {
+      return configErrors;
+    }
+  }
 }

Modified: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java?rev=1631892&r1=1631891&r2=1631892&view=diff
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java (original)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java Tue Oct 14 21:29:24 2014
@@ -506,7 +506,8 @@ public final class SerDeUtils {
 
   /**
    * Initializes a SerDe.
-   * @param serde
+   * @param deserializer
+   * @param conf
    * @param tblProps
    * @param partProps
    * @throws SerDeException
@@ -516,6 +517,28 @@ public final class SerDeUtils {
                                                 throws SerDeException {
     if (deserializer instanceof AbstractSerDe) {
       ((AbstractSerDe) deserializer).initialize(conf, tblProps, partProps);
+      String msg = ((AbstractSerDe) deserializer).getConfigurationErrors();
+      if (msg != null && !msg.isEmpty()) {
+        throw new SerDeException(msg);
+      }
+    } else {
+      deserializer.initialize(conf, createOverlayedProperties(tblProps, partProps));
+    }
+  }
+
+  /**
+   * Initializes a SerDe.
+   * @param deserializer
+   * @param conf
+   * @param tblProps
+   * @param partProps
+   * @throws SerDeException
+   */
+  public static void initializeSerDeWithoutErrorCheck(Deserializer deserializer,
+                                                      Configuration conf, Properties tblProps,
+                                                      Properties partProps) throws SerDeException {
+    if (deserializer instanceof AbstractSerDe) {
+      ((AbstractSerDe) deserializer).initialize(conf, tblProps, partProps);
     } else {
       deserializer.initialize(conf, createOverlayedProperties(tblProps, partProps));
     }

Modified: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerDe.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerDe.java?rev=1631892&r1=1631891&r2=1631892&view=diff
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerDe.java (original)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerDe.java Tue Oct 14 21:29:24 2014
@@ -89,7 +89,7 @@ public class AvroSerDe extends AbstractS
         || properties.getProperty(AvroSerdeUtils.SCHEMA_URL) != null
         || columnNameProperty == null || columnNameProperty.isEmpty()
         || columnTypeProperty == null || columnTypeProperty.isEmpty()) {
-      schema = AvroSerdeUtils.determineSchemaOrReturnErrorSchema(properties);
+      schema = determineSchemaOrReturnErrorSchema(properties);
     } else {
       // Get column names and sort order
       columnNames = Arrays.asList(columnNameProperty.split(","));
@@ -135,6 +135,32 @@ public class AvroSerDe extends AbstractS
     this.oi = aoig.getObjectInspector();
   }
 
+  /**
+   * Attempt to determine the schema via the usual means, but do not throw
+   * an exception if we fail.  Instead, signal failure via a special
+   * schema.  This is used because Hive calls init on the serde during
+   * any call, including calls to update the serde properties, meaning
+   * if the serde is in a bad state, there is no way to update that state.
+   */
+  public Schema determineSchemaOrReturnErrorSchema(Properties props) {
+    try {
+      configErrors = "";
+      return AvroSerdeUtils.determineSchemaOrThrowException(props);
+    } catch(AvroSerdeException he) {
+      LOG.warn("Encountered AvroSerdeException determining schema. Returning " +
+              "signal schema to indicate problem", he);
+      configErrors = new String("Encountered AvroSerdeException determining schema. Returning " +
+              "signal schema to indicate problem: " + he.getMessage());
+      return schema = SchemaResolutionProblem.SIGNAL_BAD_SCHEMA;
+    } catch (Exception e) {
+      LOG.warn("Encountered exception determining schema. Returning signal " +
+              "schema to indicate problem", e);
+      configErrors = new String("Encountered exception determining schema. Returning signal " +
+              "schema to indicate problem: " + e.getMessage());
+      return SchemaResolutionProblem.SIGNAL_BAD_SCHEMA;
+    }
+  }
+
   @Override
   public Class<? extends Writable> getSerializedClass() {
     return AvroGenericRecordWritable.class;

Modified: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerdeUtils.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerdeUtils.java?rev=1631892&r1=1631891&r2=1631892&view=diff
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerdeUtils.java (original)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerdeUtils.java Tue Oct 14 21:29:24 2014
@@ -91,26 +91,6 @@ public class AvroSerdeUtils {
     }
   }
 
-  /**
-   * Attempt to determine the schema via the usual means, but do not throw
-   * an exception if we fail.  Instead, signal failure via a special
-   * schema.  This is used because Hive calls init on the serde during
-   * any call, including calls to update the serde properties, meaning
-   * if the serde is in a bad state, there is no way to update that state.
-   */
-  public static Schema determineSchemaOrReturnErrorSchema(Properties props) {
-    try {
-      return determineSchemaOrThrowException(props);
-    } catch(AvroSerdeException he) {
-      LOG.warn("Encountered AvroSerdeException determining schema. Returning " +
-              "signal schema to indicate problem", he);
-      return SchemaResolutionProblem.SIGNAL_BAD_SCHEMA;
-    } catch (Exception e) {
-      LOG.warn("Encountered exception determining schema. Returning signal " +
-              "schema to indicate problem", e);
-      return SchemaResolutionProblem.SIGNAL_BAD_SCHEMA;
-    }
-  }
   // Protected for testing and so we can pass in a conf for testing.
   protected static Schema getSchemaFromFS(String schemaFSUrl,
                           Configuration conf) throws IOException, URISyntaxException {

Modified: hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroSerde.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroSerde.java?rev=1631892&r1=1631891&r2=1631892&view=diff
==============================================================================
--- hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroSerde.java (original)
+++ hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroSerde.java Tue Oct 14 21:29:24 2014
@@ -85,77 +85,59 @@ public class TestAvroSerde {
   }
 
   @Test
-  public void noSchemaProvidedReturnsErrorSchema() throws SerDeException {
+  public void noSchemaProvidedThrowsException() {
     Properties props = new Properties();
 
-    verifyErrorSchemaReturned(props);
+    verifyExpectedException(props);
   }
 
   @Test
-  public void gibberishSchemaProvidedReturnsErrorSchema() throws SerDeException {
+  public void gibberishSchemaProvidedReturnsErrorSchema() {
     Properties props = new Properties();
     props.put(AvroSerdeUtils.SCHEMA_LITERAL, "blahblahblah");
 
-    verifyErrorSchemaReturned(props);
+    verifyExpectedException(props);
   }
 
   @Test
-  public void emptySchemaProvidedReturnsErrorSchema() throws SerDeException {
+  public void emptySchemaProvidedThrowsException() {
     Properties props = new Properties();
     props.put(AvroSerdeUtils.SCHEMA_LITERAL, "");
 
-    verifyErrorSchemaReturned(props);
+    verifyExpectedException(props);
   }
 
   @Test
-  public void badSchemaURLProvidedReturnsErrorSchema() throws SerDeException {
+  public void badSchemaURLProvidedThrowsException() {
     Properties props = new Properties();
     props.put(AvroSerdeUtils.SCHEMA_URL, "not://a/url");
 
-    verifyErrorSchemaReturned(props);
+    verifyExpectedException(props);
   }
 
   @Test
-  public void emptySchemaURLProvidedReturnsErrorSchema() throws SerDeException {
+  public void emptySchemaURLProvidedThrowsException() {
     Properties props = new Properties();
     props.put(AvroSerdeUtils.SCHEMA_URL, "");
 
-    verifyErrorSchemaReturned(props);
+    verifyExpectedException(props);
   }
 
   @Test
-  public void bothPropertiesSetToNoneReturnsErrorSchema() throws SerDeException {
+  public void bothPropertiesSetToNoneThrowsException() {
     Properties props = new Properties();
     props.put(AvroSerdeUtils.SCHEMA_URL, AvroSerdeUtils.SCHEMA_NONE);
     props.put(AvroSerdeUtils.SCHEMA_LITERAL, AvroSerdeUtils.SCHEMA_NONE);
 
-    verifyErrorSchemaReturned(props);
+    verifyExpectedException(props);
   }
 
-  private void verifyErrorSchemaReturned(Properties props) throws SerDeException {
+  private void verifyExpectedException(Properties props) {
     AvroSerDe asd = new AvroSerDe();
-    SerDeUtils.initializeSerDe(asd, new Configuration(), props, null);
-    assertTrue(asd.getObjectInspector() instanceof StandardStructObjectInspector);
-    StandardStructObjectInspector oi = (StandardStructObjectInspector)asd.getObjectInspector();
-    List<? extends StructField> allStructFieldRefs = oi.getAllStructFieldRefs();
-    assertEquals(SchemaResolutionProblem.SIGNAL_BAD_SCHEMA.getFields().size(), allStructFieldRefs.size());
-    StructField firstField = allStructFieldRefs.get(0);
-    assertTrue(firstField.toString().contains("error_error_error_error_error_error_error"));
-
-    try {
-      Writable mock = Mockito.mock(Writable.class);
-      asd.deserialize(mock);
-      fail("Should have thrown a BadSchemaException");
-    } catch (BadSchemaException bse) {
-      // good
-    }
-
     try {
-      Object o = Mockito.mock(Object.class);
-      ObjectInspector mockOI = Mockito.mock(ObjectInspector.class);
-      asd.serialize(o, mockOI);
-      fail("Should have thrown a BadSchemaException");
-    } catch (BadSchemaException bse) {
+      SerDeUtils.initializeSerDe(asd, new Configuration(), props, null);
+      fail("Expected Exception did not be thrown");
+    } catch (SerDeException e) {
       // good
     }
   }