Posted to commits@hive.apache.org by se...@apache.org on 2015/01/23 20:59:24 UTC

svn commit: r1654355 [12/27] - in /hive/branches/llap: ./ beeline/src/java/org/apache/hive/beeline/ cli/src/java/org/apache/hadoop/hive/cli/ common/src/java/org/apache/hadoop/hive/common/ common/src/java/org/apache/hadoop/hive/conf/ data/conf/ data/con...

Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinKey.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinKey.java?rev=1654355&r1=1654354&r2=1654355&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinKey.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinKey.java Fri Jan 23 19:59:11 2015
@@ -57,22 +57,11 @@ public abstract class MapJoinKey {
   public abstract boolean hasAnyNulls(int fieldCount, boolean[] nullsafes);
 
   @SuppressWarnings("deprecation")
-  public static MapJoinKey read(Output output, MapJoinKey key,
-      MapJoinObjectSerDeContext context, Writable writable, boolean mayReuseKey)
-      throws SerDeException, HiveException {
+  public static MapJoinKey read(Output output, MapJoinObjectSerDeContext context,
+      Writable writable) throws SerDeException, HiveException {
     SerDe serde = context.getSerDe();
     Object obj = serde.deserialize(writable);
-    boolean useOptimized = useOptimizedKeyBasedOnPrev(key);
-    if (useOptimized || key == null) {
-      byte[] structBytes = serialize(output, obj, serde.getObjectInspector(), !useOptimized);
-      if (structBytes != null) {
-        return MapJoinKeyBytes.fromBytes(key, mayReuseKey, structBytes);
-      } else if (useOptimized) {
-        throw new SerDeException(
-            "Failed to serialize " + obj + " even though optimized keys are used");
-      }
-    }
-    MapJoinKeyObject result = mayReuseKey ? (MapJoinKeyObject)key : new MapJoinKeyObject();
+    MapJoinKeyObject result = new MapJoinKeyObject();
     result.read(serde.getObjectInspector(), obj);
     return result;
   }
@@ -98,35 +87,6 @@ public abstract class MapJoinKey {
     SUPPORTED_PRIMITIVES.add(PrimitiveCategory.CHAR);
   }
 
-  private static byte[] serialize(Output byteStream,
-      Object obj, ObjectInspector oi, boolean checkTypes) throws HiveException {
-    if (null == obj || !(oi instanceof StructObjectInspector)) {
-      return null; // not supported
-    }
-    StructObjectInspector soi = (StructObjectInspector)oi;
-    List<? extends StructField> fields = soi.getAllStructFieldRefs();
-    int size = fields.size();
-    if (size > 8) {
-      return null; // not supported
-    } else if (size == 0) {
-      return EMPTY_BYTE_ARRAY; // shortcut for null keys
-    }
-    Object[] fieldData = new Object[size];
-    List<ObjectInspector> fieldOis = new ArrayList<ObjectInspector>(size);
-    for (int i = 0; i < size; ++i) {
-      StructField field = fields.get(i);
-      ObjectInspector foi = field.getFieldObjectInspector();
-      if (checkTypes && !isSupportedField(foi)) {
-        return null;
-      }
-      fieldData[i] = soi.getStructFieldData(obj, field);
-      fieldOis.add(foi);
-    }
-
-    byteStream = serializeRow(byteStream, fieldData, fieldOis, null);
-    return Arrays.copyOf(byteStream.getData(), byteStream.getLength());
-  }
-
   public static boolean isSupportedField(ObjectInspector foi) {
     if (foi.getCategory() != Category.PRIMITIVE) return false; // not supported
     PrimitiveCategory pc = ((PrimitiveObjectInspector)foi).getPrimitiveCategory();
@@ -136,19 +96,6 @@ public abstract class MapJoinKey {
 
   public static MapJoinKey readFromVector(Output output, MapJoinKey key, Object[] keyObject,
       List<ObjectInspector> keyOIs, boolean mayReuseKey) throws HiveException {
-    boolean useOptimized = useOptimizedKeyBasedOnPrev(key);
-    if (useOptimized || key == null) {
-      if (keyObject.length <= 8) {
-        output = serializeRow(output, keyObject, keyOIs, null);
-        return MapJoinKeyBytes.fromBytes(key, mayReuseKey,
-            Arrays.copyOf(output.getData(), output.getLength()));
-      }
-      if (useOptimized) {
-        throw new HiveException(
-            "Failed to serialize " + Arrays.toString(keyObject) +
-                " even though optimized keys are used");
-      }
-    }
     MapJoinKeyObject result = mayReuseKey ? (MapJoinKeyObject)key : new MapJoinKeyObject();
     result.setKeyObjects(keyObject);
     return result;
@@ -178,32 +125,11 @@ public abstract class MapJoinKey {
 
   public static MapJoinKey readFromRow(Output output, MapJoinKey key, Object[] keyObject,
       List<ObjectInspector> keyFieldsOI, boolean mayReuseKey) throws HiveException {
-    boolean useOptimized = useOptimizedKeyBasedOnPrev(key);
-    if (useOptimized || key == null) {
-      if (keyObject.length <= 8) {
-        byte[] structBytes;
-        if (keyObject.length == 0) {
-          structBytes = EMPTY_BYTE_ARRAY; // shortcut for null keys
-        } else {
-          output = serializeRow(output, keyObject, keyFieldsOI, null);
-          structBytes = Arrays.copyOf(output.getData(), output.getLength());
-        }
-        return MapJoinKeyBytes.fromBytes(key, mayReuseKey, structBytes);
-      }
-      if (useOptimized) {
-        throw new HiveException(
-            "Failed to serialize " + Arrays.toString(keyObject) +
-                " even though optimized keys are used");
-      }
-    }
     MapJoinKeyObject result = mayReuseKey ? (MapJoinKeyObject)key : new MapJoinKeyObject();
     result.readFromRow(keyObject, keyFieldsOI);
     return result;
   }
 
-  private static final Log LOG = LogFactory.getLog(MapJoinKey.class);
-
-
   /**
    * Serializes row to output.
    * @param byteStream Output to reuse. Can be null, in that case a new one would be created.
@@ -228,8 +154,4 @@ public abstract class MapJoinKey {
     }
     return byteStream;
   }
-
-  private static boolean useOptimizedKeyBasedOnPrev(MapJoinKey key) {
-    return (key != null) && (key instanceof MapJoinKeyBytes);
-  }
 }

Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinTableContainerSerDe.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinTableContainerSerDe.java?rev=1654355&r1=1654354&r2=1654355&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinTableContainerSerDe.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinTableContainerSerDe.java Fri Jan 23 19:59:11 2015
@@ -19,15 +19,20 @@
 package org.apache.hadoop.hive.ql.exec.persistence;
 
 import java.io.IOException;
+import java.io.InputStream;
 import java.io.ObjectInputStream;
 import java.io.ObjectOutputStream;
 import java.lang.reflect.Constructor;
 import java.util.ConcurrentModificationException;
 import java.util.Map;
 
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.serde2.SerDe;
 import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.shims.ShimLoader;
 import org.apache.hadoop.io.Writable;
 
 @SuppressWarnings("deprecation")
@@ -86,6 +91,74 @@ public class MapJoinTableContainerSerDe
       throw new HiveException("Error while trying to create table container", e);
     }
   }
+
+  /**
+   * Loads the table container from a folder. Only used on Spark path.
+   * @param fs FileSystem of the folder.
+   * @param folder The folder to load table container.
+   * @return Loaded table.
+   */
+  @SuppressWarnings("unchecked")
+  public MapJoinPersistableTableContainer load(
+      FileSystem fs, Path folder) throws HiveException {
+    try {
+      if (!fs.isDirectory(folder)) {
+        throw new HiveException("Error, not a directory: " + folder);
+      }
+      FileStatus[] fileStatuses = fs.listStatus(folder);
+      if (fileStatuses == null || fileStatuses.length == 0) {
+        return null;
+      }
+
+      SerDe keySerDe = keyContext.getSerDe();
+      SerDe valueSerDe = valueContext.getSerDe();
+      Writable keyContainer = keySerDe.getSerializedClass().newInstance();
+      Writable valueContainer = valueSerDe.getSerializedClass().newInstance();
+
+      MapJoinPersistableTableContainer tableContainer = null;
+
+      for (FileStatus fileStatus: fileStatuses) {
+        Path filePath = fileStatus.getPath();
+        if (ShimLoader.getHadoopShims().isDirectory(fileStatus)) {
+          throw new HiveException("Error, not a file: " + filePath);
+        }
+        InputStream is = null;
+        ObjectInputStream in = null;
+        try {
+          is = fs.open(filePath, 4096);
+          in = new ObjectInputStream(is);
+          String name = in.readUTF();
+          Map<String, String> metaData = (Map<String, String>) in.readObject();
+          if (tableContainer == null) {
+            tableContainer = create(name, metaData);
+          }
+          int numKeys = in.readInt();
+          for (int keyIndex = 0; keyIndex < numKeys; keyIndex++) {
+            MapJoinKeyObject key = new MapJoinKeyObject();
+            key.read(keyContext, in, keyContainer);
+            if (tableContainer.get(key) == null) {
+              tableContainer.put(key, new MapJoinEagerRowContainer());
+            }
+            MapJoinEagerRowContainer values = (MapJoinEagerRowContainer) tableContainer.get(key);
+            values.read(valueContext, in, valueContainer);
+            tableContainer.put(key, values);
+          }
+        } finally {
+          if (in != null) {
+            in.close();
+          } else if (is != null) {
+            is.close();
+          }
+        }
+      }
+      return tableContainer;
+    } catch (IOException e) {
+      throw new HiveException("IO error while trying to create table container", e);
+    } catch (Exception e) {
+      throw new HiveException("Error while trying to create table container", e);
+    }
+  }
+
   public void persist(ObjectOutputStream out, MapJoinPersistableTableContainer tableContainer)
       throws HiveException {
     int numKeys = tableContainer.size();
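
A minimal usage sketch of the new folder-based load on the Spark path (not part
of the patch): the key/value MapJoinObjectSerDeContext arguments, the
two-argument constructor call and the folder location below are assumptions for
illustration only.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    // keyContext/valueContext are assumed to have been built from the
    // small-table descriptor elsewhere.
    static MapJoinPersistableTableContainer loadSmallTable(
        Configuration conf,
        MapJoinObjectSerDeContext keyContext,
        MapJoinObjectSerDeContext valueContext) throws Exception {
      MapJoinTableContainerSerDe containerSerDe =
          new MapJoinTableContainerSerDe(keyContext, valueContext);
      Path folder = new Path("/tmp/hive/smalltable");  // hypothetical location
      FileSystem fs = folder.getFileSystem(conf);
      // Each file under the folder is one persist() output; load() merges them
      // into a single container, or returns null when the folder is empty.
      return containerSerDe.load(fs, folder);
    }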

Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/CustomPartitionVertex.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/CustomPartitionVertex.java?rev=1654355&r1=1654354&r2=1654355&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/CustomPartitionVertex.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/CustomPartitionVertex.java Fri Jan 23 19:59:11 2015
@@ -234,7 +234,7 @@ public class CustomPartitionVertex exten
         try {
           fileSplit = getFileSplitFromEvent(diEvent);
         } catch (IOException e) {
-          throw new RuntimeException("Failed to get file split for event: " + diEvent);
+          throw new RuntimeException("Failed to get file split for event: " + diEvent, e);
         }
         Set<FileSplit> fsList =
             pathFileSplitsMap.get(Utilities.getBucketFileNameFromPathSubString(fileSplit.getPath()
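
The same one-line fix (passing the caught exception as the cause) recurs below
in DagUtils, CastDecimalToString, VectorUDFArgDesc and HiveIndexedInputFormat.
A minimal reminder of why the second argument matters (the method name is
hypothetical):

    try {
      riskyIoCall();                                 // hypothetical I/O call
    } catch (IOException e) {
      // Without ", e" the root cause and its stack trace are dropped; with it
      // they appear as "Caused by:" when the RuntimeException is logged.
      throw new RuntimeException("Failed during I/O", e);
    }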

Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java?rev=1654355&r1=1654354&r2=1654355&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java Fri Jan 23 19:59:11 2015
@@ -578,7 +578,7 @@ public class DagUtils {
         }
       } catch (IOException e) {
         throw new RuntimeException(
-            "Can't make path " + outputPath + " : " + e.getMessage());
+            "Can't make path " + outputPath + " : " + e.getMessage(), e);
       }
     }
 

Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HashTableLoader.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HashTableLoader.java?rev=1654355&r1=1654354&r2=1654355&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HashTableLoader.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HashTableLoader.java Fri Jan 23 19:59:11 2015
@@ -57,8 +57,6 @@ public class HashTableLoader implements
   private ExecMapperContext context;
   private Configuration hconf;
   private MapJoinDesc desc;
-  private MapJoinKey lastKey = null;
-  private int rowCount = 0;
 
   @Override
   public void init(ExecMapperContext context, Configuration hconf, MapJoinOperator joinOp) {
@@ -111,8 +109,7 @@ public class HashTableLoader implements
             : new HashMapWrapper(hconf, keyCount);
 
         while (kvReader.next()) {
-          rowCount++;
-          lastKey = tableContainer.putRow(keyCtx, (Writable)kvReader.getCurrentKey(),
+          tableContainer.putRow(keyCtx, (Writable)kvReader.getCurrentKey(),
               valCtx, (Writable)kvReader.getCurrentValue());
         }
 

Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnAssignFactory.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnAssignFactory.java?rev=1654355&r1=1654354&r2=1654355&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnAssignFactory.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnAssignFactory.java Fri Jan 23 19:59:11 2015
@@ -30,7 +30,9 @@ import org.apache.hadoop.hive.ql.metadat
 import org.apache.hadoop.hive.serde2.io.ByteWritable;
 import org.apache.hadoop.hive.serde2.io.DateWritable;
 import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.hive.serde2.io.HiveCharWritable;
 import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
+import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable;
 import org.apache.hadoop.hive.serde2.io.ShortWritable;
 import org.apache.hadoop.hive.serde2.io.TimestampWritable;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
@@ -414,7 +416,13 @@ public class VectorColumnAssignFactory {
             if (val == null) {
               assignNull(destIndex);
             } else {
-              HiveVarchar hiveVarchar = (HiveVarchar) val;
+              // We store VARCHAR type stripped of pads.
+              HiveVarchar hiveVarchar;
+              if (val instanceof HiveVarchar) {
+                hiveVarchar = (HiveVarchar) val;
+              } else {
+                hiveVarchar = ((HiveVarcharWritable) val).getHiveVarchar();
+              }
               byte[] bytes = hiveVarchar.getValue().getBytes();
               assignBytes(bytes, 0, bytes.length, destIndex);
             }
@@ -429,7 +437,12 @@ public class VectorColumnAssignFactory {
               assignNull(destIndex);
             } else {
               // We store CHAR type stripped of pads.
-              HiveChar hiveChar = (HiveChar) val;
+              HiveChar hiveChar;
+              if (val instanceof HiveChar) {
+                hiveChar = (HiveChar) val;
+              } else {
+                hiveChar = ((HiveCharWritable) val).getHiveChar();
+              }
               byte[] bytes = hiveChar.getStrippedValue().getBytes();
               assignBytes(bytes, 0, bytes.length, destIndex);
             }

Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java?rev=1654355&r1=1654354&r2=1654355&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java Fri Jan 23 19:59:11 2015
@@ -70,6 +70,7 @@ public class VectorGroupByOperator exten
    * Key vector expressions.
    */
   private VectorExpression[] keyExpressions;
+  private int outputKeyLength;
 
   private boolean isVectorOutput;
 
@@ -768,9 +769,16 @@ public class VectorGroupByOperator exten
     List<ExprNodeDesc> keysDesc = conf.getKeys();
     try {
 
-      keyOutputWriters = new VectorExpressionWriter[keyExpressions.length];
+      List<String> outputFieldNames = conf.getOutputColumnNames();
+
+      // grouping id should be pruned, which is the last of key columns
+      // see ColumnPrunerGroupByProc
+      outputKeyLength = 
+          conf.pruneGroupingSetId() ? keyExpressions.length - 1 : keyExpressions.length;
+      
+      keyOutputWriters = new VectorExpressionWriter[outputKeyLength];
 
-      for(int i = 0; i < keyExpressions.length; ++i) {
+      for(int i = 0; i < outputKeyLength; ++i) {
         keyOutputWriters[i] = VectorExpressionWriterFactory.
             genVectorExpressionWritable(keysDesc.get(i));
         objectInspectors.add(keyOutputWriters[i].getObjectInspector());
@@ -788,7 +796,6 @@ public class VectorGroupByOperator exten
         aggregationBatchInfo.compileAggregationBatchInfo(aggregators);
       }
       LOG.warn("VectorGroupByOperator is vector output " + isVectorOutput);
-      List<String> outputFieldNames = conf.getOutputColumnNames();
       outputObjInspector = ObjectInspectorFactory.getStandardStructObjectInspector(
           outputFieldNames, objectInspectors);
       if (isVectorOutput) {
@@ -807,9 +814,9 @@ public class VectorGroupByOperator exten
 
     initializeChildren(hconf);
 
-    forwardCache = new Object[keyExpressions.length + aggregators.length];
+    forwardCache = new Object[outputKeyLength + aggregators.length];
 
-    if (keyExpressions.length == 0) {
+    if (outputKeyLength == 0) {
         processingMode = this.new ProcessingModeGlobalAggregate();
     } else if (conf.getVectorDesc().isVectorGroupBatches()) {
       // Sorted GroupBy of vector batches where an individual batch has the same group key (e.g. reduce).
@@ -872,7 +879,7 @@ public class VectorGroupByOperator exten
     int fi = 0;
     if (!isVectorOutput) {
       // Output row.
-      for (int i = 0; i < keyExpressions.length; ++i) {
+      for (int i = 0; i < outputKeyLength; ++i) {
         forwardCache[fi++] = keyWrappersBatch.getWritableKeyValue (
             kw, i, keyOutputWriters[i]);
       }
@@ -886,7 +893,7 @@ public class VectorGroupByOperator exten
       forward(forwardCache, outputObjInspector);
     } else {
       // Output keys and aggregates into the output batch.
-      for (int i = 0; i < keyExpressions.length; ++i) {
+      for (int i = 0; i < outputKeyLength; ++i) {
         vectorColumnAssign[fi++].assignObjectValue(keyWrappersBatch.getWritableKeyValue (
                   kw, i, keyOutputWriters[i]), outputBatch.size);
       }
@@ -910,7 +917,7 @@ public class VectorGroupByOperator exten
    */
   private void writeGroupRow(VectorAggregationBufferRow agg, DataOutputBuffer buffer)
       throws HiveException {
-    int fi = keyExpressions.length;   // Start after group keys.
+    int fi = outputKeyLength;   // Start after group keys.
     for (int i = 0; i < aggregators.length; ++i) {
       vectorColumnAssign[fi++].assignObjectValue(aggregators[i].evaluateOutput(
                 agg.getAggregationBuffer(i)), outputBatch.size);
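
A quick worked example of the new outputKeyLength: for GROUP BY a, b with
grouping sets enabled, keyExpressions holds a, b plus the synthetic grouping-id
key appended last, so keyExpressions.length == 3 while pruneGroupingSetId() is
true and outputKeyLength == 3 - 1 == 2; without grouping sets the two values
are equal. The key writers, forwardCache and output-batch assignments above are
all sized and indexed by outputKeyLength, so the pruned grouping id never
reaches the output row.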

Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToString.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToString.java?rev=1654355&r1=1654354&r2=1654355&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToString.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToString.java Fri Jan 23 19:59:11 2015
@@ -20,7 +20,6 @@ package org.apache.hadoop.hive.ql.exec.v
 
 import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.DecimalToStringUnaryUDF;
 
 /**
  * To support vectorized cast of decimal to string.
@@ -49,9 +48,8 @@ public class CastDecimalToString extends
     try {
       b = s.getBytes("UTF-8");
     } catch (Exception e) {
-
       // This should never happen. If it does, there is a bug.
-      throw new RuntimeException("Internal error:  unable to convert decimal to string");
+      throw new RuntimeException("Internal error:  unable to convert decimal to string", e);
     }
     assign(outV, i, b, b.length);
   }

Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFArgDesc.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFArgDesc.java?rev=1654355&r1=1654354&r2=1654355&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFArgDesc.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFArgDesc.java Fri Jan 23 19:59:11 2015
@@ -18,7 +18,6 @@
 
 package org.apache.hadoop.hive.ql.exec.vector.udf;
 
-import java.io.IOException;
 import java.io.Serializable;
 
 import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
@@ -107,7 +106,7 @@ public class VectorUDFArgDesc implements
         o = writers[argPosition].writeValue(cv, row);
         return new GenericUDF.DeferredJavaObject(o);
       } catch (HiveException e) {
-        throw new RuntimeException("Unable to get Java object from VectorizedRowBatch");
+        throw new RuntimeException("Unable to get Java object from VectorizedRowBatch", e);
       }
     }
   }

Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexResult.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexResult.java?rev=1654355&r1=1654354&r2=1654355&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexResult.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexResult.java Fri Jan 23 19:59:11 2015
@@ -31,6 +31,7 @@ import org.apache.hadoop.fs.FSDataInputS
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.common.FileUtils;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable;
@@ -98,7 +99,7 @@ public class HiveIndexResult {
         FileSystem fs = indexFilePath.getFileSystem(conf);
         FileStatus indexStat = fs.getFileStatus(indexFilePath);
         if (indexStat.isDir()) {
-          FileStatus[] fss = fs.listStatus(indexFilePath);
+          FileStatus[] fss = fs.listStatus(indexFilePath, FileUtils.HIDDEN_FILES_PATH_FILTER);
           for (FileStatus f : fss) {
             paths.add(f.getPath());
           }

Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexedInputFormat.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexedInputFormat.java?rev=1654355&r1=1654354&r2=1654355&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexedInputFormat.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexedInputFormat.java Fri Jan 23 19:59:11 2015
@@ -185,7 +185,7 @@ public class HiveIndexedInputFormat exte
         }
       } catch (HiveException e) {
         throw new RuntimeException(
-            "Unable to get metadata for input table split" + split.getPath());
+            "Unable to get metadata for input table split" + split.getPath(), e);
       }
     }
     InputSplit retA[] = newSplits.toArray((new FileSplit[newSplits.size()]));

Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/BucketizedHiveInputFormat.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/BucketizedHiveInputFormat.java?rev=1654355&r1=1654354&r2=1654355&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/BucketizedHiveInputFormat.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/BucketizedHiveInputFormat.java Fri Jan 23 19:59:11 2015
@@ -27,7 +27,6 @@ import org.apache.commons.logging.LogFac
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.PathFilter;
 import org.apache.hadoop.hive.common.FileUtils;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.plan.PartitionDesc;
@@ -92,14 +91,7 @@ public class BucketizedHiveInputFormat<K
     List<IOException> errors = new ArrayList<IOException>();
 
     FileSystem fs = dir.getFileSystem(job);
-    FileStatus[] matches = fs.globStatus(dir, new PathFilter() {
-
-      @Override
-      public boolean accept(Path p) {
-        String name = p.getName();
-        return !name.startsWith("_") && !name.startsWith(".");
-      }
-    });
+    FileStatus[] matches = fs.globStatus(dir, FileUtils.HIDDEN_FILES_PATH_FILTER);
     if (matches == null) {
       errors.add(new IOException("Input path does not exist: " + dir));
     } else if (matches.length == 0) {
@@ -113,7 +105,8 @@ public class BucketizedHiveInputFormat<K
     if (!errors.isEmpty()) {
       throw new InvalidInputException(errors);
     }
-    LOG.info("Total input paths to process : " + result.size());
+    LOG.debug("Matches for " + dir + ": " + result);
+    LOG.info("Total input paths to process : " + result.size() + " from dir " + dir);
     return result.toArray(new FileStatus[result.size()]);
 
   }
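
FileUtils.HIDDEN_FILES_PATH_FILTER accepts exactly what the removed anonymous
filter did: paths whose final name component starts with neither "_" nor ".".
The same substitution is applied to the listStatus calls in HiveIndexResult
above and in CombineHiveInputFormat, SymbolicInputFormat and
SymlinkTextInputFormat below.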

Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java?rev=1654355&r1=1654354&r2=1654355&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java Fri Jan 23 19:59:11 2015
@@ -30,6 +30,10 @@ import java.util.List;
 import java.util.Map;
 import java.util.Queue;
 import java.util.Set;
+import java.util.concurrent.Callable;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -38,6 +42,7 @@ import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.PathFilter;
+import org.apache.hadoop.hive.common.FileUtils;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.exec.Operator;
 import org.apache.hadoop.hive.ql.exec.Utilities;
@@ -74,6 +79,48 @@ public class CombineHiveInputFormat<K ex
   private static final String CLASS_NAME = CombineHiveInputFormat.class.getName();
   public static final Log LOG = LogFactory.getLog(CLASS_NAME);
 
+  // max number of threads we can use to check non-combinable paths
+  private static final int MAX_CHECK_NONCOMBINABLE_THREAD_NUM = 50;
+  private static final int DEFAULT_NUM_PATH_PER_THREAD = 100;
+
+  private class CheckNonCombinablePathCallable implements Callable<Set<Integer>> {
+    private final Path[] paths;
+    private final int start;
+    private final int length;
+    private final JobConf conf;
+
+    public CheckNonCombinablePathCallable(Path[] paths, int start, int length, JobConf conf) {
+      this.paths = paths;
+      this.start = start;
+      this.length = length;
+      this.conf = conf;
+    }
+
+    @Override
+    public Set<Integer> call() throws Exception {
+      Set<Integer> nonCombinablePathIndices = new HashSet<Integer>();
+      for (int i = 0; i < length; i++) {
+        PartitionDesc part =
+            HiveFileFormatUtils.getPartitionDescFromPathRecursively(
+                pathToPartitionInfo, paths[i + start],
+                IOPrepareCache.get().allocatePartitionDescMap());
+        // Use HiveInputFormat if any of the paths is not splittable
+        Class<? extends InputFormat> inputFormatClass = part.getInputFileFormatClass();
+        InputFormat<WritableComparable, Writable> inputFormat =
+            getInputFormatFromCache(inputFormatClass, conf);
+        if (inputFormat instanceof AvoidSplitCombination &&
+            ((AvoidSplitCombination) inputFormat).shouldSkipCombine(paths[i + start], conf)) {
+          if (LOG.isDebugEnabled()) {
+            LOG.debug("The path [" + paths[i + start] +
+                "] is being parked for HiveInputFormat.getSplits");
+          }
+          nonCombinablePathIndices.add(i);
+        }
+      }
+      return nonCombinablePathIndices;
+    }
+  }
+
   /**
    * CombineHiveInputSplit encapsulates an InputSplit with its corresponding
    * inputFormatClassName. A CombineHiveInputSplit comprises of multiple chunks
@@ -278,8 +325,6 @@ public class CombineHiveInputFormat<K ex
   private InputSplit[] getCombineSplits(JobConf job, int numSplits,
       Map<String, PartitionDesc> pathToPartitionInfo)
       throws IOException {
-    PerfLogger perfLogger = PerfLogger.getPerfLogger();
-    perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.GET_SPLITS);
     init(job);
     Map<String, ArrayList<String>> pathToAliases = mrwork.getPathToAliases();
     Map<String, Operator<? extends OperatorDesc>> aliasToWork =
@@ -290,7 +335,6 @@ public class CombineHiveInputFormat<K ex
     InputSplit[] splits = null;
     if (combine == null) {
       splits = super.getSplits(job, numSplits);
-      perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.GET_SPLITS);
       return splits;
     }
 
@@ -349,13 +393,12 @@ public class CombineHiveInputFormat<K ex
           } else if ((new CompressionCodecFactory(job)).getCodec(path) != null) {
             //if compresssion codec is set, use HiveInputFormat.getSplits (don't combine)
             splits = super.getSplits(job, numSplits);
-            perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.GET_SPLITS);
             return splits;
           }
 
           while (dirs.peek() != null) {
             Path tstPath = dirs.remove();
-            FileStatus[] fStatus = inpFs.listStatus(tstPath);
+            FileStatus[] fStatus = inpFs.listStatus(tstPath, FileUtils.HIDDEN_FILES_PATH_FILTER);
             for (int idx = 0; idx < fStatus.length; idx++) {
               if (fStatus[idx].isDir()) {
                 dirs.offer(fStatus[idx].getPath());
@@ -363,7 +406,6 @@ public class CombineHiveInputFormat<K ex
                   fStatus[idx].getPath()) != null) {
                 //if compresssion codec is set, use HiveInputFormat.getSplits (don't combine)
                 splits = super.getSplits(job, numSplits);
-                perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.GET_SPLITS);
                 return splits;
               }
             }
@@ -373,7 +415,6 @@ public class CombineHiveInputFormat<K ex
       //don't combine if inputformat is a SymlinkTextInputFormat
       if (inputFormat instanceof SymlinkTextInputFormat) {
         splits = super.getSplits(job, numSplits);
-        perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.GET_SPLITS);
         return splits;
       }
 
@@ -451,7 +492,6 @@ public class CombineHiveInputFormat<K ex
     }
 
     LOG.info("number of splits " + result.size());
-    perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.GET_SPLITS);
     return result.toArray(new CombineHiveInputSplit[result.size()]);
   }
 
@@ -460,6 +500,8 @@ public class CombineHiveInputFormat<K ex
    */
   @Override
   public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
+    PerfLogger perfLogger = PerfLogger.getPerfLogger();
+    perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.GET_SPLITS);
     init(job);
 
     ArrayList<InputSplit> result = new ArrayList<InputSplit>();
@@ -469,26 +511,37 @@ public class CombineHiveInputFormat<K ex
     List<Path> nonCombinablePaths = new ArrayList<Path>(paths.length / 2);
     List<Path> combinablePaths = new ArrayList<Path>(paths.length / 2);
 
-    for (Path path : paths) {
-
-      PartitionDesc part =
-          HiveFileFormatUtils.getPartitionDescFromPathRecursively(
-              pathToPartitionInfo, path,
-              IOPrepareCache.get().allocatePartitionDescMap());
-
-      // Use HiveInputFormat if any of the paths is not splittable
-      Class<? extends InputFormat> inputFormatClass = part.getInputFileFormatClass();
-      InputFormat<WritableComparable, Writable> inputFormat = getInputFormatFromCache(inputFormatClass, job);
-      if (inputFormat instanceof AvoidSplitCombination &&
-          ((AvoidSplitCombination) inputFormat).shouldSkipCombine(path, job)) {
-        if (LOG.isDebugEnabled()) {
-          LOG.debug("The split [" + path +
-              "] is being parked for HiveInputFormat.getSplits");
+    int numThreads = Math.min(MAX_CHECK_NONCOMBINABLE_THREAD_NUM,
+        (int) Math.ceil((double) paths.length / DEFAULT_NUM_PATH_PER_THREAD));
+    int numPathPerThread = (int) Math.ceil((double) paths.length / numThreads);
+    LOG.info("Total number of paths: " + paths.length +
+        ", launching " + numThreads + " threads to check non-combinable ones.");
+    ExecutorService executor = Executors.newFixedThreadPool(numThreads);
+    List<Future<Set<Integer>>> futureList = new ArrayList<Future<Set<Integer>>>(numThreads);
+    try {
+      for (int i = 0; i < numThreads; i++) {
+        int start = i * numPathPerThread;
+        int length = i != numThreads - 1 ? numPathPerThread : paths.length - start;
+        futureList.add(executor.submit(
+            new CheckNonCombinablePathCallable(paths, start, length, job)));
+      }
+      Set<Integer> nonCombinablePathIndices = new HashSet<Integer>();
+      for (Future<Set<Integer>> future : futureList) {
+        nonCombinablePathIndices.addAll(future.get());
+      }
+      for (int i = 0; i < paths.length; i++) {
+        if (nonCombinablePathIndices.contains(i)) {
+          nonCombinablePaths.add(paths[i]);
+        } else {
+          combinablePaths.add(paths[i]);
         }
-        nonCombinablePaths.add(path);
-      } else {
-        combinablePaths.add(path);
       }
+    } catch (Exception e) {
+      LOG.error("Error checking non-combinable path", e);
+      perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.GET_SPLITS);
+      throw new IOException(e);
+    } finally {
+      executor.shutdownNow();
     }
 
     // Store the previous value for the path specification
@@ -528,6 +581,7 @@ public class CombineHiveInputFormat<K ex
       job.set(HiveConf.ConfVars.HADOOPMAPREDINPUTDIR.varname, oldPaths);
     }
     LOG.info("Number of all splits " + result.size());
+    perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.GET_SPLITS);
     return result.toArray(new InputSplit[result.size()]);
   }
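
A worked example of the new path-check partitioning: with 230 input paths,
numThreads = min(50, ceil(230 / 100)) = 3 and numPathPerThread =
ceil(230 / 3) = 77, so the three callables inspect paths [0, 77), [77, 154)
and [154, 230); with 10,000 paths the pool is capped at
MAX_CHECK_NONCOMBINABLE_THREAD_NUM = 50 threads of 200 paths each. Worth
noting: CheckNonCombinablePathCallable.call() records the thread-relative
index (add(i)) while the merging loop in getSplits tests absolute positions
(contains(i) over all paths), so with more than one thread the reported
indices only line up for the first chunk; recording i + start is what the
merge loop appears to expect.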
 

Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/HiveContextAwareRecordReader.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/HiveContextAwareRecordReader.java?rev=1654355&r1=1654354&r2=1654355&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/HiveContextAwareRecordReader.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/HiveContextAwareRecordReader.java Fri Jan 23 19:59:11 2015
@@ -66,6 +66,7 @@ public abstract class HiveContextAwareRe
   private boolean wasUsingSortedSearch = false;
   private String genericUDFClassName = null;
   private final List<Comparison> stopComparisons = new ArrayList<Comparison>();
+  private Map<String, PartitionDesc> pathToPartitionInfo;
 
   protected RecordReader recordReader;
   protected JobConf jobConf;
@@ -116,11 +117,11 @@ public abstract class HiveContextAwareRe
       if(retVal) {
         if(key instanceof RecordIdentifier) {
           //supports AcidInputFormat which uses the KEY pass ROW__ID info
-          ioCxtRef.ri = (RecordIdentifier)key;
+          ioCxtRef.setRecordIdentifier((RecordIdentifier)key);
         }
         else if(recordReader instanceof AcidInputFormat.AcidRecordReader) {
           //supports AcidInputFormat which do not use the KEY pass ROW__ID info
-          ioCxtRef.ri = ((AcidInputFormat.AcidRecordReader) recordReader).getRecordIdentifier();
+          ioCxtRef.setRecordIdentifier(((AcidInputFormat.AcidRecordReader) recordReader).getRecordIdentifier());
         }
       }
       return retVal;
@@ -133,43 +134,43 @@ public abstract class HiveContextAwareRe
   protected void updateIOContext()
       throws IOException {
     long pointerPos = this.getPos();
-    if (!ioCxtRef.isBlockPointer) {
-      ioCxtRef.currentBlockStart = pointerPos;
-      ioCxtRef.currentRow = 0;
+    if (!ioCxtRef.isBlockPointer()) {
+      ioCxtRef.setCurrentBlockStart(pointerPos);
+      ioCxtRef.setCurrentRow(0);
       return;
     }
 
-    ioCxtRef.currentRow++;
+    ioCxtRef.setCurrentRow(ioCxtRef.getCurrentRow() + 1);
 
-    if (ioCxtRef.nextBlockStart == -1) {
-      ioCxtRef.nextBlockStart = pointerPos;
-      ioCxtRef.currentRow = 0;
+    if (ioCxtRef.getNextBlockStart() == -1) {
+      ioCxtRef.setNextBlockStart(pointerPos);
+      ioCxtRef.setCurrentRow(0);
     }
-    if (pointerPos != ioCxtRef.nextBlockStart) {
+    if (pointerPos != ioCxtRef.getNextBlockStart()) {
       // the reader pointer has moved to the end of next block, or the end of
       // current record.
 
-      ioCxtRef.currentRow = 0;
+      ioCxtRef.setCurrentRow(0);
 
-      if (ioCxtRef.currentBlockStart == ioCxtRef.nextBlockStart) {
-        ioCxtRef.currentRow = 1;
+      if (ioCxtRef.getCurrentBlockStart() == ioCxtRef.getNextBlockStart()) {
+        ioCxtRef.setCurrentRow(1);
       }
 
-      ioCxtRef.currentBlockStart = ioCxtRef.nextBlockStart;
-      ioCxtRef.nextBlockStart = pointerPos;
+      ioCxtRef.setCurrentBlockStart(ioCxtRef.getNextBlockStart());
+      ioCxtRef.setNextBlockStart(pointerPos);
     }
   }
 
   public IOContext getIOContext() {
-    return IOContext.get(jobConf.get(Utilities.INPUT_NAME));
+    return IOContext.get(jobConf);
   }
 
   private void initIOContext(long startPos, boolean isBlockPointer,
       Path inputPath) {
     ioCxtRef = this.getIOContext();
-    ioCxtRef.currentBlockStart = startPos;
-    ioCxtRef.isBlockPointer = isBlockPointer;
-    ioCxtRef.inputPath = inputPath;
+    ioCxtRef.setCurrentBlockStart(startPos);
+    ioCxtRef.setBlockPointer(isBlockPointer);
+    ioCxtRef.setInputPath(inputPath);
     LOG.info("Processing file " + inputPath);
     initDone = true;
   }
@@ -222,7 +223,7 @@ public abstract class HiveContextAwareRe
       // Binary search only works if we know the size of the split, and the recordReader is an
       // RCFileRecordReader
       this.getIOContext().setUseSorted(true);
-      this.getIOContext().setIsBinarySearching(true);
+      this.getIOContext().setBinarySearching(true);
       this.wasUsingSortedSearch = true;
     } else {
       // Use the defalut methods for next in the child class
@@ -284,7 +285,7 @@ public abstract class HiveContextAwareRe
           // binary search, if the new position at least as big as the size of the split, any
           // matching rows must be in the final block, so we can end the binary search.
           if (newPosition == previousPosition || newPosition >= splitEnd) {
-            this.getIOContext().setIsBinarySearching(false);
+            this.getIOContext().setBinarySearching(false);
             sync(rangeStart);
           }
 
@@ -310,8 +311,10 @@ public abstract class HiveContextAwareRe
         Path filePath = this.ioCxtRef.getInputPath();
         PartitionDesc part = null;
         try {
-          Map<String, PartitionDesc> pathToPartitionInfo = Utilities
+          if (pathToPartitionInfo == null) {
+            pathToPartitionInfo = Utilities
               .getMapWork(jobConf).getPathToPartitionInfo();
+          }
           part = HiveFileFormatUtils
               .getPartitionDescFromPathRecursively(pathToPartitionInfo,
                   filePath, IOPrepareCache.get().getPartitionDescMap());
@@ -402,7 +405,7 @@ public abstract class HiveContextAwareRe
    */
   private void beginLinearSearch() throws IOException {
     sync(rangeStart);
-    this.getIOContext().setIsBinarySearching(false);
+    this.getIOContext().setBinarySearching(false);
     this.wasUsingSortedSearch = false;
   }
 

Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/IOContext.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/IOContext.java?rev=1654355&r1=1654354&r2=1654355&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/IOContext.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/IOContext.java Fri Jan 23 19:59:11 2015
@@ -21,8 +21,10 @@ package org.apache.hadoop.hive.ql.io;
 import java.util.HashMap;
 import java.util.Map;
 
+import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
-
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.exec.Utilities;
 
 /**
  * IOContext basically contains the position information of the current
@@ -34,18 +36,30 @@ import org.apache.hadoop.fs.Path;
  */
 public class IOContext {
 
-  private static ThreadLocal<IOContext> threadLocal = new ThreadLocal<IOContext>(){
+  /**
+   * Spark uses this thread local
+   */
+  private static final ThreadLocal<IOContext> threadLocal = new ThreadLocal<IOContext>(){
     @Override
     protected synchronized IOContext initialValue() { return new IOContext(); }
  };
 
-  private static Map<String, IOContext> inputNameIOContextMap = new HashMap<String, IOContext>();
-  public static Map<String, IOContext> getMap() {
-    return inputNameIOContextMap;
+  private static IOContext get() {
+    return IOContext.threadLocal.get();
   }
 
-  public static IOContext get(String inputName) {
-    if (inputNameIOContextMap.containsKey(inputName) == false) {
+  /**
+   * Tez and MR use this map but are single threaded per JVM thus no synchronization is required.
+   */
+  private static final Map<String, IOContext> inputNameIOContextMap = new HashMap<String, IOContext>();
+
+
+  public static IOContext get(Configuration conf) {
+    if (HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("spark")) {
+      return get();
+    }
+    String inputName = conf.get(Utilities.INPUT_NAME);
+    if (!inputNameIOContextMap.containsKey(inputName)) {
       IOContext ioContext = new IOContext();
       inputNameIOContextMap.put(inputName, ioContext);
     }
@@ -58,26 +72,26 @@ public class IOContext {
     inputNameIOContextMap.clear();
   }
 
-  long currentBlockStart;
-  long nextBlockStart;
-  long currentRow;
-  boolean isBlockPointer;
-  boolean ioExceptions;
+  private long currentBlockStart;
+  private long nextBlockStart;
+  private long currentRow;
+  private boolean isBlockPointer;
+  private boolean ioExceptions;
 
   // Are we using the fact the input is sorted
-  boolean useSorted = false;
+  private boolean useSorted = false;
   // Are we currently performing a binary search
-  boolean isBinarySearching = false;
+  private boolean isBinarySearching = false;
   // Do we want to end the binary search
-  boolean endBinarySearch = false;
+  private boolean endBinarySearch = false;
   // The result of the comparison of the last row processed
-  Comparison comparison = null;
+  private Comparison comparison = null;
   // The class name of the generic UDF being used by the filter
-  String genericUDFClassName = null;
+  private String genericUDFClassName = null;
   /**
    * supports {@link org.apache.hadoop.hive.ql.metadata.VirtualColumn#ROWID}
    */
-  public RecordIdentifier ri;
+  private  RecordIdentifier ri;
 
   public static enum Comparison {
     GREATER,
@@ -86,7 +100,7 @@ public class IOContext {
     UNKNOWN
   }
 
-  Path inputPath;
+  private Path inputPath;
 
   public IOContext() {
     this.currentBlockStart = 0;
@@ -156,7 +170,7 @@ public class IOContext {
     return isBinarySearching;
   }
 
-  public void setIsBinarySearching(boolean isBinarySearching) {
+  public void setBinarySearching(boolean isBinarySearching) {
     this.isBinarySearching = isBinarySearching;
   }
 
@@ -197,6 +211,14 @@ public class IOContext {
     this.genericUDFClassName = genericUDFClassName;
   }
 
+  public RecordIdentifier getRecordIdentifier() {
+    return this.ri;
+  }
+
+  public void setRecordIdentifier(RecordIdentifier ri) {
+    this.ri = ri;
+  }
+
   /**
    * The thread local IOContext is static, we may need to restart the search if, for instance,
    * multiple files are being searched as part of a CombinedHiveRecordReader
@@ -208,4 +230,5 @@ public class IOContext {
     this.comparison = null;
     this.genericUDFClassName = null;
   }
+
 }
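
A short sketch of what callers see after this change (a code fragment, not part
of the patch; the input name is hypothetical): record readers such as
HiveContextAwareRecordReader above now pass their JobConf instead of an input
name, and get(Configuration) selects the thread-local instance when
hive.execution.engine is "spark" and the per-input-name map entry otherwise.

    JobConf job = new JobConf();
    job.set("hive.execution.engine", "mr");   // "spark" would use the thread-local
    job.set(Utilities.INPUT_NAME, "map_0");   // hypothetical name, keys the Tez/MR map
    IOContext ctx = IOContext.get(job);
    ctx.setCurrentBlockStart(0L);             // fields are now reached through setters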

Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/SymbolicInputFormat.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/SymbolicInputFormat.java?rev=1654355&r1=1654354&r2=1654355&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/SymbolicInputFormat.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/SymbolicInputFormat.java Fri Jan 23 19:59:11 2015
@@ -29,6 +29,7 @@ import java.util.Map;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.common.FileUtils;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.plan.MapredWork;
 import org.apache.hadoop.hive.ql.plan.PartitionDesc;
@@ -58,7 +59,7 @@ public class SymbolicInputFormat impleme
         if (!fStatus.isDir()) {
           symlinks = new FileStatus[] { fStatus };
         } else {
-          symlinks = fileSystem.listStatus(symlinkDir);
+          symlinks = fileSystem.listStatus(symlinkDir, FileUtils.HIDDEN_FILES_PATH_FILTER);
         }
         toRemovePaths.add(path);
         ArrayList<String> aliases = pathToAliases.remove(path);

Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/SymlinkTextInputFormat.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/SymlinkTextInputFormat.java?rev=1654355&r1=1654354&r2=1654355&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/SymlinkTextInputFormat.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/SymlinkTextInputFormat.java Fri Jan 23 19:59:11 2015
@@ -23,19 +23,15 @@ import java.io.DataOutput;
 import java.io.IOException;
 import java.io.InputStreamReader;
 import java.util.ArrayList;
-import java.util.HashMap;
 import java.util.List;
-import java.util.Map;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.ContentSummary;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.common.FileUtils;
 import org.apache.hadoop.hive.io.HiveIOExceptionHandlerUtil;
-import org.apache.hadoop.hive.ql.plan.MapredWork;
-import org.apache.hadoop.hive.ql.plan.PartitionDesc;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapred.FileInputFormat;
@@ -192,7 +188,7 @@ public class SymlinkTextInputFormat exte
       List<Path> targetPaths, List<Path> symlinkPaths) throws IOException {
     for (Path symlinkDir : symlinksDirs) {
       FileSystem fileSystem = symlinkDir.getFileSystem(conf);
-      FileStatus[] symlinks = fileSystem.listStatus(symlinkDir);
+      FileStatus[] symlinks = fileSystem.listStatus(symlinkDir, FileUtils.HIDDEN_FILES_PATH_FILTER);
 
       // Read paths from each symlink file.
       for (FileStatus symlink : symlinks) {

Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/avro/AvroContainerOutputFormat.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/avro/AvroContainerOutputFormat.java?rev=1654355&r1=1654354&r2=1654355&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/avro/AvroContainerOutputFormat.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/avro/AvroContainerOutputFormat.java Fri Jan 23 19:59:11 2015
@@ -60,7 +60,7 @@ public class AvroContainerOutputFormat
          Properties properties, Progressable progressable) throws IOException {
     Schema schema;
     try {
-      schema = AvroSerdeUtils.determineSchemaOrThrowException(properties);
+      schema = AvroSerdeUtils.determineSchemaOrThrowException(jobConf, properties);
     } catch (AvroSerdeException e) {
       throw new IOException(e);
     }

Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/avro/AvroGenericRecordReader.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/avro/AvroGenericRecordReader.java?rev=1654355&r1=1654354&r2=1654355&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/avro/AvroGenericRecordReader.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/avro/AvroGenericRecordReader.java Fri Jan 23 19:59:11 2015
@@ -109,7 +109,7 @@ public class AvroGenericRecordReader imp
 
           Properties props = pathsAndParts.getValue().getProperties();
           if(props.containsKey(AvroSerdeUtils.SCHEMA_LITERAL) || props.containsKey(AvroSerdeUtils.SCHEMA_URL)) {
-            return AvroSerdeUtils.determineSchemaOrThrowException(props);
+            return AvroSerdeUtils.determineSchemaOrThrowException(job, props);
           }
           else {
             return null; // If it's not in this property, it won't be in any others

Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatistics.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatistics.java?rev=1654355&r1=1654354&r2=1654355&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatistics.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatistics.java Fri Jan 23 19:59:11 2015
@@ -27,4 +27,10 @@ public interface ColumnStatistics {
    * @return the number of values
    */
   long getNumberOfValues();
+
+  /**
+   * Returns true if there are nulls in the scope of column statistics.
+   * @return true if null present else false
+   */
+  boolean hasNull();
 }

Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java?rev=1654355&r1=1654354&r2=1654355&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java Fri Jan 23 19:59:11 2015
@@ -17,8 +17,6 @@
  */
 package org.apache.hadoop.hive.ql.io.orc;
 
-import java.sql.Timestamp;
-
 import org.apache.hadoop.hive.common.type.HiveDecimal;
 import org.apache.hadoop.hive.serde2.io.DateWritable;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
@@ -26,6 +24,8 @@ import org.apache.hadoop.hive.serde2.obj
 import org.apache.hadoop.io.BytesWritable;
 import org.apache.hadoop.io.Text;
 
+import java.sql.Timestamp;
+
 class ColumnStatisticsImpl implements ColumnStatistics {
 
   private static final class BooleanStatisticsImpl extends ColumnStatisticsImpl
@@ -816,11 +816,16 @@ class ColumnStatisticsImpl implements Co
   }
 
   private long count = 0;
+  private boolean hasNull = false;
 
   ColumnStatisticsImpl(OrcProto.ColumnStatistics stats) {
     if (stats.hasNumberOfValues()) {
       count = stats.getNumberOfValues();
     }
+
+    if (stats.hasHasNull()) {
+      hasNull = stats.getHasNull();
+    }
   }
 
   ColumnStatisticsImpl() {
@@ -830,6 +835,10 @@ class ColumnStatisticsImpl implements Co
     count += 1;
   }
 
+  void setNull() {
+    hasNull = true;
+  }
+
   void updateBoolean(boolean value) {
     throw new UnsupportedOperationException("Can't update boolean");
   }
@@ -864,10 +873,12 @@ class ColumnStatisticsImpl implements Co
 
   void merge(ColumnStatisticsImpl stats) {
     count += stats.count;
+    hasNull |= stats.hasNull;
   }
 
   void reset() {
     count = 0;
+    hasNull = false;
   }
 
   @Override
@@ -876,14 +887,20 @@ class ColumnStatisticsImpl implements Co
   }
 
   @Override
+  public boolean hasNull() {
+    return hasNull;
+  }
+
+  @Override
   public String toString() {
-    return "count: " + count;
+    return "count: " + count + " hasNull: " + hasNull;
   }
 
   OrcProto.ColumnStatistics.Builder serialize() {
     OrcProto.ColumnStatistics.Builder builder =
       OrcProto.ColumnStatistics.newBuilder();
     builder.setNumberOfValues(count);
+    builder.setHasNull(hasNull);
     return builder;
   }
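
A short sketch of reading the new flag back (the file path is hypothetical;
OrcFile/Reader are the existing ORC reader entry points, assumed unchanged by
this patch). Files written before this change carry no hasNull field in their
protobuf statistics, so the deserialized value defaults to false.

    Configuration conf = new Configuration();
    Path orcFile = new Path("/tmp/example.orc");   // hypothetical file
    Reader reader = OrcFile.createReader(orcFile, OrcFile.readerOptions(conf));
    ColumnStatistics[] stats = reader.getStatistics();
    for (int col = 0; col < stats.length; col++) {
      System.out.println("column " + col
          + " count: " + stats[col].getNumberOfValues()
          + " hasNull: " + stats[col].hasNull());
    }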
 

Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java?rev=1654355&r1=1654354&r2=1654355&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java Fri Jan 23 19:59:11 2015
@@ -17,15 +17,6 @@
  */
 package org.apache.hadoop.hive.ql.io.orc;
 
-import java.io.OutputStreamWriter;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-
-import java.io.IOException;
-import java.text.DecimalFormat;
-import java.util.Map;
-
 import org.apache.commons.cli.CommandLine;
 import org.apache.commons.cli.GnuParser;
 import org.apache.commons.cli.HelpFormatter;
@@ -46,6 +37,14 @@ import org.apache.hadoop.io.LongWritable
 import org.codehaus.jettison.json.JSONException;
 import org.codehaus.jettison.json.JSONWriter;
 
+import java.io.IOException;
+import java.io.OutputStreamWriter;
+import java.text.DecimalFormat;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+
 /**
  * A tool for printing out the file structure of ORC files.
  */
@@ -170,10 +169,7 @@ public final class FileDump {
                 buf.append("no stats at ");
               } else {
                 ColumnStatistics cs = ColumnStatisticsImpl.deserialize(colStats);
-                Object min = RecordReaderImpl.getMin(cs), max = RecordReaderImpl.getMax(cs);
-                buf.append(" count: ").append(cs.getNumberOfValues());
-                buf.append(" min: ").append(min);
-                buf.append(" max: ").append(max);
+                buf.append(cs.toString());
               }
               buf.append(" positions: ");
               for (int posIx = 0; posIx < entry.getPositionsCount(); ++posIx) {

Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java?rev=1654355&r1=1654354&r2=1654355&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java Fri Jan 23 19:59:11 2015
@@ -18,18 +18,9 @@
 
 package org.apache.hadoop.hive.ql.io.orc;
 
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.TreeMap;
-import java.util.NavigableMap;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-import java.util.concurrent.TimeUnit;
-import java.util.concurrent.atomic.AtomicInteger;
+import com.google.common.cache.Cache;
+import com.google.common.cache.CacheBuilder;
+import com.google.common.util.concurrent.ThreadFactoryBuilder;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -58,9 +49,9 @@ import org.apache.hadoop.hive.ql.io.Reco
 import org.apache.hadoop.hive.ql.io.StatsProvidingRecordReader;
 import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
 import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
+import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue;
 import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory;
 import org.apache.hadoop.hive.ql.log.PerfLogger;
-import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue;
 import org.apache.hadoop.hive.ql.plan.TableScanDesc;
 import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
 import org.apache.hadoop.hive.serde2.SerDeStats;
@@ -78,9 +69,18 @@ import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.Reporter;
 import org.apache.hadoop.util.StringUtils;
 
-import com.google.common.cache.Cache;
-import com.google.common.cache.CacheBuilder;
-import com.google.common.util.concurrent.ThreadFactoryBuilder;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.NavigableMap;
+import java.util.TreeMap;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicInteger;
 /**
  * A MapReduce/Hive input format for ORC files.
  * <p>
@@ -919,13 +919,8 @@ public class OrcInputFormat  implements
         if (filterColumns[pred] != -1) {
 
           // column statistics at index 0 contains only the number of rows
-          ColumnStatistics stats =
-              stripeStatistics.getColumnStatistics()[filterColumns[pred]];
-          Object minValue = RecordReaderImpl.getMin(stats);
-          Object maxValue = RecordReaderImpl.getMax(stats);
-          truthValues[pred] =
-              RecordReaderImpl.evaluatePredicateRange(predLeaves.get(pred),
-                  minValue, maxValue);
+          ColumnStatistics stats = stripeStatistics.getColumnStatistics()[filterColumns[pred]];
+          truthValues[pred] = RecordReaderImpl.evaluatePredicate(stats, predLeaves.get(pred));
         } else {
 
          // partition column case.

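The hunk above hands the already-deserialized per-stripe ColumnStatistics straight to RecordReaderImpl.evaluatePredicate, producing one truth value per predicate leaf. A hedged sketch of how such leaf values are typically folded into a keep/skip decision for a stripe (SearchArgument.evaluate and TruthValue.isNeeded are assumed to behave as they do elsewhere in this codebase):

import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue;

final class StripePruningSketch {
  // Combine the per-leaf truth values through the filter's expression tree;
  // the stripe is skipped only when the result proves no row can match.
  static boolean keepStripe(SearchArgument sarg, TruthValue[] leafValues) {
    return sarg.evaluate(leafValues).isNeeded();
  }
}
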
Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java?rev=1654355&r1=1654354&r2=1654355&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java Fri Jan 23 19:59:11 2015
@@ -2377,21 +2377,36 @@ public class RecordReaderImpl implements
   /**
    * Evaluate a predicate with respect to the statistics from the column
    * that is referenced in the predicate.
-   * @param index the statistics for the column mentioned in the predicate
+   * @param statsProto the statistics for the column mentioned in the predicate
   * @param predicate the leaf predicate we need to evaluate
    * @return the set of truth values that may be returned for the given
    *   predicate.
    */
-  static TruthValue evaluatePredicate(OrcProto.ColumnStatistics index,
+  static TruthValue evaluatePredicate(OrcProto.ColumnStatistics statsProto,
                                       PredicateLeaf predicate) {
-    ColumnStatistics cs = ColumnStatisticsImpl.deserialize(index);
+    ColumnStatistics cs = ColumnStatisticsImpl.deserialize(statsProto);
     Object minValue = getMin(cs);
     Object maxValue = getMax(cs);
-    return evaluatePredicateRange(predicate, minValue, maxValue);
+    return evaluatePredicateRange(predicate, minValue, maxValue, cs.hasNull());
+  }
+
+  /**
+   * Evaluate a predicate with respect to the statistics from the column
+   * that is referenced in the predicate.
+   * @param stats the statistics for the column mentioned in the predicate
+   * @param predicate the leaf predicate we need to evaluate
+   * @return the set of truth values that may be returned for the given
+   *   predicate.
+   */
+  static TruthValue evaluatePredicate(ColumnStatistics stats,
+      PredicateLeaf predicate) {
+    Object minValue = getMin(stats);
+    Object maxValue = getMax(stats);
+    return evaluatePredicateRange(predicate, minValue, maxValue, stats.hasNull());
   }
 
   static TruthValue evaluatePredicateRange(PredicateLeaf predicate, Object min,
-      Object max) {
+      Object max, boolean hasNull) {
     // if we didn't have any values, everything must have been null
     if (min == null) {
       if (predicate.getOperator() == PredicateLeaf.Operator.IS_NULL) {
@@ -2426,29 +2441,29 @@ public class RecordReaderImpl implements
       case EQUALS:
         loc = compareToRange((Comparable) predObj, minValue, maxValue);
         if (minValue.equals(maxValue) && loc == Location.MIN) {
-          return TruthValue.YES_NULL;
+          return hasNull ? TruthValue.YES_NULL : TruthValue.YES;
         } else if (loc == Location.BEFORE || loc == Location.AFTER) {
-          return TruthValue.NO_NULL;
+          return hasNull ? TruthValue.NO_NULL : TruthValue.NO;
         } else {
-          return TruthValue.YES_NO_NULL;
+          return hasNull ? TruthValue.YES_NO_NULL : TruthValue.YES_NO;
         }
       case LESS_THAN:
         loc = compareToRange((Comparable) predObj, minValue, maxValue);
         if (loc == Location.AFTER) {
-          return TruthValue.YES_NULL;
+          return hasNull ? TruthValue.YES_NULL : TruthValue.YES;
         } else if (loc == Location.BEFORE || loc == Location.MIN) {
-          return TruthValue.NO_NULL;
+          return hasNull ? TruthValue.NO_NULL : TruthValue.NO;
         } else {
-          return TruthValue.YES_NO_NULL;
+          return hasNull ? TruthValue.YES_NO_NULL : TruthValue.YES_NO;
         }
       case LESS_THAN_EQUALS:
         loc = compareToRange((Comparable) predObj, minValue, maxValue);
         if (loc == Location.AFTER || loc == Location.MAX) {
-          return TruthValue.YES_NULL;
+          return hasNull ? TruthValue.YES_NULL : TruthValue.YES;
         } else if (loc == Location.BEFORE) {
-          return TruthValue.NO_NULL;
+          return hasNull ? TruthValue.NO_NULL : TruthValue.NO;
         } else {
-          return TruthValue.YES_NO_NULL;
+          return hasNull ? TruthValue.YES_NO_NULL : TruthValue.YES_NO;
         }
       case IN:
         if (minValue.equals(maxValue)) {
@@ -2458,10 +2473,10 @@ public class RecordReaderImpl implements
             predObj = getBaseObjectForComparison(arg, minValue);
             loc = compareToRange((Comparable) predObj, minValue, maxValue);
             if (loc == Location.MIN) {
-              return TruthValue.YES_NULL;
+              return hasNull ? TruthValue.YES_NULL : TruthValue.YES;
             }
           }
-          return TruthValue.NO_NULL;
+          return hasNull ? TruthValue.NO_NULL : TruthValue.NO;
         } else {
           // are all of the values outside of the range?
           for (Object arg : predicate.getLiteralList(PredicateLeaf.FileFormat.ORC)) {
@@ -2469,10 +2484,10 @@ public class RecordReaderImpl implements
             loc = compareToRange((Comparable) predObj, minValue, maxValue);
             if (loc == Location.MIN || loc == Location.MIDDLE ||
                 loc == Location.MAX) {
-              return TruthValue.YES_NO_NULL;
+              return hasNull ? TruthValue.YES_NO_NULL : TruthValue.YES_NO;
             }
           }
-          return TruthValue.NO_NULL;
+          return hasNull ? TruthValue.NO_NULL : TruthValue.NO;
         }
       case BETWEEN:
         List<Object> args = predicate.getLiteralList(PredicateLeaf.FileFormat.ORC);
@@ -2484,26 +2499,26 @@ public class RecordReaderImpl implements
 
           Location loc2 = compareToRange((Comparable) predObj2, minValue, maxValue);
           if (loc2 == Location.AFTER || loc2 == Location.MAX) {
-            return TruthValue.YES_NULL;
+            return hasNull ? TruthValue.YES_NULL : TruthValue.YES;
           } else if (loc2 == Location.BEFORE) {
-            return TruthValue.NO_NULL;
+            return hasNull ? TruthValue.NO_NULL : TruthValue.NO;
           } else {
-            return TruthValue.YES_NO_NULL;
+            return hasNull ? TruthValue.YES_NO_NULL : TruthValue.YES_NO;
           }
         } else if (loc == Location.AFTER) {
-          return TruthValue.NO_NULL;
+          return hasNull ? TruthValue.NO_NULL : TruthValue.NO;
         } else {
-          return TruthValue.YES_NO_NULL;
+          return hasNull ? TruthValue.YES_NO_NULL : TruthValue.YES_NO;
         }
       case IS_NULL:
-        return TruthValue.YES_NO;
+        return hasNull ? TruthValue.YES : TruthValue.NO;
       default:
-        return TruthValue.YES_NO_NULL;
+        return hasNull ? TruthValue.YES_NO_NULL : TruthValue.YES_NO;
       }
 
       // in case failed conversion, return the default YES_NO_NULL truth value
     } catch (NumberFormatException nfe) {
-      return TruthValue.YES_NO_NULL;
+      return hasNull ? TruthValue.YES_NO_NULL : TruthValue.YES_NO;
     }
   }
 

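Every changed return site above applies the same rule: the NULL component is kept only when the column statistics report that nulls are actually present. A hypothetical helper (not part of the patch) that captures the widening in one place:

import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue;

final class TruthValueWidening {
  // Add the NULL possibility only if the stripe or row group can contain
  // nulls; otherwise keep the tighter value so the filter can prune more.
  static TruthValue withNulls(TruthValue value, boolean hasNull) {
    if (!hasNull) {
      return value;
    }
    switch (value) {
      case YES:    return TruthValue.YES_NULL;
      case NO:     return TruthValue.NO_NULL;
      case YES_NO: return TruthValue.YES_NO_NULL;
      default:     return value; // already accounts for nulls
    }
  }
}

The IS_NULL case is the one exception: rather than widening, it now resolves directly to YES when nulls were recorded and to NO when none were.
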
Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java?rev=1654355&r1=1654354&r2=1654355&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java Fri Jan 23 19:59:11 2015
@@ -656,6 +656,8 @@ class WriterImpl implements Writer, Memo
     void write(Object obj) throws IOException {
       if (obj != null) {
         indexStatistics.increment();
+      } else {
+        indexStatistics.setNull();
       }
       if (isPresent != null) {
         isPresent.write(obj == null ? 0 : 1);

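On the writer side, the two-line change above is what feeds that flag: non-null values bump the count, nulls set the flag. A minimal sketch (hypothetical class, reusing the NullAwareCountStats sketch shown earlier) of the same rule:

final class WriterNullTracking {
  static void record(NullAwareCountStats stats, Object value) {
    if (value != null) {
      stats.increment();  // counted values exclude nulls, as before
    } else {
      stats.setNull();    // new: remember that this column saw a null
    }
  }
}
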
Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java?rev=1654355&r1=1654354&r2=1654355&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java Fri Jan 23 19:59:11 2015
@@ -66,8 +66,8 @@ public class DataWritableWriter {
         writeGroupFields(record, schema);
       } catch (RuntimeException e) {
         String errorMessage = "Parquet record is malformed: " + e.getMessage();
-        LOG.error(errorMessage);
-        throw new RuntimeException(errorMessage);
+        LOG.error(errorMessage, e);
+        throw new RuntimeException(errorMessage, e);
       }
       recordConsumer.endMessage();
     }

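This change, and the matching RuntimeException changes in the next two files, make the same point: rethrowing with the original exception as the cause preserves its stack trace instead of only its message. A minimal illustration with hypothetical names:

final class CauseChainingSketch {
  static void load(String className) {
    try {
      Class.forName(className);
    } catch (ClassNotFoundException e) {
      // passing e as the cause keeps its stack trace attached to the new exception
      throw new RuntimeException(e.getMessage(), e);
    }
  }
}
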
Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/stats/PartialScanTask.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/stats/PartialScanTask.java?rev=1654355&r1=1654354&r2=1654355&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/stats/PartialScanTask.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/stats/PartialScanTask.java Fri Jan 23 19:59:11 2015
@@ -51,7 +51,6 @@ import org.apache.hadoop.hive.ql.session
 import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
 import org.apache.hadoop.hive.ql.stats.StatsFactory;
 import org.apache.hadoop.hive.ql.stats.StatsPublisher;
-import org.apache.hadoop.hive.shims.ShimLoader;
 import org.apache.hadoop.io.NullWritable;
 import org.apache.hadoop.mapred.Counters;
 import org.apache.hadoop.mapred.FileInputFormat;
@@ -148,7 +147,7 @@ public class PartialScanTask extends Tas
       job.setInputFormat((Class<? extends InputFormat>) (Class
           .forName(inpFormat)));
     } catch (ClassNotFoundException e) {
-      throw new RuntimeException(e.getMessage());
+      throw new RuntimeException(e.getMessage(), e);
     }
 
     job.setOutputKeyClass(NullWritable.class);

Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/truncate/ColumnTruncateTask.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/truncate/ColumnTruncateTask.java?rev=1654355&r1=1654354&r2=1654355&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/truncate/ColumnTruncateTask.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/truncate/ColumnTruncateTask.java Fri Jan 23 19:59:11 2015
@@ -39,7 +39,6 @@ import org.apache.hadoop.hive.ql.io.Hive
 import org.apache.hadoop.hive.ql.plan.MapredWork;
 import org.apache.hadoop.hive.ql.plan.api.StageType;
 import org.apache.hadoop.hive.ql.session.SessionState;
-import org.apache.hadoop.hive.shims.ShimLoader;
 import org.apache.hadoop.io.NullWritable;
 import org.apache.hadoop.mapred.Counters;
 import org.apache.hadoop.mapred.FileInputFormat;
@@ -124,7 +123,7 @@ public class ColumnTruncateTask extends
       job.setInputFormat((Class<? extends InputFormat>) (Class
           .forName(inpFormat)));
     } catch (ClassNotFoundException e) {
-      throw new RuntimeException(e.getMessage());
+      throw new RuntimeException(e.getMessage(), e);
     }
 
     Path outputPath = this.work.getOutputDir();

Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DummyTxnManager.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DummyTxnManager.java?rev=1654355&r1=1654354&r2=1654355&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DummyTxnManager.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DummyTxnManager.java Fri Jan 23 19:59:11 2015
@@ -254,9 +254,13 @@ class DummyTxnManager extends HiveTxnMan
 
   private HiveLockMode getWriteEntityLockMode (WriteEntity we) {
     HiveLockMode lockMode = we.isComplete() ? HiveLockMode.EXCLUSIVE : HiveLockMode.SHARED;
-    //but the writeEntity is complete in DDL operations, and we need check its writeType to
-    //to determine the lockMode
-    switch (we.getWriteType()) {
+    //but the writeEntity is always complete for DDL operations; DDL sets the writeType
+    //instead, so we use that to determine the lockMode, checking first that it was set
+    WriteEntity.WriteType writeType = we.getWriteType();
+    if (writeType == null) {
+      return lockMode;
+    }
+    switch (writeType) {
       case DDL_EXCLUSIVE:
         return HiveLockMode.EXCLUSIVE;
       case DDL_SHARED: