Posted to commits@hive.apache.org by mm...@apache.org on 2016/08/20 08:32:56 UTC

[1/3] hive git commit: HIVE-13874: Tighten up EOF checking in Fast DeserializeRead classes; display better exception information; add new Unit Tests (Matt McCline, reviewed by Sergey Shelukhin)

Repository: hive
Updated Branches:
  refs/heads/master 64e916245 -> 2d1f403dd


http://git-wip-us.apache.org/repos/asf/hive/blob/2d1f403d/serde/src/test/org/apache/hadoop/hive/serde2/SerdeRandomRowSource.java
----------------------------------------------------------------------
diff --git a/serde/src/test/org/apache/hadoop/hive/serde2/SerdeRandomRowSource.java b/serde/src/test/org/apache/hadoop/hive/serde2/SerdeRandomRowSource.java
new file mode 100644
index 0000000..f08a075
--- /dev/null
+++ b/serde/src/test/org/apache/hadoop/hive/serde2/SerdeRandomRowSource.java
@@ -0,0 +1,423 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.serde2;
+
+import java.sql.Date;
+import java.sql.Timestamp;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Random;
+
+import org.apache.commons.lang.ArrayUtils;
+import org.apache.commons.lang.StringUtils;
+import org.apache.hadoop.hive.common.type.HiveChar;
+import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.common.type.HiveIntervalDayTime;
+import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth;
+import org.apache.hadoop.hive.common.type.HiveVarchar;
+import org.apache.hadoop.hive.common.type.RandomTypeUtil;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBooleanObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableByteObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDateObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDoubleObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableFloatObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveCharObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveDecimalObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveIntervalDayTimeObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveIntervalYearMonthObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveVarcharObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableIntObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableLongObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableShortObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableTimestampObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
+import org.apache.hive.common.util.DateUtils;
+
+/**
+ * Generate object inspector and random row object[].
+ */
+public class SerdeRandomRowSource {
+
+  private Random r;
+
+  private int columnCount;
+
+  private List<String> typeNames;
+
+  private PrimitiveCategory[] primitiveCategories;
+
+  private PrimitiveTypeInfo[] primitiveTypeInfos;
+
+  private List<ObjectInspector> primitiveObjectInspectorList;
+
+  private StructObjectInspector rowStructObjectInspector;
+
+  public List<String> typeNames() {
+    return typeNames;
+  }
+
+  public PrimitiveCategory[] primitiveCategories() {
+    return primitiveCategories;
+  }
+
+  public PrimitiveTypeInfo[] primitiveTypeInfos() {
+    return primitiveTypeInfos;
+  }
+
+  public StructObjectInspector rowStructObjectInspector() {
+    return rowStructObjectInspector;
+  }
+
+  public StructObjectInspector partialRowStructObjectInspector(int partialFieldCount) {
+    ArrayList<ObjectInspector> partialPrimitiveObjectInspectorList =
+        new ArrayList<ObjectInspector>(partialFieldCount);
+    List<String> columnNames = new ArrayList<String>(partialFieldCount);
+    for (int i = 0; i < partialFieldCount; i++) {
+      columnNames.add(String.format("partial%d", i));
+      partialPrimitiveObjectInspectorList.add(
+          PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(
+              primitiveTypeInfos[i]));
+    }
+
+    return ObjectInspectorFactory.getStandardStructObjectInspector(
+        columnNames, partialPrimitiveObjectInspectorList);
+  }
+
+  public void init(Random r) {
+    this.r = r;
+    chooseSchema();
+  }
+
+  /*
+   * For now, exclude CHAR until we determine why there is a (blank padding) difference
+   * between serializing with LazyBinarySerializeWrite and with the regular SerDe.
+   */
+  private static String[] possibleHiveTypeNames = {
+      "boolean",
+      "tinyint",
+      "smallint",
+      "int",
+      "bigint",
+      "date",
+      "float",
+      "double",
+      "string",
+//    "char",
+      "varchar",
+      "binary",
+      "date",
+      "timestamp",
+      "interval_year_month",
+      "interval_day_time",
+      "decimal"
+  };
+
+  private void chooseSchema() {
+    HashSet<Integer> hashSet = null;
+    boolean allTypes;
+    boolean onlyOne = (r.nextInt(100) == 7);
+    if (onlyOne) {
+      columnCount = 1;
+      allTypes = false;
+    } else {
+      allTypes = r.nextBoolean();
+      if (allTypes) {
+        // One of each type.
+        columnCount = possibleHiveTypeNames.length;
+        hashSet = new HashSet<Integer>();
+      } else {
+        columnCount = 1 + r.nextInt(20);
+      }
+    }
+    typeNames = new ArrayList<String>(columnCount);
+    primitiveCategories = new PrimitiveCategory[columnCount];
+    primitiveTypeInfos = new PrimitiveTypeInfo[columnCount];
+    primitiveObjectInspectorList = new ArrayList<ObjectInspector>(columnCount);
+    List<String> columnNames = new ArrayList<String>(columnCount);
+    for (int c = 0; c < columnCount; c++) {
+      columnNames.add(String.format("col%d", c));
+      String typeName;
+
+      if (onlyOne) {
+        typeName = possibleHiveTypeNames[r.nextInt(possibleHiveTypeNames.length)];
+      } else {
+        int typeNum;
+        if (allTypes) {
+          while (true) {
+            typeNum = r.nextInt(possibleHiveTypeNames.length);
+            Integer typeNumInteger = Integer.valueOf(typeNum);
+            if (!hashSet.contains(typeNumInteger)) {
+              hashSet.add(typeNumInteger);
+              break;
+            }
+          }
+        } else {
+          typeNum = r.nextInt(possibleHiveTypeNames.length);
+        }
+        typeName = possibleHiveTypeNames[typeNum];
+      }
+      if (typeName.equals("char")) {
+        int maxLength = 1 + r.nextInt(100);
+        typeName = String.format("char(%d)", maxLength);
+      } else if (typeName.equals("varchar")) {
+        int maxLength = 1 + r.nextInt(100);
+        typeName = String.format("varchar(%d)", maxLength);
+      } else if (typeName.equals("decimal")) {
+        typeName = String.format("decimal(%d,%d)", HiveDecimal.SYSTEM_DEFAULT_PRECISION, HiveDecimal.SYSTEM_DEFAULT_SCALE);
+      }
+      PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString(typeName);
+      primitiveTypeInfos[c] = primitiveTypeInfo;
+      PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory();
+      primitiveCategories[c] = primitiveCategory;
+      primitiveObjectInspectorList.add(PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(primitiveTypeInfo));
+      typeNames.add(typeName);
+    }
+    rowStructObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, primitiveObjectInspectorList);
+  }
+
+  public Object[][] randomRows(int n) {
+    Object[][] result = new Object[n][];
+    for (int i = 0; i < n; i++) {
+      result[i] = randomRow();
+    }
+    return result;
+  }
+
+  public Object[] randomRow() {
+    Object row[] = new Object[columnCount];
+    for (int c = 0; c < columnCount; c++) {
+      Object object = randomObject(c);
+      if (object == null) {
+        throw new Error("Unexpected null for column " + c);
+      }
+      row[c] = getWritableObject(c, object);
+      if (row[c] == null) {
+        throw new Error("Unexpected null for writable for column " + c);
+      }
+    }
+    return row;
+  }
+
+  public static void sort(Object[][] rows, ObjectInspector oi) {
+    for (int i = 0; i < rows.length; i++) {
+      for (int j = i + 1; j < rows.length; j++) {
+        if (ObjectInspectorUtils.compare(rows[i], oi, rows[j], oi) > 0) {
+          Object[] t = rows[i];
+          rows[i] = rows[j];
+          rows[j] = t;
+        }
+      }
+    }
+  }
+
+  public void sort(Object[][] rows) {
+    SerdeRandomRowSource.sort(rows, rowStructObjectInspector);
+  }
+
+  public Object getWritableObject(int column, Object object) {
+    ObjectInspector objectInspector = primitiveObjectInspectorList.get(column);
+    PrimitiveCategory primitiveCategory = primitiveCategories[column];
+    PrimitiveTypeInfo primitiveTypeInfo = primitiveTypeInfos[column];
+    switch (primitiveCategory) {
+    case BOOLEAN:
+      return ((WritableBooleanObjectInspector) objectInspector).create((boolean) object);
+    case BYTE:
+      return ((WritableByteObjectInspector) objectInspector).create((byte) object);
+    case SHORT:
+      return ((WritableShortObjectInspector) objectInspector).create((short) object);
+    case INT:
+      return ((WritableIntObjectInspector) objectInspector).create((int) object);
+    case LONG:
+      return ((WritableLongObjectInspector) objectInspector).create((long) object);
+    case DATE:
+      return ((WritableDateObjectInspector) objectInspector).create((Date) object);
+    case FLOAT:
+      return ((WritableFloatObjectInspector) objectInspector).create((float) object);
+    case DOUBLE:
+      return ((WritableDoubleObjectInspector) objectInspector).create((double) object);
+    case STRING:
+      return ((WritableStringObjectInspector) objectInspector).create((String) object);
+    case CHAR:
+      {
+        WritableHiveCharObjectInspector writableCharObjectInspector =
+                new WritableHiveCharObjectInspector( (CharTypeInfo) primitiveTypeInfo);
+        return writableCharObjectInspector.create(new HiveChar(StringUtils.EMPTY, -1));
+      }
+    case VARCHAR:
+      {
+        WritableHiveVarcharObjectInspector writableVarcharObjectInspector =
+                new WritableHiveVarcharObjectInspector( (VarcharTypeInfo) primitiveTypeInfo);
+        return writableVarcharObjectInspector.create(new HiveVarchar(StringUtils.EMPTY, -1));
+      }
+    case BINARY:
+      return PrimitiveObjectInspectorFactory.writableBinaryObjectInspector.create(ArrayUtils.EMPTY_BYTE_ARRAY);
+    case TIMESTAMP:
+      return ((WritableTimestampObjectInspector) objectInspector).create(new Timestamp(0));
+    case INTERVAL_YEAR_MONTH:
+      return ((WritableHiveIntervalYearMonthObjectInspector) objectInspector).create(new HiveIntervalYearMonth(0));
+    case INTERVAL_DAY_TIME:
+      return ((WritableHiveIntervalDayTimeObjectInspector) objectInspector).create(new HiveIntervalDayTime(0, 0));
+    case DECIMAL:
+      {
+        WritableHiveDecimalObjectInspector writableDecimalObjectInspector =
+                new WritableHiveDecimalObjectInspector((DecimalTypeInfo) primitiveTypeInfo);
+        return writableDecimalObjectInspector.create(HiveDecimal.ZERO);
+      }
+    default:
+      throw new Error("Unknown primitive category " + primitiveCategory);
+    }
+  }
+
+  public Object randomObject(int column) {
+    PrimitiveCategory primitiveCategory = primitiveCategories[column];
+    PrimitiveTypeInfo primitiveTypeInfo = primitiveTypeInfos[column];
+    switch (primitiveCategory) {
+    case BOOLEAN:
+      return Boolean.valueOf(r.nextInt(2) == 1);
+    case BYTE:
+      return Byte.valueOf((byte) r.nextInt());
+    case SHORT:
+      return Short.valueOf((short) r.nextInt());
+    case INT:
+      return Integer.valueOf(r.nextInt());
+    case LONG:
+      return Long.valueOf(r.nextLong());
+    case DATE:
+      return RandomTypeUtil.getRandDate(r);
+    case FLOAT:
+      return Float.valueOf(r.nextFloat() * 10 - 5);
+    case DOUBLE:
+      return Double.valueOf(r.nextDouble() * 10 - 5);
+    case STRING:
+      return RandomTypeUtil.getRandString(r);
+    case CHAR:
+      return getRandHiveChar(r, (CharTypeInfo) primitiveTypeInfo);
+    case VARCHAR:
+      return getRandHiveVarchar(r, (VarcharTypeInfo) primitiveTypeInfo);
+    case BINARY:
+      return getRandBinary(r, 1 + r.nextInt(100));
+    case TIMESTAMP:
+      return RandomTypeUtil.getRandTimestamp(r);
+    case INTERVAL_YEAR_MONTH:
+      return getRandIntervalYearMonth(r);
+    case INTERVAL_DAY_TIME:
+      return getRandIntervalDayTime(r);
+    case DECIMAL:
+      return getRandHiveDecimal(r, (DecimalTypeInfo) primitiveTypeInfo);
+    default:
+      throw new Error("Unknown primitive category " + primitiveCategory);
+    }
+  }
+
+  public static HiveChar getRandHiveChar(Random r, CharTypeInfo charTypeInfo) {
+    int maxLength = 1 + r.nextInt(charTypeInfo.getLength());
+    String randomString = RandomTypeUtil.getRandString(r, "abcdefghijklmnopqrstuvwxyz", 100);
+    HiveChar hiveChar = new HiveChar(randomString, maxLength);
+    return hiveChar;
+  }
+
+  public static HiveVarchar getRandHiveVarchar(Random r, VarcharTypeInfo varcharTypeInfo) {
+    int maxLength = 1 + r.nextInt(varcharTypeInfo.getLength());
+    String randomString = RandomTypeUtil.getRandString(r, "abcdefghijklmnopqrstuvwxyz", 100);
+    HiveVarchar hiveVarchar = new HiveVarchar(randomString, maxLength);
+    return hiveVarchar;
+  }
+
+  public static byte[] getRandBinary(Random r, int len){
+    byte[] bytes = new byte[len];
+    for (int j = 0; j < len; j++){
+      bytes[j] = Byte.valueOf((byte) r.nextInt());
+    }
+    return bytes;
+  }
+
+  private static final String DECIMAL_CHARS = "0123456789";
+
+  public static HiveDecimal getRandHiveDecimal(Random r, DecimalTypeInfo decimalTypeInfo) {
+    while (true) {
+      StringBuilder sb = new StringBuilder();
+      int precision = 1 + r.nextInt(18);
+      int scale = 0 + r.nextInt(precision + 1);
+
+      int integerDigits = precision - scale;
+
+      if (r.nextBoolean()) {
+        sb.append("-");
+      }
+
+      if (integerDigits == 0) {
+        sb.append("0");
+      } else {
+        sb.append(RandomTypeUtil.getRandString(r, DECIMAL_CHARS, integerDigits));
+      }
+      if (scale != 0) {
+        sb.append(".");
+        sb.append(RandomTypeUtil.getRandString(r, DECIMAL_CHARS, scale));
+      }
+
+      HiveDecimal bd = HiveDecimal.create(sb.toString());
+      if (bd.scale() > bd.precision()) {
+        // HiveDecimal.create occasionally yields scale > precision; skip and retry.
+        continue;
+      }
+
+      return bd;
+    }
+  }
+
+  public static HiveIntervalYearMonth getRandIntervalYearMonth(Random r) {
+    String yearMonthSignStr = r.nextInt(2) == 0 ? "" : "-";
+    String intervalYearMonthStr = String.format("%s%d-%d",
+        yearMonthSignStr,
+        Integer.valueOf(1800 + r.nextInt(500)),  // year
+        Integer.valueOf(0 + r.nextInt(12)));     // month
+    HiveIntervalYearMonth intervalYearMonthVal = HiveIntervalYearMonth.valueOf(intervalYearMonthStr);
+    return intervalYearMonthVal;
+  }
+
+  public static HiveIntervalDayTime getRandIntervalDayTime(Random r) {
+    String optionalNanos = "";
+    if (r.nextInt(2) == 1) {
+      optionalNanos = String.format(".%09d",
+          Integer.valueOf(0 + r.nextInt(DateUtils.NANOS_PER_SEC)));
+    }
+    String yearMonthSignStr = r.nextInt(2) == 0 ? "" : "-";
+    String dayTimeStr = String.format("%s%d %02d:%02d:%02d%s",
+        yearMonthSignStr,
+        Integer.valueOf(1 + r.nextInt(28)),      // day
+        Integer.valueOf(0 + r.nextInt(24)),      // hour
+        Integer.valueOf(0 + r.nextInt(60)),      // minute
+        Integer.valueOf(0 + r.nextInt(60)),      // second
+        optionalNanos);
+    HiveIntervalDayTime intervalDayTimeVal = HiveIntervalDayTime.valueOf(dayTimeStr);
+    return intervalDayTimeVal;
+  }
+}
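
For reference, a minimal sketch of how a test can drive the new SerdeRandomRowSource. All class and method names are taken from the file above; the wrapper class, seed, and row count are arbitrary illustration.

    import java.util.Random;

    import org.apache.hadoop.hive.serde2.SerdeRandomRowSource;
    import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
    import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;

    public class SerdeRandomRowSourceSketch {
      public static void main(String[] args) {
        Random r = new Random(4747);

        // Pick a random schema and build the matching object inspectors.
        SerdeRandomRowSource source = new SerdeRandomRowSource();
        source.init(r);

        StructObjectInspector rowOI = source.rowStructObjectInspector();
        PrimitiveTypeInfo[] primitiveTypeInfos = source.primitiveTypeInfos();

        // Generate writable-wrapped random rows and order them by the row OI.
        Object[][] rows = source.randomRows(1000);
        source.sort(rows);

        System.out.println("Generated " + rows.length + " rows of "
            + primitiveTypeInfos.length + " columns: " + source.typeNames());
      }
    }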

http://git-wip-us.apache.org/repos/asf/hive/blob/2d1f403d/serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/TestBinarySortableFast.java
----------------------------------------------------------------------
diff --git a/serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/TestBinarySortableFast.java b/serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/TestBinarySortableFast.java
index 58937db..7babf7a 100644
--- a/serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/TestBinarySortableFast.java
+++ b/serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/TestBinarySortableFast.java
@@ -25,10 +25,10 @@ import java.util.Random;
 import org.apache.commons.lang.StringUtils;
 import org.apache.hadoop.hive.serde2.ByteStream.Output;
 import org.apache.hadoop.hive.serde2.SerDe;
+import org.apache.hadoop.hive.serde2.SerdeRandomRowSource;
 import org.apache.hadoop.hive.serde2.VerifyFast;
 import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead;
 import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite;
-import org.apache.hadoop.hive.serde2.fast.RandomRowObjectSource;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
 import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
@@ -39,8 +39,11 @@ import junit.framework.TestCase;
 
 public class TestBinarySortableFast extends TestCase {
 
+  private static String debugDetailedReadPositionString;
+  private static StackTraceElement[] debugStackTrace;
+
   private void testBinarySortableFast(
-          RandomRowObjectSource source, Object[][] rows,
+          SerdeRandomRowSource source, Object[][] rows,
           boolean[] columnSortOrderIsDesc, byte[] columnNullMarker, byte[] columnNotNullMarker,
           SerDe serde, StructObjectInspector rowOI,
           SerDe serde_fewer, StructObjectInspector writeRowOI,
@@ -134,11 +137,6 @@ public class TestBinarySortableFast extends TestCase {
       }
       binarySortableDeserializeRead.extraFieldsCheck();
       TestCase.assertTrue(!binarySortableDeserializeRead.readBeyondConfiguredFieldsWarned());
-      if (doWriteFewerColumns) {
-        TestCase.assertTrue(binarySortableDeserializeRead.readBeyondBufferRangeWarned());
-      } else {
-        TestCase.assertTrue(!binarySortableDeserializeRead.readBeyondBufferRangeWarned());
-      }
       TestCase.assertTrue(!binarySortableDeserializeRead.bufferRangeHasExtraDataWarned());
 
       /*
@@ -161,6 +159,8 @@ public class TestBinarySortableFast extends TestCase {
           try {
             VerifyFast.verifyDeserializeRead(binarySortableDeserializeRead2, primitiveTypeInfos[index], writable);
           } catch (EOFException e) {
+//          debugDetailedReadPositionString = binarySortableDeserializeRead2.getDetailedReadPositionString();
+//          debugStackTrace = e.getStackTrace();
             threw = true;
           }
           TestCase.assertTrue(threw);
@@ -268,11 +268,6 @@ public class TestBinarySortableFast extends TestCase {
       }
       binarySortableDeserializeRead.extraFieldsCheck();
       TestCase.assertTrue(!binarySortableDeserializeRead.readBeyondConfiguredFieldsWarned());
-      if (doWriteFewerColumns) {
-        TestCase.assertTrue(binarySortableDeserializeRead.readBeyondBufferRangeWarned());
-      } else {
-        TestCase.assertTrue(!binarySortableDeserializeRead.readBeyondBufferRangeWarned());
-      }
       TestCase.assertTrue(!binarySortableDeserializeRead.bufferRangeHasExtraDataWarned());
     }
   }
@@ -280,7 +275,7 @@ public class TestBinarySortableFast extends TestCase {
   private void testBinarySortableFastCase(int caseNum, boolean doNonRandomFill, Random r)
       throws Throwable {
 
-    RandomRowObjectSource source = new RandomRowObjectSource();
+    SerdeRandomRowSource source = new SerdeRandomRowSource();
     source.init(r);
 
     int rowCount = 1000;

http://git-wip-us.apache.org/repos/asf/hive/blob/2d1f403d/serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazySimpleFast.java
----------------------------------------------------------------------
diff --git a/serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazySimpleFast.java b/serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazySimpleFast.java
index 76b93c6..66c6203 100644
--- a/serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazySimpleFast.java
+++ b/serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazySimpleFast.java
@@ -17,7 +17,6 @@
  */
 package org.apache.hadoop.hive.serde2.lazy;
 
-import java.io.EOFException;
 import java.util.Arrays;
 import java.util.Properties;
 import java.util.Random;
@@ -25,19 +24,14 @@ import java.util.Random;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.serde.serdeConstants;
 import org.apache.hadoop.hive.serde2.ByteStream.Output;
-import org.apache.hadoop.hive.serde2.SerDe;
 import org.apache.hadoop.hive.serde2.SerDeException;
 import org.apache.hadoop.hive.serde2.SerDeUtils;
+import org.apache.hadoop.hive.serde2.SerdeRandomRowSource;
 import org.apache.hadoop.hive.serde2.VerifyFast;
 import org.apache.hadoop.hive.serde2.binarysortable.MyTestClass;
-import org.apache.hadoop.hive.serde2.binarysortable.MyTestPrimitiveClass;
-import org.apache.hadoop.hive.serde2.binarysortable.MyTestPrimitiveClass.ExtraTypeInfo;
-import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead;
-import org.apache.hadoop.hive.serde2.fast.RandomRowObjectSource;
 import org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleDeserializeRead;
 import org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleSerializeWrite;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
 import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
 import org.apache.hadoop.io.BytesWritable;
@@ -49,7 +43,7 @@ import junit.framework.TestCase;
 public class TestLazySimpleFast extends TestCase {
 
   private void testLazySimpleFast(
-    RandomRowObjectSource source, Object[][] rows,
+    SerdeRandomRowSource source, Object[][] rows,
     LazySimpleSerDe serde, StructObjectInspector rowOI,
     LazySimpleSerDe serde_fewer, StructObjectInspector writeRowOI,
     byte separator, LazySerDeParameters serdeParams, LazySerDeParameters serdeParams_fewer,
@@ -131,11 +125,6 @@ public class TestLazySimpleFast extends TestCase {
       }
       lazySimpleDeserializeRead.extraFieldsCheck();
       TestCase.assertTrue(!lazySimpleDeserializeRead.readBeyondConfiguredFieldsWarned());
-      if (doWriteFewerColumns) {
-        TestCase.assertTrue(lazySimpleDeserializeRead.readBeyondBufferRangeWarned());
-      } else {
-        TestCase.assertTrue(!lazySimpleDeserializeRead.readBeyondBufferRangeWarned());
-      }
       TestCase.assertTrue(!lazySimpleDeserializeRead.bufferRangeHasExtraDataWarned());
 
     }
@@ -219,11 +208,6 @@ public class TestLazySimpleFast extends TestCase {
       }
       lazySimpleDeserializeRead.extraFieldsCheck();
       TestCase.assertTrue(!lazySimpleDeserializeRead.readBeyondConfiguredFieldsWarned());
-      if (doWriteFewerColumns) {
-        TestCase.assertTrue(lazySimpleDeserializeRead.readBeyondBufferRangeWarned());
-      } else {
-        TestCase.assertTrue(!lazySimpleDeserializeRead.readBeyondBufferRangeWarned());
-      }
       TestCase.assertTrue(!lazySimpleDeserializeRead.bufferRangeHasExtraDataWarned());
     }
   }
@@ -266,7 +250,7 @@ public class TestLazySimpleFast extends TestCase {
   public void testLazySimpleFastCase(int caseNum, boolean doNonRandomFill, Random r)
       throws Throwable {
 
-    RandomRowObjectSource source = new RandomRowObjectSource();
+    SerdeRandomRowSource source = new SerdeRandomRowSource();
     source.init(r);
 
     int rowCount = 1000;

http://git-wip-us.apache.org/repos/asf/hive/blob/2d1f403d/serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/TestLazyBinaryFast.java
----------------------------------------------------------------------
diff --git a/serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/TestLazyBinaryFast.java b/serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/TestLazyBinaryFast.java
index d7c4999..5af11cd 100644
--- a/serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/TestLazyBinaryFast.java
+++ b/serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/TestLazyBinaryFast.java
@@ -17,43 +17,28 @@
  */
 package org.apache.hadoop.hive.serde2.lazybinary;
 
-import java.io.EOFException;
-import java.util.ArrayList;
 import java.util.Arrays;
-import java.util.List;
 import java.util.Random;
 
 import junit.framework.TestCase;
 
 import org.apache.hadoop.hive.serde2.ByteStream.Output;
 import org.apache.hadoop.hive.serde2.SerDe;
+import org.apache.hadoop.hive.serde2.SerdeRandomRowSource;
 import org.apache.hadoop.hive.serde2.VerifyFast;
 import org.apache.hadoop.hive.serde2.binarysortable.MyTestClass;
-import org.apache.hadoop.hive.serde2.binarysortable.MyTestPrimitiveClass;
-import org.apache.hadoop.hive.serde2.binarysortable.MyTestPrimitiveClass.ExtraTypeInfo;
-import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead;
-import org.apache.hadoop.hive.serde2.fast.RandomRowObjectSource;
-import org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters;
-import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
 import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead;
 import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinarySerializeWrite;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
-import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
 import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
 import org.apache.hadoop.io.BytesWritable;
 import org.apache.hadoop.io.Writable;
 
 public class TestLazyBinaryFast extends TestCase {
 
   private void testLazyBinaryFast(
-      RandomRowObjectSource source, Object[][] rows,
+      SerdeRandomRowSource source, Object[][] rows,
       SerDe serde, StructObjectInspector rowOI,
       SerDe serde_fewer, StructObjectInspector writeRowOI,
       PrimitiveTypeInfo[] primitiveTypeInfos,
@@ -131,7 +116,6 @@ public class TestLazyBinaryFast extends TestCase {
       } else {
         TestCase.assertTrue(!lazyBinaryDeserializeRead.readBeyondConfiguredFieldsWarned());
       }
-      TestCase.assertTrue(!lazyBinaryDeserializeRead.readBeyondBufferRangeWarned());
       TestCase.assertTrue(!lazyBinaryDeserializeRead.bufferRangeHasExtraDataWarned());
     }
 
@@ -228,19 +212,13 @@ public class TestLazyBinaryFast extends TestCase {
       }
       lazyBinaryDeserializeRead.extraFieldsCheck();
       TestCase.assertTrue(!lazyBinaryDeserializeRead.readBeyondConfiguredFieldsWarned());
-      if (doWriteFewerColumns) {
-        // The nullByte may cause this to not be true...
-        // TestCase.assertTrue(lazyBinaryDeserializeRead.readBeyondBufferRangeWarned());
-      } else {
-        TestCase.assertTrue(!lazyBinaryDeserializeRead.readBeyondBufferRangeWarned());
-      }
       TestCase.assertTrue(!lazyBinaryDeserializeRead.bufferRangeHasExtraDataWarned());
     }
   }
 
   public void testLazyBinaryFastCase(int caseNum, boolean doNonRandomFill, Random r) throws Throwable {
 
-    RandomRowObjectSource source = new RandomRowObjectSource();
+    SerdeRandomRowSource source = new SerdeRandomRowSource();
     source.init(r);
 
     int rowCount = 1000;


[3/3] hive git commit: HIVE-13874: Tighten up EOF checking in Fast DeserializeRead classes; display better exception information; add new Unit Tests (Matt McCline, reviewed by Sergey Shelukhin)

Posted by mm...@apache.org.
HIVE-13874: Tighten up EOF checking in Fast DeserializeRead classes; display better exception information; add new Unit Tests (Matt McCline, reviewed by Sergey Shelukhin)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/2d1f403d
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/2d1f403d
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/2d1f403d

Branch: refs/heads/master
Commit: 2d1f403dd9df751136c1b77a9cdccd3212d20c24
Parents: 64e9162
Author: Matt McCline <mm...@hortonworks.com>
Authored: Sat Aug 20 01:32:45 2016 -0700
Committer: Matt McCline <mm...@hortonworks.com>
Committed: Sat Aug 20 01:32:45 2016 -0700

----------------------------------------------------------------------
 .../persistence/BytesBytesMultiHashMap.java     |  29 +-
 .../hive/ql/exec/tez/ReduceRecordSource.java    |  10 +-
 .../ql/exec/vector/VectorDeserializeRow.java    |  32 +-
 .../hive/ql/exec/vector/VectorMapOperator.java  |  10 +-
 .../VectorMapJoinGenerateResultOperator.java    |  64 +-
 .../fast/VectorMapJoinFastLongHashTable.java    |  11 +-
 .../fast/VectorMapJoinFastStringCommon.java     |  14 +-
 .../fast/VectorMapJoinFastValueStore.java       |  56 +-
 .../hashtable/VectorMapJoinHashMapResult.java   |   4 +-
 .../VectorMapJoinOptimizedHashMap.java          |  10 +-
 .../persistence/TestBytesBytesMultiHashMap.java |   2 -
 .../ql/exec/vector/TestVectorRowObject.java     |   3 +-
 .../hive/ql/exec/vector/TestVectorSerDeRow.java |  12 +-
 .../ql/exec/vector/VectorRandomRowSource.java   | 458 ++++++++++++
 .../vector/mapjoin/fast/CheckFastHashTable.java |  19 +-
 .../mapjoin/fast/CheckFastRowHashMap.java       | 387 ++++++++++
 .../fast/TestVectorMapJoinFastBytesHashMap.java |  15 +
 .../fast/TestVectorMapJoinFastRowHashMap.java   | 718 +++++++++++++++++++
 .../exec/vector/mapjoin/fast/VerifyFastRow.java | 397 ++++++++++
 .../fast/BinarySortableDeserializeRead.java     |  65 +-
 .../hive/serde2/fast/DeserializeRead.java       |   6 +-
 .../hive/serde2/fast/RandomRowObjectSource.java | 423 -----------
 .../hadoop/hive/serde2/fast/SerializeWrite.java |   2 +-
 .../lazy/fast/LazySimpleDeserializeRead.java    |  61 +-
 .../fast/LazyBinaryDeserializeRead.java         | 146 ++--
 .../hive/serde2/SerdeRandomRowSource.java       | 423 +++++++++++
 .../binarysortable/TestBinarySortableFast.java  |  21 +-
 .../hive/serde2/lazy/TestLazySimpleFast.java    |  22 +-
 .../serde2/lazybinary/TestLazyBinaryFast.java   |  28 +-
 29 files changed, 2744 insertions(+), 704 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/2d1f403d/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/BytesBytesMultiHashMap.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/BytesBytesMultiHashMap.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/BytesBytesMultiHashMap.java
index dd88461..6b89e98 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/BytesBytesMultiHashMap.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/BytesBytesMultiHashMap.java
@@ -298,8 +298,8 @@ public final class BytesBytesMultiHashMap {
     }
 
     /**
-     * Read the current value.  
-     * 
+     * Read the current value.
+     *
      * @return
      *           The ByteSegmentRef to the current value read.
      */
@@ -380,29 +380,6 @@ public final class BytesBytesMultiHashMap {
     }
 
     /**
-     * @return Whether we have read all the values or not.
-     */
-    public boolean isEof() {
-      // LOG.info("BytesBytesMultiHashMap isEof hasRows " + hasRows + " hasList " + hasList + " readIndex " + readIndex + " nextTailOffset " + nextTailOffset);
-      if (!hasRows) {
-        return true;
-      }
-
-      if (!hasList) {
-        return (readIndex > 0);
-      } else {
-        // Multiple values.
-        if (readIndex <= 1) {
-          // Careful: We have not read the list record and 2nd value yet, so nextTailOffset
-          // is not valid yet.
-          return false;
-        } else {
-          return (nextTailOffset <= 0);
-        }
-      }
-    }
-
-    /**
      * Lets go of any references to a hash map.
      */
     public void forget() {
@@ -741,7 +718,7 @@ public final class BytesBytesMultiHashMap {
     long capacity = refs.length << 1;
     expandAndRehashImpl(capacity);
   }
-  
+
   private void expandAndRehashImpl(long capacity) {
     long expandTime = System.currentTimeMillis();
     final long[] oldRefs = refs;

http://git-wip-us.apache.org/repos/asf/hive/blob/2d1f403d/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java
index e966ff1..f4c3b81 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java
@@ -22,6 +22,7 @@ import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Iterator;
 import java.util.List;
+
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.hive.ql.exec.CommonMergeJoinOperator;
@@ -413,7 +414,14 @@ public class ReduceRecordSource implements RecordSource {
     //     VectorizedBatchUtil.displayBytes(keyBytes, 0, keyLength));
 
     keyBinarySortableDeserializeToRow.setBytes(keyBytes, 0, keyLength);
-    keyBinarySortableDeserializeToRow.deserialize(batch, 0);
+    try {
+      keyBinarySortableDeserializeToRow.deserialize(batch, 0);
+    } catch (Exception e) {
+      throw new HiveException(
+          "\nDeserializeRead details: " +
+              keyBinarySortableDeserializeToRow.getDetailedReadPositionString(),
+          e);
+    }
     for(int i = 0; i < firstValueColumnOffset; i++) {
       VectorizedBatchUtil.setRepeatingColumn(batch, i);
     }

http://git-wip-us.apache.org/repos/asf/hive/blob/2d1f403d/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java
index fca1882..f66916b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java
@@ -650,38 +650,26 @@ public final class VectorDeserializeRow<T extends DeserializeRead> {
   /**
    * Deserialize a row from the range of bytes specified by setBytes.
    *
+   * Use getDetailedReadPositionString to get detailed read position information to help
+   * diagnose exceptions that are thrown...
+   *
    * @param batch
    * @param batchIndex
    * @throws IOException
    */
   public void deserialize(VectorizedRowBatch batch, int batchIndex) throws IOException {
     final int count = isConvert.length;
-    int i = 0;
-    try {
-      while (i < count) {
-        if (isConvert[i]) {
-          deserializeConvertRowColumn(batch, batchIndex, i);
-        } else {
-          deserializeRowColumn(batch, batchIndex, i);
-        }
-        i++;    // Increment after the apply which could throw an exception.
+    for (int i = 0; i < count; i++) {
+      if (isConvert[i]) {
+        deserializeConvertRowColumn(batch, batchIndex, i);
+      } else {
+        deserializeRowColumn(batch, batchIndex, i);
       }
-    } catch (EOFException e) {
-      throwMoreDetailedException(e, i);
     }
     deserializeRead.extraFieldsCheck();
   }
 
-  private void throwMoreDetailedException(IOException e, int index) throws EOFException {
-    StringBuilder sb = new StringBuilder();
-    sb.append("Detail: \"" + e.toString() + "\" occured for field " + index + " of " +  sourceTypeInfos.length + " fields (");
-    for (int i = 0; i < sourceTypeInfos.length; i++) {
-      if (i > 0) {
-        sb.append(", ");
-      }
-      sb.append(((PrimitiveTypeInfo) sourceTypeInfos[i]).getPrimitiveCategory().name());
-    }
-    sb.append(")");
-    throw new EOFException(sb.toString());
+  public String getDetailedReadPositionString() {
+    return deserializeRead.getDetailedReadPositionString();
   }
 }
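
A minimal sketch of the caller-side pattern this change standardizes on: wrap deserialize() and attach the detailed read position to the rethrown exception. The surrounding class and field wiring here are hypothetical; the Hive calls mirror the hunks above and below.

    import org.apache.hadoop.hive.ql.exec.vector.VectorDeserializeRow;
    import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
    import org.apache.hadoop.hive.ql.metadata.HiveException;
    import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead;

    class RowDeserializeSketch {
      // Hypothetical field; in the operators below this is e.g.
      // smallTableVectorDeserializeRow or keyBinarySortableDeserializeToRow.
      private VectorDeserializeRow<BinarySortableDeserializeRead> vectorDeserializeRow;

      void deserializeRow(byte[] bytes, int offset, int length,
          VectorizedRowBatch batch, int batchIndex) throws HiveException {
        vectorDeserializeRow.setBytes(bytes, offset, length);
        try {
          vectorDeserializeRow.deserialize(batch, batchIndex);
        } catch (Exception e) {
          // Include where the reader was positioned when it failed.
          throw new HiveException(
              "\nDeserializeRead detail: " +
                  vectorDeserializeRow.getDetailedReadPositionString(),
              e);
        }
      }
    }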

http://git-wip-us.apache.org/repos/asf/hive/blob/2d1f403d/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java
index a65cac0..2bdc59b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java
@@ -820,7 +820,15 @@ public class VectorMapOperator extends AbstractMapOperator {
               currentDeserializeRead.set(binComp.getBytes(), 0, binComp.getLength());
 
               // Deserialize and append new row using the current batch size as the index.
-              currentVectorDeserializeRow.deserialize(deserializerBatch, deserializerBatch.size++);
+              try {
+                currentVectorDeserializeRow.deserialize(
+                    deserializerBatch, deserializerBatch.size++);
+              } catch (Exception e) {
+                throw new HiveException(
+                    "\nDeserializeRead detail: " +
+                        currentVectorDeserializeRow.getDetailedReadPositionString(),
+                    e);
+              }
             }
             break;
 

http://git-wip-us.apache.org/repos/asf/hive/blob/2d1f403d/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java
index 22b2a17..469f86a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java
@@ -134,6 +134,27 @@ public abstract class VectorMapJoinGenerateResultOperator extends VectorMapJoinC
     batch.selectedInUse = saveSelectedInUse;
   }
 
+  protected void doSmallTableDeserializeRow(VectorizedRowBatch batch, int batchIndex,
+      ByteSegmentRef byteSegmentRef, VectorMapJoinHashMapResult hashMapResult)
+          throws HiveException {
+
+    byte[] bytes = byteSegmentRef.getBytes();
+    int offset = (int) byteSegmentRef.getOffset();
+    int length = byteSegmentRef.getLength();
+    smallTableVectorDeserializeRow.setBytes(bytes, offset, length);
+
+    try {
+      smallTableVectorDeserializeRow.deserialize(batch, batchIndex);
+    } catch (Exception e) {
+      throw new HiveException(
+          "\nHashMapResult detail: " +
+              hashMapResult.getDetailedHashMapResultPositionString() +
+          "\nDeserializeRead detail: " +
+              smallTableVectorDeserializeRow.getDetailedReadPositionString(),
+          e);
+    }
+  }
+
   //------------------------------------------------------------------------------------------------
 
   /*
@@ -180,13 +201,8 @@ public abstract class VectorMapJoinGenerateResultOperator extends VectorMapJoinC
       }
 
       if (smallTableVectorDeserializeRow != null) {
-
-        byte[] bytes = byteSegmentRef.getBytes();
-        int offset = (int) byteSegmentRef.getOffset();
-        int length = byteSegmentRef.getLength();
-        smallTableVectorDeserializeRow.setBytes(bytes, offset, length);
-
-        smallTableVectorDeserializeRow.deserialize(batch, batchIndex);
+        doSmallTableDeserializeRow(batch, batchIndex,
+            byteSegmentRef, hashMapResult);
       }
 
       // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, "generateHashMapResultSingleValue big table");
@@ -248,12 +264,8 @@ public abstract class VectorMapJoinGenerateResultOperator extends VectorMapJoinC
 
         if (smallTableVectorDeserializeRow != null) {
 
-          byte[] bytes = byteSegmentRef.getBytes();
-          int offset = (int) byteSegmentRef.getOffset();
-          int length = byteSegmentRef.getLength();
-          smallTableVectorDeserializeRow.setBytes(bytes, offset, length);
-
-          smallTableVectorDeserializeRow.deserialize(overflowBatch, overflowBatch.size);
+          doSmallTableDeserializeRow(overflowBatch, overflowBatch.size,
+              byteSegmentRef, hashMapResult);
         }
 
         // VectorizedBatchUtil.debugDisplayOneRow(overflowBatch, overflowBatch.size, "generateHashMapResultMultiValue overflow");
@@ -298,13 +310,8 @@ public abstract class VectorMapJoinGenerateResultOperator extends VectorMapJoinC
       while (byteSegmentRef != null) {
 
         if (smallTableVectorDeserializeRow != null) {
-
-          byte[] bytes = byteSegmentRef.getBytes();
-          int offset = (int) byteSegmentRef.getOffset();
-          int length = byteSegmentRef.getLength();
-          smallTableVectorDeserializeRow.setBytes(bytes, offset, length);
-
-          smallTableVectorDeserializeRow.deserialize(overflowBatch, overflowBatch.size);
+          doSmallTableDeserializeRow(overflowBatch, overflowBatch.size,
+              byteSegmentRef, hashMapResult);
         }
 
         overflowBatch.size++;
@@ -348,10 +355,10 @@ public abstract class VectorMapJoinGenerateResultOperator extends VectorMapJoinC
         }
       }
 
-      if (hashMapResult.isEof()) {
+      byteSegmentRef = hashMapResult.next();
+      if (byteSegmentRef == null) {
         break;
       }
-      byteSegmentRef = hashMapResult.next();
 
       // Get ready for a another round of small table values.
       overflowBatch.reset();
@@ -543,14 +550,18 @@ public abstract class VectorMapJoinGenerateResultOperator extends VectorMapJoinC
         int offset = bigTable.currentOffset();
         int length = bigTable.currentLength();
 
-//      LOG.debug(CLASS_NAME + " reProcessBigTable serialized row #" + rowCount + ", offset " + offset + ", length " + length);
-
         bigTableVectorDeserializeRow.setBytes(bytes, offset, length);
-        bigTableVectorDeserializeRow.deserialize(spillReplayBatch, spillReplayBatch.size);
+        try {
+          bigTableVectorDeserializeRow.deserialize(spillReplayBatch, spillReplayBatch.size);
+        } catch (Exception e) {
+          throw new HiveException(
+              "\nDeserializeRead detail: " +
+                  bigTableVectorDeserializeRow.getDetailedReadPositionString(),
+              e);
+        }
         spillReplayBatch.size++;
 
         if (spillReplayBatch.size == VectorizedRowBatch.DEFAULT_SIZE) {
-          // LOG.debug("reProcessBigTable going to call process with spillReplayBatch.size " + spillReplayBatch.size + " rows");
           process(spillReplayBatch, posBigTable); // call process once we have a full batch
           spillReplayBatch.reset();
           batchCount++;
@@ -558,7 +569,6 @@ public abstract class VectorMapJoinGenerateResultOperator extends VectorMapJoinC
       }
       // Process the row batch that has less than DEFAULT_SIZE rows
       if (spillReplayBatch.size > 0) {
-        // LOG.debug("reProcessBigTable going to call process with spillReplayBatch.size " + spillReplayBatch.size + " rows");
         process(spillReplayBatch, posBigTable);
         spillReplayBatch.reset();
         batchCount++;

http://git-wip-us.apache.org/repos/asf/hive/blob/2d1f403d/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java
index 5373aad..ee66d5b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java
@@ -78,8 +78,15 @@ public abstract class VectorMapJoinFastLongHashTable
     byte[] keyBytes = currentKey.getBytes();
     int keyLength = currentKey.getLength();
     keyBinarySortableDeserializeRead.set(keyBytes, 0, keyLength);
-    if (keyBinarySortableDeserializeRead.readCheckNull()) {
-      return;
+    try {
+      if (keyBinarySortableDeserializeRead.readCheckNull()) {
+        return;
+      }
+    } catch (Exception e) {
+      throw new HiveException(
+          "\nDeserializeRead details: " +
+              keyBinarySortableDeserializeRead.getDetailedReadPositionString() +
+          "\nException: " + e.toString());
     }
 
     long key = VectorMapJoinFastLongHashUtil.deserializeLongKey(

http://git-wip-us.apache.org/repos/asf/hive/blob/2d1f403d/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringCommon.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringCommon.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringCommon.java
index 985fb1c..bf378ac 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringCommon.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringCommon.java
@@ -45,8 +45,15 @@ public class VectorMapJoinFastStringCommon {
     byte[] keyBytes = currentKey.getBytes();
     int keyLength = currentKey.getLength();
     keyBinarySortableDeserializeRead.set(keyBytes, 0, keyLength);
-    if (keyBinarySortableDeserializeRead.readCheckNull()) {
-      return;
+    try {
+      if (keyBinarySortableDeserializeRead.readCheckNull()) {
+        return;
+      }
+    } catch (Exception e) {
+      throw new HiveException(
+          "\nDeserializeRead details: " +
+              keyBinarySortableDeserializeRead.getDetailedReadPositionString() +
+          "\nException: " + e.toString());
     }
 
     hashTable.add(
@@ -59,6 +66,7 @@ public class VectorMapJoinFastStringCommon {
   public VectorMapJoinFastStringCommon(boolean isOuterJoin) {
     this.isOuterJoin = isOuterJoin;
     PrimitiveTypeInfo[] primitiveTypeInfos = { TypeInfoFactory.stringTypeInfo };
-    keyBinarySortableDeserializeRead = new BinarySortableDeserializeRead(primitiveTypeInfos);
+    keyBinarySortableDeserializeRead =
+        new BinarySortableDeserializeRead(primitiveTypeInfos);
   }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/2d1f403d/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastValueStore.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastValueStore.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastValueStore.java
index f96e32b..f9c5b34 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastValueStore.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastValueStore.java
@@ -122,9 +122,7 @@ public class VectorMapJoinFastValueStore {
     private boolean isSingleRow;
     private int cappedCount;
 
-    private boolean haveReadCurrent;
     private int readIndex;
-    private boolean isEof;
 
     private boolean isNextEof;
     private boolean isNextLast;
@@ -153,9 +151,48 @@ public class VectorMapJoinFastValueStore {
       cappedCount =
           (int) ((valueRefWord & CappedCount.bitMask) >> CappedCount.bitShift);
       // Position to beginning.
-      haveReadCurrent = false;
       readIndex = 0;
-      isEof = false;
+    }
+
+    /**
+     * Get detailed HashMap result position information to help diagnose exceptions.
+     */
+    @Override
+    public String getDetailedHashMapResultPositionString() {
+      StringBuffer sb = new StringBuffer();
+
+      sb.append("Read index ");
+      sb.append(readIndex);
+      if (isSingleRow) {
+        sb.append(" single row");
+      } else {
+        sb.append(" capped count ");
+        sb.append(cappedCount);
+      }
+
+      if (readIndex > 0) {
+        sb.append(" byteSegmentRef is byte[] of length ");
+        sb.append(byteSegmentRef.getBytes().length);
+        sb.append(" at offset ");
+        sb.append(byteSegmentRef.getOffset());
+        sb.append(" for length ");
+        sb.append(byteSegmentRef.getLength());
+        if (!isSingleRow) {
+          sb.append(" (isNextEof ");
+          sb.append(isNextEof);
+          sb.append(" isNextLast ");
+          sb.append(isNextLast);
+          sb.append(" nextAbsoluteValueOffset ");
+          sb.append(nextAbsoluteValueOffset);
+          sb.append(" isNextValueLengthSmall ");
+          sb.append(isNextValueLengthSmall);
+          sb.append(" nextSmallValueLength ");
+          sb.append(nextSmallValueLength);
+          sb.append(")");
+        }
+      }
+
+      return sb.toString();
     }
 
     @Override
@@ -193,9 +230,7 @@ public class VectorMapJoinFastValueStore {
       }
 
       // Position to beginning.
-      haveReadCurrent = false;
       readIndex = 0;
-      isEof = false;
 
       return internalRead();
     }
@@ -363,18 +398,9 @@ public class VectorMapJoinFastValueStore {
     }
 
     @Override
-    public boolean isEof() {
-      if (!hasRows) {
-        return true;
-      }
-      return isEof;
-    }
-
-    @Override
     public void forget() {
     }
 
-
     @Override
     public String toString() {
       StringBuilder sb = new StringBuilder();

http://git-wip-us.apache.org/repos/asf/hive/blob/2d1f403d/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinHashMapResult.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinHashMapResult.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinHashMapResult.java
index fa6dedb..a5dfba8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinHashMapResult.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinHashMapResult.java
@@ -57,7 +57,7 @@ public abstract class VectorMapJoinHashMapResult extends VectorMapJoinHashTableR
   public abstract ByteSegmentRef next();
 
   /**
-   * @return Whether reading is at the end.
+   * Get detailed HashMap result position information to help diagnose exceptions.
    */
-  public abstract boolean isEof();
+  public abstract String getDetailedHashMapResultPositionString();
 }
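
With isEof() removed, a reader simply walks the value list until next() returns null, as generateHashMapResult* now does above. A minimal sketch follows; the first() call and the ByteSegmentRef import follow existing Hive usage and are assumed here, and the wrapper class is illustrative.

    import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult;
    import org.apache.hadoop.hive.serde2.WriteBuffers.ByteSegmentRef;

    class HashMapResultScanSketch {
      // Count the values behind a matched key by iterating until next() returns null.
      int countValues(VectorMapJoinHashMapResult hashMapResult) {
        int count = 0;
        ByteSegmentRef ref = hashMapResult.first();
        while (ref != null) {
          // A real caller would hand ref.getBytes()/getOffset()/getLength()
          // to a deserializer here.
          count++;
          ref = hashMapResult.next();
        }
        return count;
      }
    }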

http://git-wip-us.apache.org/repos/asf/hive/blob/2d1f403d/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedHashMap.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedHashMap.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedHashMap.java
index b7da976..eada694 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedHashMap.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedHashMap.java
@@ -89,11 +89,6 @@ public class VectorMapJoinOptimizedHashMap
     }
 
     @Override
-    public boolean isEof() {
-      return bytesBytesMultiHashMapResult.isEof();
-    }
-
-    @Override
     public void forget() {
       bytesBytesMultiHashMapResult.forget();
       super.forget();
@@ -106,6 +101,11 @@ public class VectorMapJoinOptimizedHashMap
       sb.append("isSingleRow " + (joinResult() == JoinUtil.JoinResult.MATCH ? isSingleRow() : "<none>") + ")");
       return sb.toString();
     }
+
+    @Override
+    public String getDetailedHashMapResultPositionString() {
+      return "(Not supported yet)";
+    }
  }
 
   @Override

http://git-wip-us.apache.org/repos/asf/hive/blob/2d1f403d/ql/src/test/org/apache/hadoop/hive/ql/exec/persistence/TestBytesBytesMultiHashMap.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/persistence/TestBytesBytesMultiHashMap.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/persistence/TestBytesBytesMultiHashMap.java
index aed9214..c1d7c72 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/persistence/TestBytesBytesMultiHashMap.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/persistence/TestBytesBytesMultiHashMap.java
@@ -133,8 +133,6 @@ public class TestBytesBytesMultiHashMap {
         hs.add(ref.copy());
         ref = hashMapResult.next();
       }
-    } else {
-      assertTrue(hashMapResult.isEof());
     }
     assertEquals(state, count);
     assertEquals(values.length, count);

http://git-wip-us.apache.org/repos/asf/hive/blob/2d1f403d/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorRowObject.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorRowObject.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorRowObject.java
index c55d951..9c4a973 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorRowObject.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorRowObject.java
@@ -22,7 +22,6 @@ import java.util.Arrays;
 import java.util.Random;
 
 import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.serde2.fast.RandomRowObjectSource;
 
 import junit.framework.TestCase;
 
@@ -51,7 +50,7 @@ public class TestVectorRowObject extends TestCase {
 
     String[] emptyScratchTypeNames = new String[0];
 
-    RandomRowObjectSource source = new RandomRowObjectSource();
+    VectorRandomRowSource source = new VectorRandomRowSource();
     source.init(r);
 
     VectorizedRowBatchCtx batchContext = new VectorizedRowBatchCtx();

http://git-wip-us.apache.org/repos/asf/hive/blob/2d1f403d/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSerDeRow.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSerDeRow.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSerDeRow.java
index da69ee3..c6704f9 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSerDeRow.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSerDeRow.java
@@ -48,7 +48,6 @@ import org.apache.hadoop.hive.serde2.ByteStream.Output;
 import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead;
 import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite;
 import org.apache.hadoop.hive.serde2.fast.DeserializeRead;
-import org.apache.hadoop.hive.serde2.fast.RandomRowObjectSource;
 import org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters;
 import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
 import org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleDeserializeRead;
@@ -85,7 +84,7 @@ public class TestVectorSerDeRow extends TestCase {
   }
 
   void deserializeAndVerify(Output output, DeserializeRead deserializeRead,
-              RandomRowObjectSource source, Object[] expectedRow)
+              VectorRandomRowSource source, Object[] expectedRow)
               throws HiveException, IOException {
     deserializeRead.set(output.getData(),  0, output.getLength());
     PrimitiveCategory[] primitiveCategories = source.primitiveCategories();
@@ -281,12 +280,11 @@ public class TestVectorSerDeRow extends TestCase {
     }
     deserializeRead.extraFieldsCheck();
     TestCase.assertTrue(!deserializeRead.readBeyondConfiguredFieldsWarned());
-    TestCase.assertTrue(!deserializeRead.readBeyondBufferRangeWarned());
     TestCase.assertTrue(!deserializeRead.bufferRangeHasExtraDataWarned());
   }
 
   void serializeBatch(VectorizedRowBatch batch, VectorSerializeRow vectorSerializeRow,
-           DeserializeRead deserializeRead, RandomRowObjectSource source, Object[][] randomRows,
+           DeserializeRead deserializeRead, VectorRandomRowSource source, Object[][] randomRows,
            int firstRandomRowIndex) throws HiveException, IOException {
 
     Output output = new Output();
@@ -311,7 +309,7 @@ public class TestVectorSerDeRow extends TestCase {
 
     String[] emptyScratchTypeNames = new String[0];
 
-    RandomRowObjectSource source = new RandomRowObjectSource();
+    VectorRandomRowSource source = new VectorRandomRowSource();
     source.init(r);
 
     VectorizedRowBatchCtx batchContext = new VectorizedRowBatchCtx();
@@ -389,7 +387,7 @@ public class TestVectorSerDeRow extends TestCase {
     }
   }
 
-  private Output serializeRow(Object[] row, RandomRowObjectSource source, SerializeWrite serializeWrite) throws HiveException, IOException {
+  private Output serializeRow(Object[] row, VectorRandomRowSource source, SerializeWrite serializeWrite) throws HiveException, IOException {
     Output output = new Output();
     serializeWrite.set(output);
     PrimitiveTypeInfo[] primitiveTypeInfos = source.primitiveTypeInfos();
@@ -542,7 +540,7 @@ public class TestVectorSerDeRow extends TestCase {
 
     String[] emptyScratchTypeNames = new String[0];
 
-    RandomRowObjectSource source = new RandomRowObjectSource();
+    VectorRandomRowSource source = new VectorRandomRowSource();
     source.init(r);
 
     VectorizedRowBatchCtx batchContext = new VectorizedRowBatchCtx();

http://git-wip-us.apache.org/repos/asf/hive/blob/2d1f403d/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java
new file mode 100644
index 0000000..349c76a
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java
@@ -0,0 +1,458 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector;
+
+import java.sql.Date;
+import java.sql.Timestamp;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Random;
+
+import org.apache.commons.lang.ArrayUtils;
+import org.apache.commons.lang.StringUtils;
+import org.apache.hadoop.hive.common.type.HiveChar;
+import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.common.type.HiveIntervalDayTime;
+import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth;
+import org.apache.hadoop.hive.common.type.HiveVarchar;
+import org.apache.hadoop.hive.common.type.RandomTypeUtil;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBooleanObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableByteObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDateObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDoubleObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableFloatObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveCharObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveDecimalObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveIntervalDayTimeObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveIntervalYearMonthObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveVarcharObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableIntObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableLongObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableShortObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableTimestampObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
+import org.apache.hive.common.util.DateUtils;
+
+/**
+ * Generate object inspector and random row object[].
+ */
+public class VectorRandomRowSource {
+
+  private Random r;
+
+  private int columnCount;
+
+  private List<String> typeNames;
+
+  private PrimitiveCategory[] primitiveCategories;
+
+  private PrimitiveTypeInfo[] primitiveTypeInfos;
+
+  private List<ObjectInspector> primitiveObjectInspectorList;
+
+  private StructObjectInspector rowStructObjectInspector;
+
+  public List<String> typeNames() {
+    return typeNames;
+  }
+
+  public PrimitiveCategory[] primitiveCategories() {
+    return primitiveCategories;
+  }
+
+  public PrimitiveTypeInfo[] primitiveTypeInfos() {
+    return primitiveTypeInfos;
+  }
+
+  public StructObjectInspector rowStructObjectInspector() {
+    return rowStructObjectInspector;
+  }
+
+  public StructObjectInspector partialRowStructObjectInspector(int partialFieldCount) {
+    ArrayList<ObjectInspector> partialPrimitiveObjectInspectorList =
+        new ArrayList<ObjectInspector>(partialFieldCount);
+    List<String> columnNames = new ArrayList<String>(partialFieldCount);
+    for (int i = 0; i < partialFieldCount; i++) {
+      columnNames.add(String.format("partial%d", i));
+      partialPrimitiveObjectInspectorList.add(
+          PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(
+              primitiveTypeInfos[i]));
+    }
+
+    return ObjectInspectorFactory.getStandardStructObjectInspector(
+        columnNames, partialPrimitiveObjectInspectorList);
+  }
+
+  public void init(Random r) {
+    this.r = r;
+    chooseSchema();
+  }
+
+  /*
+   * For now, exclude CHAR until we determine why there is a difference (blank padding)
+   * between serializing with LazyBinarySerializeWrite and with the regular SerDe...
+   */
+  private static String[] possibleHiveTypeNames = {
+      "boolean",
+      "tinyint",
+      "smallint",
+      "int",
+      "bigint",
+      "date",
+      "float",
+      "double",
+      "string",
+//    "char",
+      "varchar",
+      "binary",
+      "date",
+      "timestamp",
+      "interval_year_month",
+      "interval_day_time",
+      "decimal"
+  };
+
+  private void chooseSchema() {
+    HashSet<Integer> hashSet = null;
+    boolean allTypes;
+    boolean onlyOne = (r.nextInt(100) == 7);
+    if (onlyOne) {
+      columnCount = 1;
+      allTypes = false;
+    } else {
+      allTypes = r.nextBoolean();
+      if (allTypes) {
+        // One of each type.
+        columnCount = possibleHiveTypeNames.length;
+        hashSet = new HashSet<Integer>();
+      } else {
+        columnCount = 1 + r.nextInt(20);
+      }
+    }
+    typeNames = new ArrayList<String>(columnCount);
+    primitiveCategories = new PrimitiveCategory[columnCount];
+    primitiveTypeInfos = new PrimitiveTypeInfo[columnCount];
+    primitiveObjectInspectorList = new ArrayList<ObjectInspector>(columnCount);
+    List<String> columnNames = new ArrayList<String>(columnCount);
+    for (int c = 0; c < columnCount; c++) {
+      columnNames.add(String.format("col%d", c));
+      String typeName;
+
+      if (onlyOne) {
+        typeName = possibleHiveTypeNames[r.nextInt(possibleHiveTypeNames.length)];
+      } else {
+        int typeNum;
+        if (allTypes) {
+          while (true) {
+            typeNum = r.nextInt(possibleHiveTypeNames.length);
+            Integer typeNumInteger = new Integer(typeNum);
+            if (!hashSet.contains(typeNumInteger)) {
+              hashSet.add(typeNumInteger);
+              break;
+            }
+          }
+        } else {
+          typeNum = r.nextInt(possibleHiveTypeNames.length);
+        }
+        typeName = possibleHiveTypeNames[typeNum];
+      }
+      if (typeName.equals("char")) {
+        int maxLength = 1 + r.nextInt(100);
+        typeName = String.format("char(%d)", maxLength);
+      } else if (typeName.equals("varchar")) {
+        int maxLength = 1 + r.nextInt(100);
+        typeName = String.format("varchar(%d)", maxLength);
+      } else if (typeName.equals("decimal")) {
+        typeName = String.format("decimal(%d,%d)", HiveDecimal.SYSTEM_DEFAULT_PRECISION, HiveDecimal.SYSTEM_DEFAULT_SCALE);
+      }
+      PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString(typeName);
+      primitiveTypeInfos[c] = primitiveTypeInfo;
+      PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory();
+      primitiveCategories[c] = primitiveCategory;
+      primitiveObjectInspectorList.add(PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(primitiveTypeInfo));
+      typeNames.add(typeName);
+    }
+    rowStructObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, primitiveObjectInspectorList);
+  }
+
+  public Object[][] randomRows(int n) {
+    Object[][] result = new Object[n][];
+    for (int i = 0; i < n; i++) {
+      result[i] = randomRow();
+    }
+    return result;
+  }
+
+  public Object[] randomRow() {
+    Object row[] = new Object[columnCount];
+    for (int c = 0; c < columnCount; c++) {
+      Object object = randomObject(c);
+      if (object == null) {
+        throw new Error("Unexpected null for column " + c);
+      }
+      row[c] = getWritableObject(c, object);
+      if (row[c] == null) {
+        throw new Error("Unexpected null for writable for column " + c);
+      }
+    }
+    return row;
+  }
+
+  public Object[] randomRow(int columnCount) {
+    return randomRow(columnCount, r, primitiveObjectInspectorList, primitiveCategories,
+        primitiveTypeInfos);
+  }
+
+  public static Object[] randomRow(int columnCount, Random r,
+      List<ObjectInspector> primitiveObjectInspectorList, PrimitiveCategory[] primitiveCategories,
+      PrimitiveTypeInfo[] primitiveTypeInfos) {
+    Object row[] = new Object[columnCount];
+    for (int c = 0; c < columnCount; c++) {
+      Object object = randomObject(c, r, primitiveCategories, primitiveTypeInfos);
+      if (object == null) {
+        throw new Error("Unexpected null for column " + c);
+      }
+      row[c] = getWritableObject(c, object, primitiveObjectInspectorList,
+          primitiveCategories, primitiveTypeInfos);
+      if (row[c] == null) {
+        throw new Error("Unexpected null for writable for column " + c);
+      }
+    }
+    return row;
+  }
+
+  public static void sort(Object[][] rows, ObjectInspector oi) {
+    for (int i = 0; i < rows.length; i++) {
+      for (int j = i + 1; j < rows.length; j++) {
+        if (ObjectInspectorUtils.compare(rows[i], oi, rows[j], oi) > 0) {
+          Object[] t = rows[i];
+          rows[i] = rows[j];
+          rows[j] = t;
+        }
+      }
+    }
+  }
+
+  public void sort(Object[][] rows) {
+    VectorRandomRowSource.sort(rows, rowStructObjectInspector);
+  }
+
+  public Object getWritableObject(int column, Object object) {
+    return getWritableObject(column, object, primitiveObjectInspectorList,
+        primitiveCategories, primitiveTypeInfos);
+  }
+
+  public static Object getWritableObject(int column, Object object,
+      List<ObjectInspector> primitiveObjectInspectorList, PrimitiveCategory[] primitiveCategories,
+      PrimitiveTypeInfo[] primitiveTypeInfos) {
+    ObjectInspector objectInspector = primitiveObjectInspectorList.get(column);
+    PrimitiveCategory primitiveCategory = primitiveCategories[column];
+    PrimitiveTypeInfo primitiveTypeInfo = primitiveTypeInfos[column];
+    switch (primitiveCategory) {
+    case BOOLEAN:
+      return ((WritableBooleanObjectInspector) objectInspector).create((boolean) object);
+    case BYTE:
+      return ((WritableByteObjectInspector) objectInspector).create((byte) object);
+    case SHORT:
+      return ((WritableShortObjectInspector) objectInspector).create((short) object);
+    case INT:
+      return ((WritableIntObjectInspector) objectInspector).create((int) object);
+    case LONG:
+      return ((WritableLongObjectInspector) objectInspector).create((long) object);
+    case DATE:
+      return ((WritableDateObjectInspector) objectInspector).create((Date) object);
+    case FLOAT:
+      return ((WritableFloatObjectInspector) objectInspector).create((float) object);
+    case DOUBLE:
+      return ((WritableDoubleObjectInspector) objectInspector).create((double) object);
+    case STRING:
+      return ((WritableStringObjectInspector) objectInspector).create((String) object);
+    case CHAR:
+      {
+        WritableHiveCharObjectInspector writableCharObjectInspector =
+                new WritableHiveCharObjectInspector( (CharTypeInfo) primitiveTypeInfo);
+        return writableCharObjectInspector.create(new HiveChar(StringUtils.EMPTY, -1));
+      }
+    case VARCHAR:
+      {
+        WritableHiveVarcharObjectInspector writableVarcharObjectInspector =
+                new WritableHiveVarcharObjectInspector( (VarcharTypeInfo) primitiveTypeInfo);
+        return writableVarcharObjectInspector.create(new HiveVarchar(StringUtils.EMPTY, -1));
+      }
+    case BINARY:
+      return PrimitiveObjectInspectorFactory.writableBinaryObjectInspector.create(ArrayUtils.EMPTY_BYTE_ARRAY);
+    case TIMESTAMP:
+      return ((WritableTimestampObjectInspector) objectInspector).create(new Timestamp(0));
+    case INTERVAL_YEAR_MONTH:
+      return ((WritableHiveIntervalYearMonthObjectInspector) objectInspector).create(new HiveIntervalYearMonth(0));
+    case INTERVAL_DAY_TIME:
+      return ((WritableHiveIntervalDayTimeObjectInspector) objectInspector).create(new HiveIntervalDayTime(0, 0));
+    case DECIMAL:
+      {
+        WritableHiveDecimalObjectInspector writableDecimalObjectInspector =
+                new WritableHiveDecimalObjectInspector((DecimalTypeInfo) primitiveTypeInfo);
+        return writableDecimalObjectInspector.create(HiveDecimal.ZERO);
+      }
+    default:
+      throw new Error("Unknown primitive category " + primitiveCategory);
+    }
+  }
+
+  public Object randomObject(int column) {
+    return randomObject(column, r, primitiveCategories, primitiveTypeInfos);
+  }
+
+  public static Object randomObject(int column, Random r, PrimitiveCategory[] primitiveCategories,
+      PrimitiveTypeInfo[] primitiveTypeInfos) {
+    PrimitiveCategory primitiveCategory = primitiveCategories[column];
+    PrimitiveTypeInfo primitiveTypeInfo = primitiveTypeInfos[column];
+    switch (primitiveCategory) {
+    case BOOLEAN:
+      return Boolean.valueOf(r.nextBoolean());
+    case BYTE:
+      return Byte.valueOf((byte) r.nextInt());
+    case SHORT:
+      return Short.valueOf((short) r.nextInt());
+    case INT:
+      return Integer.valueOf(r.nextInt());
+    case LONG:
+      return Long.valueOf(r.nextLong());
+    case DATE:
+      return RandomTypeUtil.getRandDate(r);
+    case FLOAT:
+      return Float.valueOf(r.nextFloat() * 10 - 5);
+    case DOUBLE:
+      return Double.valueOf(r.nextDouble() * 10 - 5);
+    case STRING:
+      return RandomTypeUtil.getRandString(r);
+    case CHAR:
+      return getRandHiveChar(r, (CharTypeInfo) primitiveTypeInfo);
+    case VARCHAR:
+      return getRandHiveVarchar(r, (VarcharTypeInfo) primitiveTypeInfo);
+    case BINARY:
+      return getRandBinary(r, 1 + r.nextInt(100));
+    case TIMESTAMP:
+      return RandomTypeUtil.getRandTimestamp(r);
+    case INTERVAL_YEAR_MONTH:
+      return getRandIntervalYearMonth(r);
+    case INTERVAL_DAY_TIME:
+      return getRandIntervalDayTime(r);
+    case DECIMAL:
+      return getRandHiveDecimal(r, (DecimalTypeInfo) primitiveTypeInfo);
+    default:
+      throw new Error("Unknown primitive category " + primitiveCategory);
+    }
+  }
+
+  public static HiveChar getRandHiveChar(Random r, CharTypeInfo charTypeInfo) {
+    int maxLength = 1 + r.nextInt(charTypeInfo.getLength());
+    String randomString = RandomTypeUtil.getRandString(r, "abcdefghijklmnopqrstuvwxyz", 100);
+    HiveChar hiveChar = new HiveChar(randomString, maxLength);
+    return hiveChar;
+  }
+
+  public static HiveVarchar getRandHiveVarchar(Random r, VarcharTypeInfo varcharTypeInfo) {
+    int maxLength = 1 + r.nextInt(varcharTypeInfo.getLength());
+    String randomString = RandomTypeUtil.getRandString(r, "abcdefghijklmnopqrstuvwxyz", 100);
+    HiveVarchar hiveVarchar = new HiveVarchar(randomString, maxLength);
+    return hiveVarchar;
+  }
+
+  public static byte[] getRandBinary(Random r, int len){
+    byte[] bytes = new byte[len];
+    for (int j = 0; j < len; j++){
+      bytes[j] = Byte.valueOf((byte) r.nextInt());
+    }
+    return bytes;
+  }
+
+  private static final String DECIMAL_CHARS = "0123456789";
+
+  public static HiveDecimal getRandHiveDecimal(Random r, DecimalTypeInfo decimalTypeInfo) {
+    while (true) {
+      StringBuilder sb = new StringBuilder();
+      int precision = 1 + r.nextInt(18);
+      int scale = 0 + r.nextInt(precision + 1);
+
+      int integerDigits = precision - scale;
+
+      if (r.nextBoolean()) {
+        sb.append("-");
+      }
+
+      if (integerDigits == 0) {
+        sb.append("0");
+      } else {
+        sb.append(RandomTypeUtil.getRandString(r, DECIMAL_CHARS, integerDigits));
+      }
+      if (scale != 0) {
+        sb.append(".");
+        sb.append(RandomTypeUtil.getRandString(r, DECIMAL_CHARS, scale));
+      }
+
+      HiveDecimal bd = HiveDecimal.create(sb.toString());
+      if (bd.scale() > bd.precision()) {
+        // Sometimes weird decimals are produced?
+        continue;
+      }
+
+      return bd;
+    }
+  }
+
+  public static HiveIntervalYearMonth getRandIntervalYearMonth(Random r) {
+    String yearMonthSignStr = r.nextInt(2) == 0 ? "" : "-";
+    String intervalYearMonthStr = String.format("%s%d-%d",
+        yearMonthSignStr,
+        Integer.valueOf(1800 + r.nextInt(500)),  // year
+        Integer.valueOf(0 + r.nextInt(12)));     // month
+    HiveIntervalYearMonth intervalYearMonthVal = HiveIntervalYearMonth.valueOf(intervalYearMonthStr);
+    return intervalYearMonthVal;
+  }
+
+  public static HiveIntervalDayTime getRandIntervalDayTime(Random r) {
+    String optionalNanos = "";
+    if (r.nextInt(2) == 1) {
+      optionalNanos = String.format(".%09d",
+          Integer.valueOf(0 + r.nextInt(DateUtils.NANOS_PER_SEC)));
+    }
+    String daySignStr = r.nextInt(2) == 0 ? "" : "-";
+    String dayTimeStr = String.format("%s%d %02d:%02d:%02d%s",
+        daySignStr,
+        Integer.valueOf(1 + r.nextInt(28)),      // day
+        Integer.valueOf(0 + r.nextInt(24)),      // hour
+        Integer.valueOf(0 + r.nextInt(60)),      // minute
+        Integer.valueOf(0 + r.nextInt(60)),      // second
+        optionalNanos);
+    HiveIntervalDayTime intervalDayTimeVal = HiveIntervalDayTime.valueOf(dayTimeStr);
+    return intervalDayTimeVal;
+  }
+}
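
For context, a short usage sketch (not part of the patch) showing how the tests above drive this class: seed it, pull a batch of random rows, and use the matching object inspector and type infos to interpret them:

    // Illustration only -- mirrors TestVectorRowObject / TestVectorSerDeRow above.
    Random r = new Random(1234);
    VectorRandomRowSource source = new VectorRandomRowSource();
    source.init(r);

    Object[][] randomRows = source.randomRows(100);
    StructObjectInspector rowOI = source.rowStructObjectInspector();
    PrimitiveTypeInfo[] typeInfos = source.primitiveTypeInfos();
    // Each randomRows[i][c] is a writable object matching typeInfos[c] / rowOI field c.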

http://git-wip-us.apache.org/repos/asf/hive/blob/2d1f403d/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CheckFastHashTable.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CheckFastHashTable.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CheckFastHashTable.java
index 3a23584..28a4dc6 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CheckFastHashTable.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CheckFastHashTable.java
@@ -23,6 +23,7 @@ import java.util.Arrays;
 import java.util.Comparator;
 import java.util.HashMap;
 import java.util.List;
+import java.util.Properties;
 import java.util.Random;
 import java.util.TreeMap;
 
@@ -43,12 +44,14 @@ import static org.junit.Assert.*;
 
 public class CheckFastHashTable {
 
-  public static boolean findMatch(byte[] valueBytes, List<byte[]> actualValues, int actualCount, boolean[] taken) {
+  public static boolean findMatch(int valueIndex, byte[] valueBytes, List<byte[]> actualValues,
+      int actualCount, boolean[] actualTaken, int[] actualToValueMap) {
     for (int i = 0; i < actualCount; i++) {
-      if (!taken[i]) {
+      if (!actualTaken[i]) {
         byte[] actualBytes = actualValues.get(i);
         if (StringExpr.compare(valueBytes, 0, valueBytes.length, actualBytes, 0, actualBytes.length) == 0) {
-          taken[i] = true;
+          actualToValueMap[i] = valueIndex;
+          actualTaken[i] = true;
           return true;
         }
       }
@@ -56,7 +59,7 @@ public class CheckFastHashTable {
     return false;
   }
 
-  public static void verifyHashMapValues(VectorMapJoinHashMapResult hashMapResult,
+  public static int[] verifyHashMapValues(VectorMapJoinHashMapResult hashMapResult,
       List<byte[]> values) {
 
     int valueCount = values.size();
@@ -87,15 +90,16 @@ public class CheckFastHashTable {
       TestCase.fail("values.size() " + valueCount + " does not match actualCount " + actualCount);
     }
 
-    boolean[] taken = new boolean[actualCount];
+    boolean[] actualTaken = new boolean[actualCount];
+    int[] actualToValueMap = new int[actualCount];
 
     for (int i = 0; i < actualCount; i++) {
       byte[] valueBytes = values.get(i);
 
-      if (!findMatch(valueBytes, actualValues, actualCount, taken)) {
+      if (!findMatch(i, valueBytes, actualValues, actualCount, actualTaken, actualToValueMap)) {
         List<Integer> availableLengths = new ArrayList<Integer>();
         for (int a = 0; a < actualCount; a++) {
-          if (!taken[a]) {
+          if (!actualTaken[a]) {
             availableLengths.add(actualValues.get(a).length);
           }
         }
@@ -103,6 +107,7 @@ public class CheckFastHashTable {
             ", availableLengths " + availableLengths.toString() + " of " + actualCount + " total)");
       }
     }
+    return actualToValueMap;
   }
 
   /*

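Since the hash map may return a key's values in any order, verifyHashMapValues now reports which original value each retrieved position matched. A small sketch, illustration only, of how the returned mapping is read by the row-level checks:

    // actualToValueMap[a] == v means the a-th value returned by first()/next()
    // is byte-equal to values.get(v), so the a-th row should equal rows.get(v).
    int[] actualToValueMap = verifyHashMapValues(hashMapResult, values);
    for (int a = 0; a < actualToValueMap.length; a++) {
      Object[] expectedRow = rows.get(actualToValueMap[a]);
      // ... deserialize the a-th value and compare it to expectedRow ...
    }
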
http://git-wip-us.apache.org/repos/asf/hive/blob/2d1f403d/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CheckFastRowHashMap.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CheckFastRowHashMap.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CheckFastRowHashMap.java
new file mode 100644
index 0000000..0bcfb56
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CheckFastRowHashMap.java
@@ -0,0 +1,387 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast;
+
+import static org.junit.Assert.assertTrue;
+
+import java.io.EOFException;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Random;
+import java.util.TreeMap;
+
+import junit.framework.TestCase;
+
+import org.apache.hadoop.hive.ql.exec.JoinUtil;
+import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult;
+import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType;
+import org.apache.hadoop.hive.serde2.WriteBuffers;
+import org.apache.hadoop.hive.serde2.io.ByteWritable;
+import org.apache.hadoop.hive.serde2.io.ShortWritable;
+import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.io.BooleanWritable;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
+
+import com.google.common.base.Preconditions;
+
+public class CheckFastRowHashMap extends CheckFastHashTable {
+
+  public static void verifyHashMapRows(List<Object[]> rows, int[] actualToValueMap,
+      VectorMapJoinHashMapResult hashMapResult, TypeInfo[] typeInfos) throws IOException {
+
+    final int count = rows.size();
+    final int columnCount = typeInfos.length;
+
+    WriteBuffers.ByteSegmentRef ref = hashMapResult.first();
+
+    for (int a = 0; a < count; a++) {
+
+      int valueIndex = actualToValueMap[a];
+
+      Object[] row = rows.get(valueIndex);
+
+      byte[] bytes = ref.getBytes();
+      int offset = (int) ref.getOffset();
+      int length = ref.getLength();
+
+      LazyBinaryDeserializeRead lazyBinaryDeserializeRead =
+          new LazyBinaryDeserializeRead(typeInfos);
+
+      lazyBinaryDeserializeRead.set(bytes, offset, length);
+
+      for (int index = 0; index < columnCount; index++) {
+        Writable writable = (Writable) row[index];
+        VerifyFastRow.verifyDeserializeRead(lazyBinaryDeserializeRead, (PrimitiveTypeInfo) typeInfos[index], writable);
+      }
+      lazyBinaryDeserializeRead.extraFieldsCheck();
+      TestCase.assertTrue(!lazyBinaryDeserializeRead.readBeyondConfiguredFieldsWarned());
+
+      TestCase.assertTrue(!lazyBinaryDeserializeRead.bufferRangeHasExtraDataWarned());
+
+      ref = hashMapResult.next();
+      if (a == count - 1) {
+        TestCase.assertTrue (ref == null);
+      } else {
+        TestCase.assertTrue (ref != null);
+      }
+    }
+  }
+
+  private static String debugDetailedReadPositionString;
+
+  private static String debugDetailedHashMapResultPositionString;
+
+  private static String debugExceptionMessage;
+  private static StackTraceElement[] debugStackTrace;
+
+  public static void verifyHashMapRowsMore(List<Object[]> rows, int[] actualToValueMap,
+      VectorMapJoinHashMapResult hashMapResult, TypeInfo[] typeInfos,
+      int clipIndex, boolean useExactBytes) throws IOException {
+
+    final int count = rows.size();
+    final int columnCount = typeInfos.length;
+
+    WriteBuffers.ByteSegmentRef ref = hashMapResult.first();
+
+    for (int a = 0; a < count; a++) {
+
+      int valueIndex = actualToValueMap[a];
+
+      Object[] row = rows.get(valueIndex);
+
+      byte[] bytes = ref.getBytes();
+      int offset = (int) ref.getOffset();
+      int length = ref.getLength();
+      if (a == clipIndex) {
+        length--;
+      }
+
+      if (useExactBytes) {
+        // Use exact byte array which might generate array out of bounds...
+        bytes = Arrays.copyOfRange(bytes, offset, offset + length);
+        offset = 0;
+      }
+
+      LazyBinaryDeserializeRead lazyBinaryDeserializeRead =
+          new LazyBinaryDeserializeRead(typeInfos);
+
+      lazyBinaryDeserializeRead.set(bytes, offset, length);
+
+      boolean thrown = false;
+      Exception saveException = null;
+      boolean notExpected = false;
+      int index = 0;
+      try {
+        for (index = 0; index < columnCount; index++) {
+          Writable writable = (Writable) row[index];
+          VerifyFastRow.verifyDeserializeRead(lazyBinaryDeserializeRead, (PrimitiveTypeInfo) typeInfos[index], writable);
+        }
+      } catch (Exception e) {
+        thrown = true;
+        saveException = e;
+        debugDetailedReadPositionString = lazyBinaryDeserializeRead.getDetailedReadPositionString();
+
+        debugDetailedHashMapResultPositionString = hashMapResult.getDetailedHashMapResultPositionString();
+
+        debugExceptionMessage = saveException.getMessage();
+        debugStackTrace = saveException.getStackTrace();
+      }
+      if (a == clipIndex) {
+        if (!thrown) {
+          TestCase.fail("Expecting an exception to be thrown for the clipped case...");
+        } else {
+          TestCase.assertTrue(saveException != null);
+          if (saveException instanceof EOFException) {
+            // This is the one we are expecting.
+          } else if (saveException instanceof ArrayIndexOutOfBoundsException) {
+            notExpected = true;
+          } else {
+            TestCase.fail("Expecting an EOFException to be thrown for the clipped case...");
+          }
+        }
+      } else {
+        if (thrown) {
+          TestCase.fail("Not expecting an exception to be thrown for the non-clipped case...");
+        }
+        lazyBinaryDeserializeRead.extraFieldsCheck();
+        TestCase.assertTrue(!lazyBinaryDeserializeRead.readBeyondConfiguredFieldsWarned());
+
+        TestCase.assertTrue(!lazyBinaryDeserializeRead.bufferRangeHasExtraDataWarned());
+      }
+
+      ref = hashMapResult.next();
+      if (a == count - 1) {
+        TestCase.assertTrue (ref == null);
+      } else {
+        TestCase.assertTrue (ref != null);
+      }
+    }
+  }
+
+  /*
+   * Element for Key: row and byte[] x Hash Table: HashMap
+   */
+  public static class FastRowHashMapElement {
+    private byte[] key;
+    private Object[] keyRow;
+    private List<byte[]> values;
+    private List<Object[]> valueRows;
+
+    public FastRowHashMapElement(byte[] key, Object[] keyRow, byte[] firstValue,
+        Object[] valueRow) {
+      this.key = key;
+      this.keyRow = keyRow;
+      values = new ArrayList<byte[]>();
+      values.add(firstValue);
+      valueRows = new ArrayList<Object[]>();
+      valueRows.add(valueRow);
+    }
+
+    public byte[] getKey() {
+      return key;
+    }
+
+    public Object[] getKeyRow() {
+      return keyRow;
+    }
+
+    public int getCount() {
+      return values.size();
+    }
+
+    public List<byte[]> getValues() {
+      return values;
+    }
+
+    public List<Object[]> getValueRows() {
+      return valueRows;
+    }
+
+    public void add(byte[] value, Object[] valueRow) {
+      values.add(value);
+      valueRows.add(valueRow);
+    }
+  }
+
+  /*
+   * Verify table for Key: row and byte[] x Hash Table: HashMap
+   */
+  public static class VerifyFastRowHashMap {
+
+    private int count;
+
+    private FastRowHashMapElement[] array;
+
+    private TreeMap<BytesWritable, Integer> keyValueMap;
+
+    public VerifyFastRowHashMap() {
+      count = 0;
+      array = new FastRowHashMapElement[50];
+
+      // We use BytesWritable because it supports Comparable for our TreeMap.
+      keyValueMap = new TreeMap<BytesWritable, Integer>();
+    }
+
+    public int getCount() {
+      return count;
+    }
+
+    public boolean contains(byte[] key) {
+      BytesWritable keyBytesWritable = new BytesWritable(key, key.length);
+      return keyValueMap.containsKey(keyBytesWritable);
+    }
+
+    public void add(byte[] key, Object[] keyRow, byte[] value, Object[] valueRow) {
+      BytesWritable keyBytesWritable = new BytesWritable(key, key.length);
+      if (keyValueMap.containsKey(keyBytesWritable)) {
+        int index = keyValueMap.get(keyBytesWritable);
+        array[index].add(value, valueRow);
+      } else {
+        if (count >= array.length) {
+          // Grow.
+          FastRowHashMapElement[] newArray = new FastRowHashMapElement[array.length * 2];
+          System.arraycopy(array, 0, newArray, 0, count);
+          array = newArray;
+        }
+        array[count] = new FastRowHashMapElement(key, keyRow, value, valueRow);
+        keyValueMap.put(keyBytesWritable, count);
+        count++;
+      }
+    }
+
+    public byte[] addRandomExisting(byte[] value, Object[] valueRow, Random r) {
+      Preconditions.checkState(count > 0);
+      int index = r.nextInt(count);
+      array[index].add(value, valueRow);
+      return array[index].getKey();
+    }
+
+    public byte[] getKey(int index) {
+      return array[index].getKey();
+    }
+
+    public List<byte[]> getValues(int index) {
+      return array[index].getValues();
+    }
+
+    public void verify(VectorMapJoinFastHashTable map,
+        HashTableKeyType hashTableKeyType,
+        PrimitiveTypeInfo[] valuePrimitiveTypeInfos, boolean doClipping,
+        boolean useExactBytes, Random random) throws IOException {
+      int mapSize = map.size();
+      if (mapSize != count) {
+        TestCase.fail("map.size() does not match expected count");
+      }
+
+      for (int index = 0; index < count; index++) {
+        FastRowHashMapElement element = array[index];
+
+        List<byte[]> values = element.getValues();
+
+        VectorMapJoinHashMapResult hashMapResult = null;
+        JoinUtil.JoinResult joinResult = JoinUtil.JoinResult.NOMATCH;
+        switch (hashTableKeyType) {
+        case BOOLEAN:
+        case BYTE:
+        case SHORT:
+        case INT:
+        case LONG:
+          {
+            Object[] keyRow = element.getKeyRow();
+            Object keyObject = keyRow[0];
+            VectorMapJoinFastLongHashMap longHashMap = (VectorMapJoinFastLongHashMap) map;
+            hashMapResult = longHashMap.createHashMapResult();
+            long longKey;
+            switch (hashTableKeyType) {
+            case BOOLEAN:
+              longKey = ((BooleanWritable) keyObject).get() ? 1 : 0;
+              break;
+            case BYTE:
+              longKey = ((ByteWritable) keyObject).get();
+              break;
+            case SHORT:
+              longKey = ((ShortWritable) keyObject).get();
+              break;
+            case INT:
+              longKey = ((IntWritable) keyObject).get();
+              break;
+            case LONG:
+              longKey = ((LongWritable) keyObject).get();
+              break;
+            default:
+              throw new RuntimeException("Unexpected hash table key type " + hashTableKeyType.name());
+            }
+            joinResult = longHashMap.lookup(longKey, hashMapResult);
+            if (joinResult != JoinUtil.JoinResult.MATCH) {
+              assertTrue(false);
+            }
+          }
+          break;
+        case STRING:
+          {
+            Object[] keyRow = element.getKeyRow();
+            Object keyObject = keyRow[0];
+            VectorMapJoinFastStringHashMap stringHashMap = (VectorMapJoinFastStringHashMap) map;
+            hashMapResult = stringHashMap.createHashMapResult();
+            Text text = (Text) keyObject;
+            byte[] bytes = text.getBytes();
+            int length = text.getLength();
+            joinResult = stringHashMap.lookup(bytes, 0, length, hashMapResult);
+            if (joinResult != JoinUtil.JoinResult.MATCH) {
+              assertTrue(false);
+            }
+          }
+          break;
+        case MULTI_KEY:
+          {
+            byte[] keyBytes = element.getKey();
+            VectorMapJoinFastMultiKeyHashMap stringHashMap = (VectorMapJoinFastMultiKeyHashMap) map;
+            hashMapResult = stringHashMap.createHashMapResult();
+            joinResult = stringHashMap.lookup(keyBytes, 0, keyBytes.length, hashMapResult);
+            if (joinResult != JoinUtil.JoinResult.MATCH) {
+              assertTrue(false);
+            }
+          }
+          break;
+        default:
+          throw new RuntimeException("Unexpected hash table key type " + hashTableKeyType.name());
+        }
+
+        int[] actualToValueMap = verifyHashMapValues(hashMapResult, values);
+
+        List<Object[]> rows = element.getValueRows();
+        if (!doClipping && !useExactBytes) {
+          verifyHashMapRows(rows, actualToValueMap, hashMapResult, valuePrimitiveTypeInfos);
+        } else {
+          int clipIndex = random.nextInt(rows.size());
+          verifyHashMapRowsMore(rows, actualToValueMap, hashMapResult, valuePrimitiveTypeInfos,
+              clipIndex, useExactBytes);
+        }
+      }
+    }
+  }
+}
\ No newline at end of file
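
To restate the two failure modes this helper exercises: doClipping drops the last byte of one serialized value so deserialization should fail with a clean EOFException, while useExactBytes copies the value into a buffer with no trailing slack so any read past the end surfaces instead of being masked by neighboring data. Condensed from verifyHashMapRowsMore above, illustration only:

    byte[] bytes = ref.getBytes();
    int offset = (int) ref.getOffset();
    int length = ref.getLength();
    if (a == clipIndex) {
      length--;                 // truncate one value; EOF checking should catch this
    }
    if (useExactBytes) {
      bytes = Arrays.copyOfRange(bytes, offset, offset + length);
      offset = 0;               // exact-length buffer; overreads become array bounds errors
    }
    lazyBinaryDeserializeRead.set(bytes, offset, length);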

http://git-wip-us.apache.org/repos/asf/hive/blob/2d1f403d/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMap.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMap.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMap.java
index bbfa65f..8525e99 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMap.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMap.java
@@ -269,4 +269,19 @@ public class TestVectorMapJoinFastBytesHashMap extends CommonFastHashTable {
     int keyCount = 1000;
     addAndVerifyMultipleKeyMultipleValue(keyCount, map, verifyTable);
   }
+
+  @Test
+  public void testReallyBig() throws Exception {
+    random = new Random(42662);
+
+    // Use a large capacity that doesn't require expansion, yet.
+    VectorMapJoinFastMultiKeyHashMap map =
+        new VectorMapJoinFastMultiKeyHashMap(
+            false, LARGE_CAPACITY, LOAD_FACTOR, MODERATE_WB_SIZE);
+
+    VerifyFastBytesHashMap verifyTable = new VerifyFastBytesHashMap();
+
+    int keyCount = 1000000;
+    addAndVerifyMultipleKeyMultipleValue(keyCount, map, verifyTable);
+  }
 }


[2/3] hive git commit: HIVE-13874: Tighten up EOF checking in Fast DeserializeRead classes; display better exception information; add new Unit Tests (Matt McCline, reviewed by Sergey Shelukhin)

Posted by mm...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/2d1f403d/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastRowHashMap.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastRowHashMap.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastRowHashMap.java
new file mode 100644
index 0000000..3f02eb3
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastRowHashMap.java
@@ -0,0 +1,718 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Random;
+
+import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource;
+import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.CheckFastRowHashMap.VerifyFastRowHashMap;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType;
+import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.ByteStream.Output;
+import org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe;
+import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite;
+import org.apache.hadoop.hive.serde2.fast.SerializeWrite;
+import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinarySerializeWrite;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.Writable;
+import org.junit.Test;
+
+/*
+ * Unit tests that fill the fast vector map join hash maps (long, string, and
+ * multi-key variants) with randomly generated key and value rows and verify
+ * the value rows read back for each matched key.
+ */
+public class TestVectorMapJoinFastRowHashMap extends CommonFastHashTable {
+
+  private void addAndVerifyRows(VectorRandomRowSource valueSource, Object[][] rows,
+      VectorMapJoinFastHashTable map, HashTableKeyType hashTableKeyType,
+      VerifyFastRowHashMap verifyTable, String[] keyTypeNames,
+      boolean doClipping, boolean useExactBytes) throws HiveException, IOException, SerDeException {
+
+    final int keyCount = keyTypeNames.length;
+    PrimitiveTypeInfo[] keyPrimitiveTypeInfos = new PrimitiveTypeInfo[keyCount];
+    PrimitiveCategory[] keyPrimitiveCategories = new PrimitiveCategory[keyCount];
+    ArrayList<ObjectInspector> keyPrimitiveObjectInspectorList =
+        new ArrayList<ObjectInspector>(keyCount);
+
+    for (int i = 0; i < keyCount; i++) {
+      PrimitiveTypeInfo primitiveTypeInfo =
+          (PrimitiveTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString(keyTypeNames[i]);
+      keyPrimitiveTypeInfos[i] = primitiveTypeInfo;
+      PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory();
+      keyPrimitiveCategories[i] = primitiveCategory;
+      keyPrimitiveObjectInspectorList.add(
+          PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(primitiveTypeInfo));
+    }
+
+    boolean[] keyColumnSortOrderIsDesc = new boolean[keyCount];
+    Arrays.fill(keyColumnSortOrderIsDesc, false);
+    byte[] keyColumnNullMarker = new byte[keyCount];
+    Arrays.fill(keyColumnNullMarker, BinarySortableSerDe.ZERO);
+    byte[] keyColumnNotNullMarker = new byte[keyCount];
+    Arrays.fill(keyColumnNotNullMarker, BinarySortableSerDe.ONE);
+
+    BinarySortableSerializeWrite keySerializeWrite =
+        new BinarySortableSerializeWrite(keyColumnSortOrderIsDesc,
+            keyColumnNullMarker, keyColumnNotNullMarker);
+
+
+    PrimitiveTypeInfo[] valuePrimitiveTypeInfos = valueSource.primitiveTypeInfos();
+    final int columnCount = valuePrimitiveTypeInfos.length;
+
+    SerializeWrite valueSerializeWrite = new LazyBinarySerializeWrite(columnCount);
+
+    final int count = rows.length;
+    for (int i = 0; i < count; i++) {
+
+      Object[] valueRow = rows[i];
+
+      Output valueOutput = new Output();
+      ((LazyBinarySerializeWrite) valueSerializeWrite).set(valueOutput);
+
+      for (int index = 0; index < columnCount; index++) {
+
+        Writable writable = (Writable) valueRow[index];
+
+        VerifyFastRow.serializeWrite(valueSerializeWrite, valuePrimitiveTypeInfos[index], writable);
+      }
+
+      byte[] value = Arrays.copyOf(valueOutput.getData(), valueOutput.getLength());
+
+      // Add a new key or add a value to an existing key?
+      byte[] key;
+      if (random.nextBoolean() || verifyTable.getCount() == 0) {
+        Object[] keyRow =
+            VectorRandomRowSource.randomRow(keyCount, random, keyPrimitiveObjectInspectorList,
+                keyPrimitiveCategories, keyPrimitiveTypeInfos);
+
+        Output keyOutput = new Output();
+        keySerializeWrite.set(keyOutput);
+
+        for (int index = 0; index < keyCount; index++) {
+
+          Writable writable = (Writable) keyRow[index];
+
+          VerifyFastRow.serializeWrite(keySerializeWrite, keyPrimitiveTypeInfos[index], writable);
+        }
+
+        key = Arrays.copyOf(keyOutput.getData(), keyOutput.getLength());
+
+        verifyTable.add(key, keyRow, value, valueRow);
+      } else {
+        key = verifyTable.addRandomExisting(value, valueRow, random);
+      }
+
+      // Wrap the serialized key and value bytes and insert them into the map.
+      BytesWritable keyWritable = new BytesWritable(key);
+      BytesWritable valueWritable = new BytesWritable(value);
+      map.putRow(keyWritable, valueWritable);
+      // verifyTable.verify(map);
+    }
+    verifyTable.verify(map, hashTableKeyType, valuePrimitiveTypeInfos,
+        doClipping, useExactBytes, random);
+  }
+
+  @Test
+  public void testBigIntRows() throws Exception {
+    random = new Random(927337);
+
+    // Use a large capacity that doesn't require expansion, yet.
+    VectorMapJoinFastLongHashMap map =
+        new VectorMapJoinFastLongHashMap(
+            false, false, HashTableKeyType.LONG,
+            LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+    VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+    VectorRandomRowSource valueSource = new VectorRandomRowSource();
+    valueSource.init(random);
+
+    int rowCount = 10000;
+    Object[][] rows = valueSource.randomRows(rowCount);
+
+    addAndVerifyRows(valueSource, rows,
+        map, HashTableKeyType.LONG, verifyTable,
+        new String[] { "bigint" },
+        /* doClipping */ false, /* useExactBytes */ false);
+  }
+
+  @Test
+  public void testIntRows() throws Exception {
+    random = new Random(927337);
+
+    // Use a large capacity that doesn't require expansion, yet.
+    VectorMapJoinFastLongHashMap map =
+        new VectorMapJoinFastLongHashMap(
+            false, false, HashTableKeyType.INT,
+            LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+    VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+    VectorRandomRowSource valueSource = new VectorRandomRowSource();
+    valueSource.init(random);
+
+    int rowCount = 10000;
+    Object[][] rows = valueSource.randomRows(rowCount);
+
+    addAndVerifyRows(valueSource, rows,
+        map, HashTableKeyType.INT, verifyTable,
+        new String[] { "int" },
+        /* doClipping */ false, /* useExactBytes */ false);
+  }
+
+  @Test
+  public void testStringRows() throws Exception {
+    random = new Random(927337);
+
+    // Use a large capacity that doesn't require expansion, yet.
+    VectorMapJoinFastStringHashMap map =
+        new VectorMapJoinFastStringHashMap(
+            false,
+            LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+    VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+    VectorRandomRowSource valueSource = new VectorRandomRowSource();
+    valueSource.init(random);
+
+    int rowCount = 10000;
+    Object[][] rows = valueSource.randomRows(rowCount);
+
+    addAndVerifyRows(valueSource, rows,
+        map, HashTableKeyType.STRING, verifyTable,
+        new String[] { "string" },
+        /* doClipping */ false, /* useExactBytes */ false);
+  }
+
+  @Test
+  public void testMultiKeyRows1() throws Exception {
+    random = new Random(833);
+
+    // Use a large capacity that doesn't require expansion, yet.
+    VectorMapJoinFastMultiKeyHashMap map =
+        new VectorMapJoinFastMultiKeyHashMap(
+            false,
+            LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+    VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+    VectorRandomRowSource valueSource = new VectorRandomRowSource();
+    valueSource.init(random);
+
+    int rowCount = 10000;
+    Object[][] rows = valueSource.randomRows(rowCount);
+
+    addAndVerifyRows(valueSource, rows,
+        map, HashTableKeyType.MULTI_KEY, verifyTable,
+        new String[] { "int", "int" },
+        /* doClipping */ false, /* useExactBytes */ false);
+  }
+
+  @Test
+  public void testMultiKeyRows2() throws Exception {
+    random = new Random(833099);
+
+    // Use a large capacity that doesn't require expansion, yet.
+    VectorMapJoinFastMultiKeyHashMap map =
+        new VectorMapJoinFastMultiKeyHashMap(
+            false,
+            LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+    VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+    VectorRandomRowSource valueSource = new VectorRandomRowSource();
+    valueSource.init(random);
+
+    int rowCount = 10000;
+    Object[][] rows = valueSource.randomRows(rowCount);
+
+    addAndVerifyRows(valueSource, rows,
+        map, HashTableKeyType.MULTI_KEY, verifyTable,
+        new String[] { "string", "string" },
+        /* doClipping */ false, /* useExactBytes */ false);
+  }
+
+  @Test
+  public void testMultiKeyRows3() throws Exception {
+    random = new Random(833099);
+
+    // Use a large capacity that doesn't require expansion, yet.
+    VectorMapJoinFastMultiKeyHashMap map =
+        new VectorMapJoinFastMultiKeyHashMap(
+            false,
+            LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+    VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+    VectorRandomRowSource valueSource = new VectorRandomRowSource();
+    valueSource.init(random);
+
+    int rowCount = 10000;
+    Object[][] rows = valueSource.randomRows(rowCount);
+
+    addAndVerifyRows(valueSource, rows,
+        map, HashTableKeyType.MULTI_KEY, verifyTable,
+        new String[] { "bigint", "timestamp", "double" },
+        /* doClipping */ false, /* useExactBytes */ false);
+  }
+
+  @Test
+  public void testBigIntRowsClipped() throws Exception {
+    random = new Random(326232);
+
+    // Use a large capacity that doesn't require expansion, yet.
+    VectorMapJoinFastLongHashMap map =
+        new VectorMapJoinFastLongHashMap(
+            false, false, HashTableKeyType.LONG,
+            LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+    VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+    VectorRandomRowSource valueSource = new VectorRandomRowSource();
+    valueSource.init(random);
+
+    int rowCount = 10000;
+    Object[][] rows = valueSource.randomRows(rowCount);
+
+    addAndVerifyRows(valueSource, rows,
+        map, HashTableKeyType.LONG, verifyTable,
+        new String[] { "bigint" },
+        /* doClipping */ true, /* useExactBytes */ false);
+  }
+
+  @Test
+  public void testIntRowsClipped() throws Exception {
+    random = new Random(326232);
+
+    // Use a large capacity that doesn't require expansion, yet.
+    VectorMapJoinFastLongHashMap map =
+        new VectorMapJoinFastLongHashMap(
+            false, false, HashTableKeyType.INT,
+            LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+    VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+    VectorRandomRowSource valueSource = new VectorRandomRowSource();
+    valueSource.init(random);
+
+    int rowCount = 10000;
+    Object[][] rows = valueSource.randomRows(rowCount);
+
+    addAndVerifyRows(valueSource, rows,
+        map, HashTableKeyType.INT, verifyTable,
+        new String[] { "int" },
+        /* doClipping */ true, /* useExactBytes */ false);
+  }
+
+  @Test
+  public void testStringRowsClipped() throws Exception {
+    random = new Random(326232);
+
+    // Use a large capacity that doesn't require expansion, yet.
+    VectorMapJoinFastStringHashMap map =
+        new VectorMapJoinFastStringHashMap(
+            false,
+            LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+    VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+    VectorRandomRowSource valueSource = new VectorRandomRowSource();
+    valueSource.init(random);
+
+    int rowCount = 10000;
+    Object[][] rows = valueSource.randomRows(rowCount);
+
+    addAndVerifyRows(valueSource, rows,
+        map, HashTableKeyType.STRING, verifyTable,
+        new String[] { "string" },
+        /* doClipping */ true, /* useExactBytes */ false);
+  }
+
+  @Test
+  public void testMultiKeyRowsClipped1() throws Exception {
+    random = new Random(2331);
+
+    // Use a large capacity that doesn't require expansion, yet.
+    VectorMapJoinFastMultiKeyHashMap map =
+        new VectorMapJoinFastMultiKeyHashMap(
+            false,
+            LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+    VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+    VectorRandomRowSource valueSource = new VectorRandomRowSource();
+    valueSource.init(random);
+
+    int rowCount = 10000;
+    Object[][] rows = valueSource.randomRows(rowCount);
+
+    addAndVerifyRows(valueSource, rows,
+        map, HashTableKeyType.MULTI_KEY, verifyTable,
+        new String[] { "varchar(20)", "date", "interval_day_time" },
+        /* doClipping */ true, /* useExactBytes */ false);
+  }
+
+  @Test
+  public void testMultiKeyRowsClipped2() throws Exception {
+    random = new Random(7403);
+
+    // Use a large capacity that doesn't require expansion, yet.
+    VectorMapJoinFastMultiKeyHashMap map =
+        new VectorMapJoinFastMultiKeyHashMap(
+            false,
+            LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+    VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+    VectorRandomRowSource valueSource = new VectorRandomRowSource();
+    valueSource.init(random);
+
+    int rowCount = 10000;
+    Object[][] rows = valueSource.randomRows(rowCount);
+
+    addAndVerifyRows(valueSource, rows,
+        map, HashTableKeyType.MULTI_KEY, verifyTable,
+        new String[] { "varchar(20)", "varchar(40)" },
+        /* doClipping */ true, /* useExactBytes */ false);
+  }
+
+  @Test
+  public void testMultiKeyRowsClipped3() throws Exception {
+    random = new Random(99);
+
+    // Use a large capacity that doesn't require expansion, yet.
+    VectorMapJoinFastMultiKeyHashMap map =
+        new VectorMapJoinFastMultiKeyHashMap(
+            false,
+            LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+    VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+    VectorRandomRowSource valueSource = new VectorRandomRowSource();
+    valueSource.init(random);
+
+    int rowCount = 10000;
+    Object[][] rows = valueSource.randomRows(rowCount);
+
+    addAndVerifyRows(valueSource, rows,
+        map, HashTableKeyType.MULTI_KEY, verifyTable,
+        new String[] { "float", "tinyint" },
+        /* doClipping */ true, /* useExactBytes */ false);
+  }
+
+  @Test
+  public void testBigIntRowsExact() throws Exception {
+    random = new Random(27722);
+
+    // Use a large capacity that doesn't require expansion, yet.
+    VectorMapJoinFastLongHashMap map =
+        new VectorMapJoinFastLongHashMap(
+            false, false, HashTableKeyType.LONG,
+            LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+    VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+    VectorRandomRowSource valueSource = new VectorRandomRowSource();
+    valueSource.init(random);
+
+    int rowCount = 10000;
+    Object[][] rows = valueSource.randomRows(rowCount);
+
+    addAndVerifyRows(valueSource, rows,
+        map, HashTableKeyType.LONG, verifyTable,
+        new String[] { "bigint" },
+        /* doClipping */ false, /* useExactBytes */ true);
+  }
+
+  @Test
+  public void testIntRowsExact() throws Exception {
+    random = new Random(8238383);
+
+    // Use a large capacity that doesn't require expansion, yet.
+    VectorMapJoinFastLongHashMap map =
+        new VectorMapJoinFastLongHashMap(
+            false, false, HashTableKeyType.INT,
+            LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+    VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+    VectorRandomRowSource valueSource = new VectorRandomRowSource();
+    valueSource.init(random);
+
+    int rowCount = 10000;
+    Object[][] rows = valueSource.randomRows(rowCount);
+
+    addAndVerifyRows(valueSource, rows,
+        map, HashTableKeyType.INT, verifyTable,
+        new String[] { "int" },
+        /* doClipping */ false, /* useExactBytes */ true);
+  }
+
+  @Test
+  public void testStringRowsExact() throws Exception {
+    random = new Random(8235);
+
+    // Use a large capacity that doesn't require expansion, yet.
+    VectorMapJoinFastStringHashMap map =
+        new VectorMapJoinFastStringHashMap(
+            false,
+            LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+    VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+    VectorRandomRowSource valueSource = new VectorRandomRowSource();
+    valueSource.init(random);
+
+    int rowCount = 10000;
+    Object[][] rows = valueSource.randomRows(rowCount);
+
+    addAndVerifyRows(valueSource, rows,
+        map, HashTableKeyType.STRING, verifyTable,
+        new String[] { "string" },
+        /* doClipping */ false, /* useExactBytes */ true);
+  }
+
+  @Test
+  public void testMultiKeyRowsExact1() throws Exception {
+    random = new Random(8235);
+
+    // Use a large capacity that doesn't require expansion, yet.
+    VectorMapJoinFastMultiKeyHashMap map =
+        new VectorMapJoinFastMultiKeyHashMap(
+            false,
+            LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+    VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+    VectorRandomRowSource valueSource = new VectorRandomRowSource();
+    valueSource.init(random);
+
+    int rowCount = 10000;
+    Object[][] rows = valueSource.randomRows(rowCount);
+
+    addAndVerifyRows(valueSource, rows,
+        map, HashTableKeyType.MULTI_KEY, verifyTable,
+        new String[] { "string", "string", "string", "string" },
+        /* doClipping */ false, /* useExactBytes */ true);
+  }
+
+  @Test
+  public void testMultiKeyRowsExact2() throws Exception {
+    random = new Random(8235);
+
+    // Use a large capacity that doesn't require expansion, yet.
+    VectorMapJoinFastMultiKeyHashMap map =
+        new VectorMapJoinFastMultiKeyHashMap(
+            false,
+            LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+    VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+    VectorRandomRowSource valueSource = new VectorRandomRowSource();
+    valueSource.init(random);
+
+    int rowCount = 10000;
+    Object[][] rows = valueSource.randomRows(rowCount);
+
+    addAndVerifyRows(valueSource, rows,
+        map, HashTableKeyType.MULTI_KEY, verifyTable,
+        new String[] { "smallint" },
+        /* doClipping */ false, /* useExactBytes */ true);
+  }
+
+  @Test
+  public void testMultiKeyRowsExact3() throws Exception {
+    random = new Random(8235);
+
+    // Use a large capacity that doesn't require expansion, yet.
+    VectorMapJoinFastMultiKeyHashMap map =
+        new VectorMapJoinFastMultiKeyHashMap(
+            false,
+            LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+    VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+    VectorRandomRowSource valueSource = new VectorRandomRowSource();
+    valueSource.init(random);
+
+    int rowCount = 10000;
+    Object[][] rows = valueSource.randomRows(rowCount);
+
+    addAndVerifyRows(valueSource, rows,
+        map, HashTableKeyType.MULTI_KEY, verifyTable,
+        new String[] { "int", "binary" },
+        /* doClipping */ false, /* useExactBytes */ true);
+  }
+
+  @Test
+  public void testBigIntRowsClippedExact() throws Exception {
+    random = new Random(2122);
+
+    // Use a large capacity that doesn't require expansion, yet.
+    VectorMapJoinFastLongHashMap map =
+        new VectorMapJoinFastLongHashMap(
+            false, false, HashTableKeyType.LONG,
+            LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+    VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+    VectorRandomRowSource valueSource = new VectorRandomRowSource();
+    valueSource.init(random);
+
+    int rowCount = 10000;
+    Object[][] rows = valueSource.randomRows(rowCount);
+
+    addAndVerifyRows(valueSource, rows,
+        map, HashTableKeyType.LONG, verifyTable,
+        new String[] { "bigint" },
+        /* doClipping */ true, /* useExactBytes */ true);
+  }
+
+  @Test
+  public void testIntRowsClippedExact() throws Exception {
+    random = new Random(7520);
+
+    // Use a large capacity that doesn't require expansion, yet.
+    VectorMapJoinFastLongHashMap map =
+        new VectorMapJoinFastLongHashMap(
+            false, false, HashTableKeyType.INT,
+            LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+    VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+    VectorRandomRowSource valueSource = new VectorRandomRowSource();
+    valueSource.init(random);
+
+    int rowCount = 10000;
+    Object[][] rows = valueSource.randomRows(rowCount);
+
+    addAndVerifyRows(valueSource, rows,
+        map, HashTableKeyType.INT, verifyTable,
+        new String[] { "int" },
+        /* doClipping */ true, /* useExactBytes */ true);
+  }
+
+  @Test
+  public void testStringRowsClippedExact() throws Exception {
+    random = new Random(7539);
+
+    // Use a large capacity that doesn't require expansion, yet.
+    VectorMapJoinFastStringHashMap map =
+        new VectorMapJoinFastStringHashMap(
+            false,
+            LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+    VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+    VectorRandomRowSource valueSource = new VectorRandomRowSource();
+    valueSource.init(random);
+
+    int rowCount = 10000;
+    Object[][] rows = valueSource.randomRows(rowCount);
+
+    addAndVerifyRows(valueSource, rows,
+        map, HashTableKeyType.STRING, verifyTable,
+        new String[] { "string" },
+        /* doClipping */ true, /* useExactBytes */ true);
+  }
+
+  @Test
+  public void testMultiKeyRowsClippedExact1() throws Exception {
+    random = new Random(13);
+
+    // Use a large capacity that doesn't require expansion, yet.
+    VectorMapJoinFastMultiKeyHashMap map =
+        new VectorMapJoinFastMultiKeyHashMap(
+            false,
+            LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+    VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+    VectorRandomRowSource valueSource = new VectorRandomRowSource();
+    valueSource.init(random);
+
+    int rowCount = 10000;
+    Object[][] rows = valueSource.randomRows(rowCount);
+
+    addAndVerifyRows(valueSource, rows,
+        map, HashTableKeyType.MULTI_KEY, verifyTable,
+        new String[] { "interval_year_month", "decimal(12,8)" },
+        /* doClipping */ true, /* useExactBytes */ true);
+  }
+
+  @Test
+  public void testMultiKeyRowsClippedExact2() throws Exception {
+    random = new Random(12);
+
+    // Use a large capacity that doesn't require expansion, yet.
+    VectorMapJoinFastMultiKeyHashMap map =
+        new VectorMapJoinFastMultiKeyHashMap(
+            false,
+            LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+    VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+    VectorRandomRowSource valueSource = new VectorRandomRowSource();
+    valueSource.init(random);
+
+    int rowCount = 10000;
+    Object[][] rows = valueSource.randomRows(rowCount);
+
+    addAndVerifyRows(valueSource, rows,
+        map, HashTableKeyType.MULTI_KEY, verifyTable,
+        new String[] { "bigint", "string", "int" },
+        /* doClipping */ true, /* useExactBytes */ true);
+  }
+
+  @Test
+  public void testMultiKeyRowsClippedExact3() throws Exception {
+    random = new Random(7);
+
+    // Use a large capacity that doesn't require expansion, yet.
+    VectorMapJoinFastMultiKeyHashMap map =
+        new VectorMapJoinFastMultiKeyHashMap(
+            false,
+            LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+    VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+    VectorRandomRowSource valueSource = new VectorRandomRowSource();
+    valueSource.init(random);
+
+    int rowCount = 10000;
+    Object[][] rows = valueSource.randomRows(rowCount);
+
+    addAndVerifyRows(valueSource, rows,
+        map, HashTableKeyType.MULTI_KEY, verifyTable,
+        new String[] { "bigint", "string", "varchar(5000)" },
+        /* doClipping */ true, /* useExactBytes */ true);
+  }
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/2d1f403d/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VerifyFastRow.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VerifyFastRow.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VerifyFastRow.java
new file mode 100644
index 0000000..118e9e2
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VerifyFastRow.java
@@ -0,0 +1,397 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast;
+
+import java.io.IOException;
+import java.sql.Date;
+import java.sql.Timestamp;
+import java.util.Arrays;
+
+import junit.framework.TestCase;
+
+import org.apache.hadoop.hive.common.type.HiveChar;
+import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.common.type.HiveIntervalDayTime;
+import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth;
+import org.apache.hadoop.hive.common.type.HiveVarchar;
+import org.apache.hadoop.hive.serde2.fast.DeserializeRead;
+import org.apache.hadoop.hive.serde2.fast.SerializeWrite;
+import org.apache.hadoop.hive.serde2.io.ByteWritable;
+import org.apache.hadoop.hive.serde2.io.DateWritable;
+import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.hive.serde2.io.HiveCharWritable;
+import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
+import org.apache.hadoop.hive.serde2.io.HiveIntervalDayTimeWritable;
+import org.apache.hadoop.hive.serde2.io.HiveIntervalYearMonthWritable;
+import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable;
+import org.apache.hadoop.hive.serde2.io.ShortWritable;
+import org.apache.hadoop.hive.serde2.io.TimestampWritable;
+import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
+import org.apache.hadoop.io.BooleanWritable;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.FloatWritable;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
+
+/**
+ * (Copy of VerifyFast from serde).
+ *
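+ * Verifies values read back through DeserializeRead against expected Writables and writes
+ * Writable values through SerializeWrite, for the fast row hash map tests.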
+ */
+public class VerifyFastRow {
+
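+  /*
+   * Reads the current field from deserializeRead and asserts it matches the expected Writable;
+   * a null Writable means the field is expected to deserialize as NULL.
+   */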
+  public static void verifyDeserializeRead(DeserializeRead deserializeRead,
+      PrimitiveTypeInfo primitiveTypeInfo, Writable writable) throws IOException {
+
+    boolean isNull;
+
+    isNull = deserializeRead.readCheckNull();
+    if (isNull) {
+      if (writable != null) {
+        TestCase.fail("Field reports null but object is not null");
+      }
+      return;
+    } else if (writable == null) {
+      TestCase.fail("Field report not null but object is null");
+    }
+    switch (primitiveTypeInfo.getPrimitiveCategory()) {
+    case BOOLEAN:
+      {
+        boolean value = deserializeRead.currentBoolean;
+        if (!(writable instanceof BooleanWritable)) {
+          TestCase.fail("Boolean expected writable not Boolean");
+        }
+        boolean expected = ((BooleanWritable) writable).get();
+        if (value != expected) {
+          TestCase.fail("Boolean field mismatch (expected " + expected + " found " + value + ")");
+        }
+      }
+      break;
+    case BYTE:
+      {
+        byte value = deserializeRead.currentByte;
+        if (!(writable instanceof ByteWritable)) {
+          TestCase.fail("Byte expected writable not Byte");
+        }
+        byte expected = ((ByteWritable) writable).get();
+        if (value != expected) {
+          TestCase.fail("Byte field mismatch (expected " + (int) expected + " found " + (int) value + ")");
+        }
+      }
+      break;
+    case SHORT:
+      {
+        short value = deserializeRead.currentShort;
+        if (!(writable instanceof ShortWritable)) {
+          TestCase.fail("Short expected writable not Short");
+        }
+        short expected = ((ShortWritable) writable).get();
+        if (value != expected) {
+          TestCase.fail("Short field mismatch (expected " + expected + " found " + value + ")");
+        }
+      }
+      break;
+    case INT:
+      {
+        int value = deserializeRead.currentInt;
+        if (!(writable instanceof IntWritable)) {
+          TestCase.fail("Integer expected writable not Integer");
+        }
+        int expected = ((IntWritable) writable).get();
+        if (value != expected) {
+          TestCase.fail("Int field mismatch (expected " + expected + " found " + value + ")");
+        }
+      }
+      break;
+    case LONG:
+      {
+        long value = deserializeRead.currentLong;
+        if (!(writable instanceof LongWritable)) {
+          TestCase.fail("Long expected writable not Long");
+        }
+        long expected = ((LongWritable) writable).get();
+        if (value != expected) {
+          TestCase.fail("Long field mismatch (expected " + expected + " found " + value + ")");
+        }
+      }
+      break;
+    case FLOAT:
+      {
+        float value = deserializeRead.currentFloat;
+        if (!(writable instanceof FloatWritable)) {
+          TestCase.fail("Float expected writable not Float");
+        }
+        float expected = ((FloatWritable) writable).get();
+        if (value != expected) {
+          TestCase.fail("Float field mismatch (expected " + expected + " found " + value + ")");
+        }
+      }
+      break;
+    case DOUBLE:
+      {
+        double value = deserializeRead.currentDouble;
+        if (!(writable instanceof DoubleWritable)) {
+          TestCase.fail("Double expected writable not Double");
+        }
+        double expected = ((DoubleWritable) writable).get();
+        if (value != expected) {
+          TestCase.fail("Double field mismatch (expected " + expected + " found " + value + ")");
+        }
+      }
+      break;
+    case STRING:
+      {
+        byte[] stringBytes = Arrays.copyOfRange(
+            deserializeRead.currentBytes,
+            deserializeRead.currentBytesStart,
+            deserializeRead.currentBytesStart + deserializeRead.currentBytesLength);
+        Text text = new Text(stringBytes);
+        String string = text.toString();
+        String expected = ((Text) writable).toString();
+        if (!string.equals(expected)) {
+          TestCase.fail("String field mismatch (expected '" + expected + "' found '" + string + "')");
+        }
+      }
+      break;
+    case CHAR:
+      {
+        byte[] stringBytes = Arrays.copyOfRange(
+            deserializeRead.currentBytes,
+            deserializeRead.currentBytesStart,
+            deserializeRead.currentBytesStart + deserializeRead.currentBytesLength);
+        Text text = new Text(stringBytes);
+        String string = text.toString();
+
+        HiveChar hiveChar = new HiveChar(string, ((CharTypeInfo) primitiveTypeInfo).getLength());
+
+        HiveChar expected = ((HiveCharWritable) writable).getHiveChar();
+        if (!hiveChar.equals(expected)) {
+          TestCase.fail("Char field mismatch (expected '" + expected + "' found '" + hiveChar + "')");
+        }
+      }
+      break;
+    case VARCHAR:
+      {
+        byte[] stringBytes = Arrays.copyOfRange(
+            deserializeRead.currentBytes,
+            deserializeRead.currentBytesStart,
+            deserializeRead.currentBytesStart + deserializeRead.currentBytesLength);
+        Text text = new Text(stringBytes);
+        String string = text.toString();
+
+        HiveVarchar hiveVarchar = new HiveVarchar(string, ((VarcharTypeInfo) primitiveTypeInfo).getLength());
+
+        HiveVarchar expected = ((HiveVarcharWritable) writable).getHiveVarchar();
+        if (!hiveVarchar.equals(expected)) {
+          TestCase.fail("Varchar field mismatch (expected '" + expected + "' found '" + hiveVarchar + "')");
+        }
+      }
+      break;
+    case DECIMAL:
+      {
+        HiveDecimal value = deserializeRead.currentHiveDecimalWritable.getHiveDecimal();
+        if (value == null) {
+          TestCase.fail("Decimal field evaluated to NULL");
+        }
+        HiveDecimal expected = ((HiveDecimalWritable) writable).getHiveDecimal();
+        if (!value.equals(expected)) {
+          DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) primitiveTypeInfo;
+          int precision = decimalTypeInfo.getPrecision();
+          int scale = decimalTypeInfo.getScale();
+          TestCase.fail("Decimal field mismatch (expected " + expected.toString() + " found " + value.toString() + ") precision " + precision + ", scale " + scale);
+        }
+      }
+      break;
+    case DATE:
+      {
+        Date value = deserializeRead.currentDateWritable.get();
+        Date expected = ((DateWritable) writable).get();
+        if (!value.equals(expected)) {
+          TestCase.fail("Date field mismatch (expected " + expected.toString() + " found " + value.toString() + ")");
+        }
+      }
+      break;
+    case TIMESTAMP:
+      {
+        Timestamp value = deserializeRead.currentTimestampWritable.getTimestamp();
+        Timestamp expected = ((TimestampWritable) writable).getTimestamp();
+        if (!value.equals(expected)) {
+          TestCase.fail("Timestamp field mismatch (expected " + expected.toString() + " found " + value.toString() + ")");
+        }
+      }
+      break;
+    case INTERVAL_YEAR_MONTH:
+      {
+        HiveIntervalYearMonth value = deserializeRead.currentHiveIntervalYearMonthWritable.getHiveIntervalYearMonth();
+        HiveIntervalYearMonth expected = ((HiveIntervalYearMonthWritable) writable).getHiveIntervalYearMonth();
+        if (!value.equals(expected)) {
+          TestCase.fail("HiveIntervalYearMonth field mismatch (expected " + expected.toString() + " found " + value.toString() + ")");
+        }
+      }
+      break;
+    case INTERVAL_DAY_TIME:
+      {
+        HiveIntervalDayTime value = deserializeRead.currentHiveIntervalDayTimeWritable.getHiveIntervalDayTime();
+        HiveIntervalDayTime expected = ((HiveIntervalDayTimeWritable) writable).getHiveIntervalDayTime();
+        if (!value.equals(expected)) {
+          TestCase.fail("HiveIntervalDayTime field mismatch (expected " + expected.toString() + " found " + value.toString() + ")");
+        }
+      }
+      break;
+    case BINARY:
+      {
+        byte[] byteArray = Arrays.copyOfRange(
+            deserializeRead.currentBytes,
+            deserializeRead.currentBytesStart,
+            deserializeRead.currentBytesStart + deserializeRead.currentBytesLength);
+        BytesWritable bytesWritable = (BytesWritable) writable;
+        byte[] expected = Arrays.copyOfRange(bytesWritable.getBytes(), 0, bytesWritable.getLength());
+        if (byteArray.length != expected.length) {
+          TestCase.fail("Byte Array field mismatch (expected " + Arrays.toString(expected)
+              + " found " + Arrays.toString(byteArray) + ")");
+        }
+        for (int b = 0; b < byteArray.length; b++) {
+          if (byteArray[b] != expected[b]) {
+            TestCase.fail("Byte Array field mismatch (expected " + Arrays.toString(expected)
+              + " found " + Arrays.toString(byteArray) + ")");
+          }
+        }
+      }
+      break;
+    default:
+      throw new Error("Unknown primitive category " + primitiveTypeInfo.getPrimitiveCategory());
+    }
+  }
+
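+  /*
+   * Writes the given Writable through serializeWrite using the writer that matches the
+   * primitive category; a null Writable is written as NULL.
+   */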
+  public static void serializeWrite(SerializeWrite serializeWrite,
+      PrimitiveTypeInfo primitiveTypeInfo, Writable writable) throws IOException {
+    if (writable == null) {
+      serializeWrite.writeNull();
+      return;
+    }
+    switch (primitiveTypeInfo.getPrimitiveCategory()) {
+    case BOOLEAN:
+      {
+        boolean value = ((BooleanWritable) writable).get();
+        serializeWrite.writeBoolean(value);
+      }
+      break;
+    case BYTE:
+      {
+        byte value = ((ByteWritable) writable).get();
+        serializeWrite.writeByte(value);
+      }
+      break;
+    case SHORT:
+      {
+        short value = ((ShortWritable) writable).get();
+        serializeWrite.writeShort(value);
+      }
+      break;
+    case INT:
+      {
+        int value = ((IntWritable) writable).get();
+        serializeWrite.writeInt(value);
+      }
+      break;
+    case LONG:
+      {
+        long value = ((LongWritable) writable).get();
+        serializeWrite.writeLong(value);
+      }
+      break;
+    case FLOAT:
+      {
+        float value = ((FloatWritable) writable).get();
+        serializeWrite.writeFloat(value);
+      }
+      break;
+    case DOUBLE:
+      {
+        double value = ((DoubleWritable) writable).get();
+        serializeWrite.writeDouble(value);
+      }
+      break;
+    case STRING:
+      {
+        Text value = (Text) writable;
+        byte[] stringBytes = value.getBytes();
+        int stringLength = stringBytes.length;
+        serializeWrite.writeString(stringBytes, 0, stringLength);
+      }
+      break;
+    case CHAR:
+      {
+        HiveChar value = ((HiveCharWritable) writable).getHiveChar();
+        serializeWrite.writeHiveChar(value);
+      }
+      break;
+    case VARCHAR:
+      {
+        HiveVarchar value = ((HiveVarcharWritable) writable).getHiveVarchar();
+        serializeWrite.writeHiveVarchar(value);
+      }
+      break;
+    case DECIMAL:
+      {
+        HiveDecimal value = ((HiveDecimalWritable) writable).getHiveDecimal();
+        DecimalTypeInfo decTypeInfo = (DecimalTypeInfo)primitiveTypeInfo;
+        serializeWrite.writeHiveDecimal(value, decTypeInfo.scale());
+      }
+      break;
+    case DATE:
+      {
+        Date value = ((DateWritable) writable).get();
+        serializeWrite.writeDate(value);
+      }
+      break;
+    case TIMESTAMP:
+      {
+        Timestamp value = ((TimestampWritable) writable).getTimestamp();
+        serializeWrite.writeTimestamp(value);
+      }
+      break;
+    case INTERVAL_YEAR_MONTH:
+      {
+        HiveIntervalYearMonth value = ((HiveIntervalYearMonthWritable) writable).getHiveIntervalYearMonth();
+        serializeWrite.writeHiveIntervalYearMonth(value);
+      }
+      break;
+    case INTERVAL_DAY_TIME:
+      {
+        HiveIntervalDayTime value = ((HiveIntervalDayTimeWritable) writable).getHiveIntervalDayTime();
+        serializeWrite.writeHiveIntervalDayTime(value);
+      }
+      break;
+    case BINARY:
+      {
+        BytesWritable byteWritable = (BytesWritable) writable;
+        byte[] binaryBytes = byteWritable.getBytes();
+        int length = byteWritable.getLength();
+        serializeWrite.writeBinary(binaryBytes, 0, length);
+      }
+      break;
+    default:
+      throw new Error("Unknown primitive category " + primitiveTypeInfo.getPrimitiveCategory().name());
+    }
+  }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/2d1f403d/serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableDeserializeRead.java
----------------------------------------------------------------------
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableDeserializeRead.java b/serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableDeserializeRead.java
index be36ba4..003a2d4 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableDeserializeRead.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableDeserializeRead.java
@@ -42,7 +42,7 @@ import org.apache.hadoop.io.Text;
  *
  * Reading some fields require a results object to receive value information.  A separate
  * results object is created by the caller at initialization per different field even for the same
- * type. 
+ * type.
  *
  * Some type values are by reference to either bytes in the deserialization buffer or to
  * other type specific buffers.  So, those references are only valid until the next time set is
@@ -61,6 +61,8 @@ public final class BinarySortableDeserializeRead extends DeserializeRead {
   private int fieldCount;
 
   private int start;
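+  // Exclusive end of the byte range and start offset of the current field, tracked so
+  // getDetailedReadPositionString can report where a read failed.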
+  private int end;
+  private int fieldStart;
 
   private byte[] tempTimestampBytes;
   private Text tempText;
@@ -68,7 +70,6 @@ public final class BinarySortableDeserializeRead extends DeserializeRead {
   private byte[] tempDecimalBuffer;
 
   private boolean readBeyondConfiguredFieldsWarned;
-  private boolean readBeyondBufferRangeWarned;
   private boolean bufferRangeHasExtraDataWarned;
 
   private InputByteBuffer inputByteBuffer = new InputByteBuffer();
@@ -92,7 +93,6 @@ public final class BinarySortableDeserializeRead extends DeserializeRead {
     }
     inputByteBuffer = new InputByteBuffer();
     readBeyondConfiguredFieldsWarned = false;
-    readBeyondBufferRangeWarned = false;
     bufferRangeHasExtraDataWarned = false;
   }
 
@@ -107,8 +107,40 @@ public final class BinarySortableDeserializeRead extends DeserializeRead {
   @Override
   public void set(byte[] bytes, int offset, int length) {
     fieldIndex = -1;
-    inputByteBuffer.reset(bytes, offset, offset + length);
     start = offset;
+    end = offset + length;
+    inputByteBuffer.reset(bytes, start, end);
+  }
+
+  /*
+   * Get detailed read position information to help diagnose exceptions.
+   */
+  public String getDetailedReadPositionString() {
+    StringBuffer sb = new StringBuffer();
+
+    sb.append("Reading inputByteBuffer of length ");
+    sb.append(inputByteBuffer.getEnd());
+    sb.append(" at start offset ");
+    sb.append(start);
+    sb.append(" for length ");
+    sb.append(end - start);
+    sb.append(" to read ");
+    sb.append(fieldCount);
+    sb.append(" fields with types ");
+    sb.append(Arrays.toString(typeInfos));
+    sb.append(".  ");
+    if (fieldIndex == -1) {
+      sb.append("Before first field?");
+    } else {
+      sb.append("Read field #");
+      sb.append(fieldIndex);
+      sb.append(" at field start position ");
+      sb.append(fieldStart);
+      sb.append(" current read offset ");
+      sb.append(inputByteBuffer.tell());
+    }
+
+    return sb.toString();
   }
 
   /*
@@ -133,12 +165,11 @@ public final class BinarySortableDeserializeRead extends DeserializeRead {
     }
     if (inputByteBuffer.isEof()) {
       // Also, reading beyond our byte range produces NULL.
-      if (!readBeyondBufferRangeWarned) {
-        doReadBeyondBufferRangeWarned();
-      }
-      // We cannot read beyond so we must return NULL here.
       return true;
     }
+
+    fieldStart = inputByteBuffer.tell();
+
     byte isNullByte = inputByteBuffer.read(columnSortOrderIsDesc[fieldIndex]);
 
     if (isNullByte == 0) {
@@ -298,7 +329,7 @@ public final class BinarySortableDeserializeRead extends DeserializeRead {
           factor = -factor;
         }
 
-        int start = inputByteBuffer.tell();
+        int decimalStart = inputByteBuffer.tell();
         int length = 0;
 
         do {
@@ -317,7 +348,7 @@ public final class BinarySortableDeserializeRead extends DeserializeRead {
           tempDecimalBuffer = new byte[length];
         }
 
-        inputByteBuffer.seek(start);
+        inputByteBuffer.seek(decimalStart);
         for (int i = 0; i < length; ++i) {
           tempDecimalBuffer[i] = inputByteBuffer.read(positive ? invert : !invert);
         }
@@ -392,10 +423,6 @@ public final class BinarySortableDeserializeRead extends DeserializeRead {
     return readBeyondConfiguredFieldsWarned;
   }
   @Override
-  public boolean readBeyondBufferRangeWarned() {
-    return readBeyondBufferRangeWarned;
-  }
-  @Override
   public boolean bufferRangeHasExtraDataWarned() {
     return bufferRangeHasExtraDataWarned;
   }
@@ -410,14 +437,4 @@ public final class BinarySortableDeserializeRead extends DeserializeRead {
         + " reading more (NULLs returned).  Ignoring similar problems.");
     readBeyondConfiguredFieldsWarned = true;
   }
-
-  private void doReadBeyondBufferRangeWarned() {
-    // Warn only once.
-    int length = inputByteBuffer.tell() - start;
-    LOG.info("Reading beyond buffer range! Buffer range " +  start 
-        + " for length " + length + " but reading more... "
-        + "(total buffer length " + inputByteBuffer.getData().length + ")"
-        + "  Ignoring similar problems.");
-    readBeyondBufferRangeWarned = true;
-  }
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/2d1f403d/serde/src/java/org/apache/hadoop/hive/serde2/fast/DeserializeRead.java
----------------------------------------------------------------------
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/fast/DeserializeRead.java b/serde/src/java/org/apache/hadoop/hive/serde2/fast/DeserializeRead.java
index 2fad2af..8f3e771 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/fast/DeserializeRead.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/fast/DeserializeRead.java
@@ -143,10 +143,14 @@ public abstract class DeserializeRead {
    * Read integrity warning flags.
    */
   public abstract boolean readBeyondConfiguredFieldsWarned();
-  public abstract boolean readBeyondBufferRangeWarned();
   public abstract boolean bufferRangeHasExtraDataWarned();
 
   /*
+   * Get detailed read position information to help diagnose exceptions.
+   */
+  public abstract String getDetailedReadPositionString();
+
+  /*
    * These members hold the current value that was read when readCheckNull return false.
    */
 

http://git-wip-us.apache.org/repos/asf/hive/blob/2d1f403d/serde/src/java/org/apache/hadoop/hive/serde2/fast/RandomRowObjectSource.java
----------------------------------------------------------------------
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/fast/RandomRowObjectSource.java b/serde/src/java/org/apache/hadoop/hive/serde2/fast/RandomRowObjectSource.java
deleted file mode 100644
index 1bb990c..0000000
--- a/serde/src/java/org/apache/hadoop/hive/serde2/fast/RandomRowObjectSource.java
+++ /dev/null
@@ -1,423 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.serde2.fast;
-
-import java.sql.Date;
-import java.sql.Timestamp;
-import java.util.ArrayList;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Random;
-
-import org.apache.commons.lang.ArrayUtils;
-import org.apache.commons.lang.StringUtils;
-import org.apache.hadoop.hive.common.type.HiveChar;
-import org.apache.hadoop.hive.common.type.HiveDecimal;
-import org.apache.hadoop.hive.common.type.HiveIntervalDayTime;
-import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth;
-import org.apache.hadoop.hive.common.type.HiveVarchar;
-import org.apache.hadoop.hive.common.type.RandomTypeUtil;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
-import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBooleanObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableByteObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDateObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDoubleObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableFloatObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveCharObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveDecimalObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveIntervalDayTimeObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveIntervalYearMonthObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveVarcharObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableIntObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableLongObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableShortObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableTimestampObjectInspector;
-import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
-import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
-import org.apache.hive.common.util.DateUtils;
-
-/**
- * Generate object inspector and random row object[].
- */
-public class RandomRowObjectSource {
-
-  private Random r;
-
-  private int columnCount;
-
-  private List<String> typeNames;
-
-  private PrimitiveCategory[] primitiveCategories;
-
-  private PrimitiveTypeInfo[] primitiveTypeInfos;
-
-  private List<ObjectInspector> primitiveObjectInspectorList;
-
-  private StructObjectInspector rowStructObjectInspector;
-
-  public List<String> typeNames() {
-    return typeNames;
-  }
-
-  public PrimitiveCategory[] primitiveCategories() {
-    return primitiveCategories;
-  }
-
-  public PrimitiveTypeInfo[] primitiveTypeInfos() {
-    return primitiveTypeInfos;
-  }
-
-  public StructObjectInspector rowStructObjectInspector() {
-    return rowStructObjectInspector;
-  }
-
-  public StructObjectInspector partialRowStructObjectInspector(int partialFieldCount) {
-    ArrayList<ObjectInspector> partialPrimitiveObjectInspectorList =
-        new ArrayList<ObjectInspector>(partialFieldCount);
-    List<String> columnNames = new ArrayList<String>(partialFieldCount);
-    for (int i = 0; i < partialFieldCount; i++) {
-      columnNames.add(String.format("partial%d", i));
-      partialPrimitiveObjectInspectorList.add(
-          PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(
-              primitiveTypeInfos[i]));
-    }
-
-    return ObjectInspectorFactory.getStandardStructObjectInspector(
-        columnNames, primitiveObjectInspectorList);
-  }
-
-  public void init(Random r) {
-    this.r = r;
-    chooseSchema();
-  }
-
-  /*
-   * For now, exclude CHAR until we determine why there is a difference (blank padding)
-   * serializing with LazyBinarySerializeWrite and the regular SerDe...
-   */
-  private static String[] possibleHiveTypeNames = {
-      "boolean",
-      "tinyint",
-      "smallint",
-      "int",
-      "bigint",
-      "date",
-      "float",
-      "double",
-      "string",
-//    "char",
-      "varchar",
-      "binary",
-      "date",
-      "timestamp",
-      "interval_year_month",
-      "interval_day_time",
-      "decimal"
-  };
-
-  private void chooseSchema() {
-    HashSet hashSet = null;
-    boolean allTypes;
-    boolean onlyOne = (r.nextInt(100) == 7);
-    if (onlyOne) {
-      columnCount = 1;
-      allTypes = false;
-    } else {
-      allTypes = r.nextBoolean();
-      if (allTypes) {
-        // One of each type.
-        columnCount = possibleHiveTypeNames.length;
-        hashSet = new HashSet<Integer>();
-      } else {
-        columnCount = 1 + r.nextInt(20);
-      }
-    }
-    typeNames = new ArrayList<String>(columnCount);
-    primitiveCategories = new PrimitiveCategory[columnCount];
-    primitiveTypeInfos = new PrimitiveTypeInfo[columnCount];
-    primitiveObjectInspectorList = new ArrayList<ObjectInspector>(columnCount);
-    List<String> columnNames = new ArrayList<String>(columnCount);
-    for (int c = 0; c < columnCount; c++) {
-      columnNames.add(String.format("col%d", c));
-      String typeName;
-
-      if (onlyOne) {
-        typeName = possibleHiveTypeNames[r.nextInt(possibleHiveTypeNames.length)];
-      } else {
-        int typeNum;
-        if (allTypes) {
-          while (true) {
-            typeNum = r.nextInt(possibleHiveTypeNames.length);
-            Integer typeNumInteger = new Integer(typeNum);
-            if (!hashSet.contains(typeNumInteger)) {
-              hashSet.add(typeNumInteger);
-              break;
-            }
-          }
-        } else {
-          typeNum = r.nextInt(possibleHiveTypeNames.length);
-        }
-        typeName = possibleHiveTypeNames[typeNum];
-      }
-      if (typeName.equals("char")) {
-        int maxLength = 1 + r.nextInt(100);
-        typeName = String.format("char(%d)", maxLength);
-      } else if (typeName.equals("varchar")) {
-        int maxLength = 1 + r.nextInt(100);
-        typeName = String.format("varchar(%d)", maxLength);
-      } else if (typeName.equals("decimal")) {
-        typeName = String.format("decimal(%d,%d)", HiveDecimal.SYSTEM_DEFAULT_PRECISION, HiveDecimal.SYSTEM_DEFAULT_SCALE);
-      }
-      PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString(typeName);
-      primitiveTypeInfos[c] = primitiveTypeInfo;
-      PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory();
-      primitiveCategories[c] = primitiveCategory;
-      primitiveObjectInspectorList.add(PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(primitiveTypeInfo));
-      typeNames.add(typeName);
-    }
-    rowStructObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, primitiveObjectInspectorList);
-  }
-
-  public Object[][] randomRows(int n) {
-    Object[][] result = new Object[n][];
-    for (int i = 0; i < n; i++) {
-      result[i] = randomRow();
-    }
-    return result;
-  }
-
-  public Object[] randomRow() {
-    Object row[] = new Object[columnCount];
-    for (int c = 0; c < columnCount; c++) {
-      Object object = randomObject(c);
-      if (object == null) {
-        throw new Error("Unexpected null for column " + c);
-      }
-      row[c] = getWritableObject(c, object);
-      if (row[c] == null) {
-        throw new Error("Unexpected null for writable for column " + c);
-      }
-    }
-    return row;
-  }
-
-  public static void sort(Object[][] rows, ObjectInspector oi) {
-    for (int i = 0; i < rows.length; i++) {
-      for (int j = i + 1; j < rows.length; j++) {
-        if (ObjectInspectorUtils.compare(rows[i], oi, rows[j], oi) > 0) {
-          Object[] t = rows[i];
-          rows[i] = rows[j];
-          rows[j] = t;
-        }
-      }
-    }
-  }
-
-  public void sort(Object[][] rows) {
-    RandomRowObjectSource.sort(rows, rowStructObjectInspector);
-  }
-
-  public Object getWritableObject(int column, Object object) {
-    ObjectInspector objectInspector = primitiveObjectInspectorList.get(column);
-    PrimitiveCategory primitiveCategory = primitiveCategories[column];
-    PrimitiveTypeInfo primitiveTypeInfo = primitiveTypeInfos[column];
-    switch (primitiveCategory) {
-    case BOOLEAN:
-      return ((WritableBooleanObjectInspector) objectInspector).create((boolean) object);
-    case BYTE:
-      return ((WritableByteObjectInspector) objectInspector).create((byte) object);
-    case SHORT:
-      return ((WritableShortObjectInspector) objectInspector).create((short) object);
-    case INT:
-      return ((WritableIntObjectInspector) objectInspector).create((int) object);
-    case LONG:
-      return ((WritableLongObjectInspector) objectInspector).create((long) object);
-    case DATE:
-      return ((WritableDateObjectInspector) objectInspector).create((Date) object);
-    case FLOAT:
-      return ((WritableFloatObjectInspector) objectInspector).create((float) object);
-    case DOUBLE:
-      return ((WritableDoubleObjectInspector) objectInspector).create((double) object);
-    case STRING:
-      return ((WritableStringObjectInspector) objectInspector).create((String) object);
-    case CHAR:
-      {
-        WritableHiveCharObjectInspector writableCharObjectInspector =
-                new WritableHiveCharObjectInspector( (CharTypeInfo) primitiveTypeInfo);
-        return writableCharObjectInspector.create(new HiveChar(StringUtils.EMPTY, -1));
-      }
-    case VARCHAR:
-      {
-        WritableHiveVarcharObjectInspector writableVarcharObjectInspector =
-                new WritableHiveVarcharObjectInspector( (VarcharTypeInfo) primitiveTypeInfo);
-        return writableVarcharObjectInspector.create(new HiveVarchar(StringUtils.EMPTY, -1));
-      }
-    case BINARY:
-      return PrimitiveObjectInspectorFactory.writableBinaryObjectInspector.create(ArrayUtils.EMPTY_BYTE_ARRAY);
-    case TIMESTAMP:
-      return ((WritableTimestampObjectInspector) objectInspector).create(new Timestamp(0));
-    case INTERVAL_YEAR_MONTH:
-      return ((WritableHiveIntervalYearMonthObjectInspector) objectInspector).create(new HiveIntervalYearMonth(0));
-    case INTERVAL_DAY_TIME:
-      return ((WritableHiveIntervalDayTimeObjectInspector) objectInspector).create(new HiveIntervalDayTime(0, 0));
-    case DECIMAL:
-      {
-        WritableHiveDecimalObjectInspector writableDecimalObjectInspector =
-                new WritableHiveDecimalObjectInspector((DecimalTypeInfo) primitiveTypeInfo);
-        return writableDecimalObjectInspector.create(HiveDecimal.ZERO);
-      }
-    default:
-      throw new Error("Unknown primitive category " + primitiveCategory);
-    }
-  }
-
-  public Object randomObject(int column) {
-    PrimitiveCategory primitiveCategory = primitiveCategories[column];
-    PrimitiveTypeInfo primitiveTypeInfo = primitiveTypeInfos[column];
-    switch (primitiveCategory) {
-    case BOOLEAN:
-      return Boolean.valueOf(r.nextInt(1) == 1);
-    case BYTE:
-      return Byte.valueOf((byte) r.nextInt());
-    case SHORT:
-      return Short.valueOf((short) r.nextInt());
-    case INT:
-      return Integer.valueOf(r.nextInt());
-    case LONG:
-      return Long.valueOf(r.nextLong());
-    case DATE:
-      return RandomTypeUtil.getRandDate(r);
-    case FLOAT:
-      return Float.valueOf(r.nextFloat() * 10 - 5);
-    case DOUBLE:
-      return Double.valueOf(r.nextDouble() * 10 - 5);
-    case STRING:
-      return RandomTypeUtil.getRandString(r);
-    case CHAR:
-      return getRandHiveChar(r, (CharTypeInfo) primitiveTypeInfo);
-    case VARCHAR:
-      return getRandHiveVarchar(r, (VarcharTypeInfo) primitiveTypeInfo);
-    case BINARY:
-      return getRandBinary(r, 1 + r.nextInt(100));
-    case TIMESTAMP:
-      return RandomTypeUtil.getRandTimestamp(r);
-    case INTERVAL_YEAR_MONTH:
-      return getRandIntervalYearMonth(r);
-    case INTERVAL_DAY_TIME:
-      return getRandIntervalDayTime(r);
-    case DECIMAL:
-      return getRandHiveDecimal(r, (DecimalTypeInfo) primitiveTypeInfo);
-    default:
-      throw new Error("Unknown primitive category " + primitiveCategory);
-    }
-  }
-
-  public static HiveChar getRandHiveChar(Random r, CharTypeInfo charTypeInfo) {
-    int maxLength = 1 + r.nextInt(charTypeInfo.getLength());
-    String randomString = RandomTypeUtil.getRandString(r, "abcdefghijklmnopqrstuvwxyz", 100);
-    HiveChar hiveChar = new HiveChar(randomString, maxLength);
-    return hiveChar;
-  }
-
-  public static HiveVarchar getRandHiveVarchar(Random r, VarcharTypeInfo varcharTypeInfo) {
-    int maxLength = 1 + r.nextInt(varcharTypeInfo.getLength());
-    String randomString = RandomTypeUtil.getRandString(r, "abcdefghijklmnopqrstuvwxyz", 100);
-    HiveVarchar hiveVarchar = new HiveVarchar(randomString, maxLength);
-    return hiveVarchar;
-  }
-
-  public static byte[] getRandBinary(Random r, int len){
-    byte[] bytes = new byte[len];
-    for (int j = 0; j < len; j++){
-      bytes[j] = Byte.valueOf((byte) r.nextInt());
-    }
-    return bytes;
-  }
-
-  private static final String DECIMAL_CHARS = "0123456789";
-
-  public static HiveDecimal getRandHiveDecimal(Random r, DecimalTypeInfo decimalTypeInfo) {
-    while (true) {
-      StringBuilder sb = new StringBuilder();
-      int precision = 1 + r.nextInt(18);
-      int scale = 0 + r.nextInt(precision + 1);
-
-      int integerDigits = precision - scale;
-
-      if (r.nextBoolean()) {
-        sb.append("-");
-      }
-
-      if (integerDigits == 0) {
-        sb.append("0");
-      } else {
-        sb.append(RandomTypeUtil.getRandString(r, DECIMAL_CHARS, integerDigits));
-      }
-      if (scale != 0) {
-        sb.append(".");
-        sb.append(RandomTypeUtil.getRandString(r, DECIMAL_CHARS, scale));
-      }
-
-      HiveDecimal bd = HiveDecimal.create(sb.toString());
-      if (bd.scale() > bd.precision()) {
-        // Sometimes weird decimals are produced?
-        continue;
-      }
-
-      return bd;
-    }
-  }
-
-  public static HiveIntervalYearMonth getRandIntervalYearMonth(Random r) {
-    String yearMonthSignStr = r.nextInt(2) == 0 ? "" : "-";
-    String intervalYearMonthStr = String.format("%s%d-%d",
-        yearMonthSignStr,
-        Integer.valueOf(1800 + r.nextInt(500)),  // year
-        Integer.valueOf(0 + r.nextInt(12)));     // month
-    HiveIntervalYearMonth intervalYearMonthVal = HiveIntervalYearMonth.valueOf(intervalYearMonthStr);
-    return intervalYearMonthVal;
-  }
-
-  public static HiveIntervalDayTime getRandIntervalDayTime(Random r) {
-    String optionalNanos = "";
-    if (r.nextInt(2) == 1) {
-      optionalNanos = String.format(".%09d",
-          Integer.valueOf(0 + r.nextInt(DateUtils.NANOS_PER_SEC)));
-    }
-    String yearMonthSignStr = r.nextInt(2) == 0 ? "" : "-";
-    String dayTimeStr = String.format("%s%d %02d:%02d:%02d%s",
-        yearMonthSignStr,
-        Integer.valueOf(1 + r.nextInt(28)),      // day
-        Integer.valueOf(0 + r.nextInt(24)),      // hour
-        Integer.valueOf(0 + r.nextInt(60)),      // minute
-        Integer.valueOf(0 + r.nextInt(60)),      // second
-        optionalNanos);
-    HiveIntervalDayTime intervalDayTimeVal = HiveIntervalDayTime.valueOf(dayTimeStr);
-    return intervalDayTimeVal;
-  }
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/2d1f403d/serde/src/java/org/apache/hadoop/hive/serde2/fast/SerializeWrite.java
----------------------------------------------------------------------
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/fast/SerializeWrite.java b/serde/src/java/org/apache/hadoop/hive/serde2/fast/SerializeWrite.java
index e562ce3..fb41420 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/fast/SerializeWrite.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/fast/SerializeWrite.java
@@ -96,7 +96,7 @@ public interface SerializeWrite {
 
   /*
    * STRING.
-   * 
+   *
    * Can be used to write CHAR and VARCHAR when the caller takes responsibility for
    * truncation/padding issues.
    */

http://git-wip-us.apache.org/repos/asf/hive/blob/2d1f403d/serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleDeserializeRead.java
----------------------------------------------------------------------
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleDeserializeRead.java b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleDeserializeRead.java
index 765ba7e..ac44390 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleDeserializeRead.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleDeserializeRead.java
@@ -21,6 +21,7 @@ package org.apache.hadoop.hive.serde2.lazy.fast;
 import java.io.UnsupportedEncodingException;
 import java.nio.charset.CharacterCodingException;
 import java.sql.Date;
+import java.util.Arrays;
 
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -73,6 +74,7 @@ public final class LazySimpleDeserializeRead extends DeserializeRead {
   private int end;
   private int fieldCount;
   private int fieldIndex;
+  private int parseFieldIndex;
   private int fieldStart;
   private int fieldLength;
 
@@ -124,6 +126,41 @@ public final class LazySimpleDeserializeRead extends DeserializeRead {
     fieldIndex = -1;
   }
 
+  /*
+   * Get detailed read position information to help diagnose exceptions.
+   */
+  public String getDetailedReadPositionString() {
+    StringBuffer sb = new StringBuffer();
+
+    sb.append("Reading byte[] of length ");
+    sb.append(bytes.length);
+    sb.append(" at start offset ");
+    sb.append(start);
+    sb.append(" for length ");
+    sb.append(end - start);
+    sb.append(" to read ");
+    sb.append(fieldCount);
+    sb.append(" fields with types ");
+    sb.append(Arrays.toString(typeInfos));
+    sb.append(".  ");
+    if (fieldIndex == -1) {
+      sb.append("Error during field delimitor parsing of field #");
+      sb.append(parseFieldIndex);
+    } else {
+      sb.append("Read field #");
+      sb.append(fieldIndex);
+      sb.append(" at field start position ");
+      sb.append(startPosition[fieldIndex]);
+      int currentFieldLength = startPosition[fieldIndex + 1] - startPosition[fieldIndex] - 1;
+      sb.append(" for field length ");
+      sb.append(currentFieldLength);
+      sb.append(" current read offset ");
+      sb.append(offset);
+    }
+
+    return sb.toString();
+  }
+
   /**
    * Parse the byte[] and fill each field.
    *
@@ -133,27 +170,29 @@ public final class LazySimpleDeserializeRead extends DeserializeRead {
   private void parse() {
 
     int structByteEnd = end;
-    int fieldId = 0;
     int fieldByteBegin = start;
     int fieldByteEnd = start;
 
+    // Kept as a member variable to support getDetailedReadPositionString.
+    parseFieldIndex = 0;
+
     // Go through all bytes in the byte[]
     while (fieldByteEnd <= structByteEnd) {
       if (fieldByteEnd == structByteEnd || bytes[fieldByteEnd] == separator) {
         // Reached the end of a field?
-        if (lastColumnTakesRest && fieldId == fieldCount - 1) {
+        if (lastColumnTakesRest && parseFieldIndex == fieldCount - 1) {
           fieldByteEnd = structByteEnd;
         }
-        startPosition[fieldId] = fieldByteBegin;
-        fieldId++;
-        if (fieldId == fieldCount || fieldByteEnd == structByteEnd) {
+        startPosition[parseFieldIndex] = fieldByteBegin;
+        parseFieldIndex++;
+        if (parseFieldIndex == fieldCount || fieldByteEnd == structByteEnd) {
           // All fields have been parsed, or bytes have been parsed.
           // We need to set the startPosition of fields.length to ensure we
           // can use the same formula to calculate the length of each field.
           // For missing fields, their starting positions will all be the same,
           // which will make their lengths to be -1 and uncheckedGetField will
           // return these fields as NULLs.
-          for (int i = fieldId; i <= fieldCount; i++) {
+          for (int i = parseFieldIndex; i <= fieldCount; i++) {
             startPosition[i] = fieldByteEnd + 1;
           }
           break;
@@ -177,8 +216,8 @@ public final class LazySimpleDeserializeRead extends DeserializeRead {
     }
 
     // Missing fields?
-    if (!missingFieldWarned && fieldId < fieldCount) {
-      doMissingFieldWarned(fieldId);
+    if (!missingFieldWarned && parseFieldIndex < fieldCount) {
+      doMissingFieldWarned(parseFieldIndex);
     }
   }
 
@@ -518,12 +557,8 @@ public final class LazySimpleDeserializeRead extends DeserializeRead {
     return missingFieldWarned;
   }
   @Override
-  public boolean readBeyondBufferRangeWarned() {
-    return extraFieldWarned;
-  }
-  @Override
   public boolean bufferRangeHasExtraDataWarned() {
-    return false;  // UNDONE: Get rid of...
+    return false;
   }
 
   private void doExtraFieldWarned() {

http://git-wip-us.apache.org/repos/asf/hive/blob/2d1f403d/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinaryDeserializeRead.java
----------------------------------------------------------------------
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinaryDeserializeRead.java b/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinaryDeserializeRead.java
index bbb35c7..0df1d79 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinaryDeserializeRead.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinaryDeserializeRead.java
@@ -20,6 +20,8 @@ package org.apache.hadoop.hive.serde2.lazybinary.fast;
 
 import java.io.EOFException;
 import java.io.IOException;
+import java.util.Arrays;
+
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.hive.common.type.HiveDecimal;
@@ -31,6 +33,7 @@ import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.VInt;
 import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.VLong;
 import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.io.WritableUtils;
 
 /*
  * Directly deserialize with the caller reading field-by-field the LazyBinary serialization format.
@@ -54,6 +57,7 @@ public final class LazyBinaryDeserializeRead extends DeserializeRead {
   private int offset;
   private int end;
   private int fieldCount;
+  private int fieldStart;
   private int fieldIndex;
   private byte nullByte;
 
@@ -62,7 +66,6 @@ public final class LazyBinaryDeserializeRead extends DeserializeRead {
   private VLong tempVLong;
 
   private boolean readBeyondConfiguredFieldsWarned;
-  private boolean readBeyondBufferRangeWarned;
   private boolean bufferRangeHasExtraDataWarned;
 
   public LazyBinaryDeserializeRead(TypeInfo[] typeInfos) {
@@ -71,7 +74,6 @@ public final class LazyBinaryDeserializeRead extends DeserializeRead {
     tempVInt = new VInt();
     tempVLong = new VLong();
     readBeyondConfiguredFieldsWarned = false;
-    readBeyondBufferRangeWarned = false;
     bufferRangeHasExtraDataWarned = false;
   }
 
@@ -93,6 +95,32 @@ public final class LazyBinaryDeserializeRead extends DeserializeRead {
   }
 
   /*
+   * Get detailed read position information to help diagnose exceptions.
+   */
+  public String getDetailedReadPositionString() {
+    StringBuffer sb = new StringBuffer();
+
+    sb.append("Reading byte[] of length ");
+    sb.append(bytes.length);
+    sb.append(" at start offset ");
+    sb.append(start);
+    sb.append(" for length ");
+    sb.append(end - start);
+    sb.append(" to read ");
+    sb.append(fieldCount);
+    sb.append(" fields with types ");
+    sb.append(Arrays.toString(typeInfos));
+    sb.append(".  Read field #");
+    sb.append(fieldIndex);
+    sb.append(" at field start position ");
+    sb.append(fieldStart);
+    sb.append(" current read offset ");
+    sb.append(offset);
+
+    return sb.toString();
+  }
+
+  /*
    * Reads the NULL information for a field.
    *
    * @return Returns true when the field is NULL; reading is positioned to the next field.
@@ -111,11 +139,13 @@ public final class LazyBinaryDeserializeRead extends DeserializeRead {
       return true;
     }
 
+    fieldStart = offset;
+
     if (fieldIndex == 0) {
       // The rest of the range check for fields after the first is below after checking
       // the NULL byte.
       if (offset >= end) {
-        warnBeyondEof();
+        throw new EOFException();
       }
       nullByte = bytes[offset++];
     }
@@ -129,9 +159,7 @@ public final class LazyBinaryDeserializeRead extends DeserializeRead {
 
       // Make sure there is at least one byte that can be read for a value.
       if (offset >= end) {
-        // Careful: since we may be dealing with NULLs in the final NULL byte, we check after
-        // the NULL byte check..
-        warnBeyondEof();
+        throw new EOFException();
       }
 
       /*
@@ -149,33 +177,33 @@ public final class LazyBinaryDeserializeRead extends DeserializeRead {
       case SHORT:
         // Last item -- ok to be at end.
         if (offset + 2 > end) {
-          warnBeyondEof();
+          throw new EOFException();
         }
         currentShort = LazyBinaryUtils.byteArrayToShort(bytes, offset);
         offset += 2;
         break;
       case INT:
+        // Parse the first byte of a vint/vlong to determine the number of bytes.
+        if (offset + WritableUtils.decodeVIntSize(bytes[offset]) > end) {
+          throw new EOFException();
+        }
         LazyBinaryUtils.readVInt(bytes, offset, tempVInt);
         offset += tempVInt.length;
-        // Last item -- ok to be at end.
-        if (offset > end) {
-          warnBeyondEof();
-        }
         currentInt = tempVInt.value;
         break;
       case LONG:
+        // Parse the first byte of a vint/vlong to determine the number of bytes.
+        if (offset + WritableUtils.decodeVIntSize(bytes[offset]) > end) {
+          throw new EOFException();
+        }
         LazyBinaryUtils.readVLong(bytes, offset, tempVLong);
         offset += tempVLong.length;
-        // Last item -- ok to be at end.
-        if (offset > end) {
-          warnBeyondEof();
-        }
         currentLong = tempVLong.value;
         break;
       case FLOAT:
         // Last item -- ok to be at end.
         if (offset + 4 > end) {
-          warnBeyondEof();
+          throw new EOFException();
         }
         currentFloat = Float.intBitsToFloat(LazyBinaryUtils.byteArrayToInt(bytes, offset));
         offset += 4;
@@ -183,7 +211,7 @@ public final class LazyBinaryDeserializeRead extends DeserializeRead {
       case DOUBLE:
         // Last item -- ok to be at end.
         if (offset + 8 > end) {
-          warnBeyondEof();
+          throw new EOFException();
         }
         currentDouble = Double.longBitsToDouble(LazyBinaryUtils.byteArrayToLong(bytes, offset));
         offset += 8;
@@ -195,18 +223,19 @@ public final class LazyBinaryDeserializeRead extends DeserializeRead {
       case VARCHAR:
         {
           // using vint instead of 4 bytes
+          // Parse the first byte of a vint/vlong to determine the number of bytes.
+          if (offset + WritableUtils.decodeVIntSize(bytes[offset]) > end) {
+            throw new EOFException();
+          }
           LazyBinaryUtils.readVInt(bytes, offset, tempVInt);
           offset += tempVInt.length;
-          // Could be last item for empty string -- ok to be at end.
-          if (offset > end) {
-            warnBeyondEof();
-          }
+
           int saveStart = offset;
           int length = tempVInt.value;
           offset += length;
           // Last item -- ok to be at end.
           if (offset > end) {
-            warnBeyondEof();
+            throw new EOFException();
           }
 
           currentBytes = bytes;
@@ -215,12 +244,12 @@ public final class LazyBinaryDeserializeRead extends DeserializeRead {
         }
         break;
       case DATE:
+        // Parse the first byte of a vint/vlong to determine the number of bytes.
+        if (offset + WritableUtils.decodeVIntSize(bytes[offset]) > end) {
+          throw new EOFException();
+        }
         LazyBinaryUtils.readVInt(bytes, offset, tempVInt);
         offset += tempVInt.length;
-        // Last item -- ok to be at end.
-        if (offset > end) {
-          warnBeyondEof();
-        }
 
         currentDateWritable.set(tempVInt.value);
         break;
@@ -231,34 +260,37 @@ public final class LazyBinaryDeserializeRead extends DeserializeRead {
           offset += length;
           // Last item -- ok to be at end.
           if (offset > end) {
-            warnBeyondEof();
+            throw new EOFException();
           }
 
           currentTimestampWritable.set(bytes, saveStart);
         }
         break;
       case INTERVAL_YEAR_MONTH:
+        // Parse the first byte of a vint/vlong to determine the number of bytes.
+        if (offset + WritableUtils.decodeVIntSize(bytes[offset]) > end) {
+          throw new EOFException();
+        }
         LazyBinaryUtils.readVInt(bytes, offset, tempVInt);
         offset += tempVInt.length;
-        // Last item -- ok to be at end.
-        if (offset > end) {
-          warnBeyondEof();
-        }
+
         currentHiveIntervalYearMonthWritable.set(tempVInt.value);
         break;
       case INTERVAL_DAY_TIME:
+        // The first bounds check must leave at least one byte beyond it for the 2nd vint (hence >=).
+        // Parse the first byte of a vint/vlong to determine the number of bytes.
+        if (offset + WritableUtils.decodeVIntSize(bytes[offset]) >= end) {
+          throw new EOFException();
+        }
         LazyBinaryUtils.readVLong(bytes, offset, tempVLong);
         offset += tempVLong.length;
-        if (offset >= end) {
-          // Overshoot or not enough for next item.
-          warnBeyondEof();
+
+        // Parse the first byte of a vint/vlong to determine the number of bytes.
+        if (offset + WritableUtils.decodeVIntSize(bytes[offset]) > end) {
+          throw new EOFException();
         }
         LazyBinaryUtils.readVInt(bytes, offset, tempVInt);
         offset += tempVInt.length;
-        // Last item -- ok to be at end.
-        if (offset > end) {
-          warnBeyondEof();
-        }
 
         currentHiveIntervalDayTimeWritable.set(tempVLong.value, tempVInt.value);
         break;
@@ -269,23 +301,27 @@ public final class LazyBinaryDeserializeRead extends DeserializeRead {
 
           // These calls are to see how much data there is. The setFromBytes call below will do the same
           // readVInt reads but actually unpack the decimal.
+
+          // The first bounds check must leave at least one byte beyond it for the 2nd vint (hence >=).
+          // Parse the first byte of a vint/vlong to determine the number of bytes.
+          if (offset + WritableUtils.decodeVIntSize(bytes[offset]) >= end) {
+            throw new EOFException();
+          }
           LazyBinaryUtils.readVInt(bytes, offset, tempVInt);
           int saveStart = offset;
           offset += tempVInt.length;
-          if (offset >= end) {
-            // Overshoot or not enough for next item.
-            warnBeyondEof();
+
+          // Parse the first byte of a vint/vlong to determine the number of bytes.
+          if (offset + WritableUtils.decodeVIntSize(bytes[offset]) > end) {
+            throw new EOFException();
           }
           LazyBinaryUtils.readVInt(bytes, offset, tempVInt);
           offset += tempVInt.length;
-          if (offset >= end) {
-            // Overshoot or not enough for next item.
-            warnBeyondEof();
-          }
+
           offset += tempVInt.value;
           // Last item -- ok to be at end.
           if (offset > end) {
-            warnBeyondEof();
+            throw new EOFException();
           }
           int length = offset - saveStart;
 
@@ -327,7 +363,7 @@ public final class LazyBinaryDeserializeRead extends DeserializeRead {
       if ((fieldIndex % 8) == 0) {
         // Get next null byte.
         if (offset >= end) {
-          warnBeyondEof();
+          throw new EOFException();
         }
         nullByte = bytes[offset++];
       }
@@ -363,23 +399,7 @@ public final class LazyBinaryDeserializeRead extends DeserializeRead {
     return readBeyondConfiguredFieldsWarned;
   }
   @Override
-  public boolean readBeyondBufferRangeWarned() {
-    return readBeyondBufferRangeWarned;
-  }
-  @Override
   public boolean bufferRangeHasExtraDataWarned() {
     return bufferRangeHasExtraDataWarned;
   }
-
-  private void warnBeyondEof() throws EOFException {
-    if (!readBeyondBufferRangeWarned) {
-      // Warn only once.
-      int length = end - start;
-      LOG.info("Reading beyond buffer range! Buffer range " +  start
-          + " for length " + length + " but reading more... "
-          + "(total buffer length " + bytes.length + ")"
-          + "  Ignoring similar problems.");
-      readBeyondBufferRangeWarned = true;
-    }
-  }
 }
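
Taken together, the LazyBinaryDeserializeRead changes replace the old warnBeyondEof() behavior (log once, keep reading) with a hard EOFException, and for vint/vlong-encoded fields the bounds check now happens before the value is decoded, using the first byte to learn the encoded length. A minimal sketch of that idiom, reusing the same field names as the class above (the helper itself is illustrative, not part of the patch):

    import java.io.EOFException;
    import org.apache.hadoop.io.WritableUtils;

    // Sketch only: the up-front EOF check applied to every vint/vlong field above.
    static void checkVIntFits(byte[] bytes, int offset, int end) throws EOFException {
      // decodeVIntSize looks only at the first byte and reports how many bytes the
      // variable-length integer occupies, so the whole value can be range-checked
      // before LazyBinaryUtils.readVInt/readVLong touches the buffer.
      if (offset + WritableUtils.decodeVIntSize(bytes[offset]) > end) {
        throw new EOFException();
      }
    }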