Posted to commits@hive.apache.org by mm...@apache.org on 2016/08/20 08:32:56 UTC
[1/3] hive git commit: HIVE-13874: Tighten up EOF checking in Fast
DeserializeRead classes; display better exception information;
add new Unit Tests (Matt McCline, reviewed by Sergey Shelukhin)
Repository: hive
Updated Branches:
refs/heads/master 64e916245 -> 2d1f403dd
http://git-wip-us.apache.org/repos/asf/hive/blob/2d1f403d/serde/src/test/org/apache/hadoop/hive/serde2/SerdeRandomRowSource.java
----------------------------------------------------------------------
diff --git a/serde/src/test/org/apache/hadoop/hive/serde2/SerdeRandomRowSource.java b/serde/src/test/org/apache/hadoop/hive/serde2/SerdeRandomRowSource.java
new file mode 100644
index 0000000..f08a075
--- /dev/null
+++ b/serde/src/test/org/apache/hadoop/hive/serde2/SerdeRandomRowSource.java
@@ -0,0 +1,423 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.serde2;
+
+import java.sql.Date;
+import java.sql.Timestamp;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Random;
+
+import org.apache.commons.lang.ArrayUtils;
+import org.apache.commons.lang.StringUtils;
+import org.apache.hadoop.hive.common.type.HiveChar;
+import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.common.type.HiveIntervalDayTime;
+import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth;
+import org.apache.hadoop.hive.common.type.HiveVarchar;
+import org.apache.hadoop.hive.common.type.RandomTypeUtil;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBooleanObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableByteObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDateObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDoubleObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableFloatObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveCharObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveDecimalObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveIntervalDayTimeObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveIntervalYearMonthObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveVarcharObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableIntObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableLongObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableShortObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableTimestampObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
+import org.apache.hive.common.util.DateUtils;
+
+/**
+ * Generate object inspector and random row object[].
+ */
+public class SerdeRandomRowSource {
+
+ private Random r;
+
+ private int columnCount;
+
+ private List<String> typeNames;
+
+ private PrimitiveCategory[] primitiveCategories;
+
+ private PrimitiveTypeInfo[] primitiveTypeInfos;
+
+ private List<ObjectInspector> primitiveObjectInspectorList;
+
+ private StructObjectInspector rowStructObjectInspector;
+
+ public List<String> typeNames() {
+ return typeNames;
+ }
+
+ public PrimitiveCategory[] primitiveCategories() {
+ return primitiveCategories;
+ }
+
+ public PrimitiveTypeInfo[] primitiveTypeInfos() {
+ return primitiveTypeInfos;
+ }
+
+ public StructObjectInspector rowStructObjectInspector() {
+ return rowStructObjectInspector;
+ }
+
+ public StructObjectInspector partialRowStructObjectInspector(int partialFieldCount) {
+ ArrayList<ObjectInspector> partialPrimitiveObjectInspectorList =
+ new ArrayList<ObjectInspector>(partialFieldCount);
+ List<String> columnNames = new ArrayList<String>(partialFieldCount);
+ for (int i = 0; i < partialFieldCount; i++) {
+ columnNames.add(String.format("partial%d", i));
+ partialPrimitiveObjectInspectorList.add(
+ PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(
+ primitiveTypeInfos[i]));
+ }
+
+ return ObjectInspectorFactory.getStandardStructObjectInspector(
+ columnNames, partialPrimitiveObjectInspectorList);
+ }
+
+ public void init(Random r) {
+ this.r = r;
+ chooseSchema();
+ }
+
+ /*
+ * For now, exclude CHAR until we determine why there is a difference (blank padding)
+ * between serializing with LazyBinarySerializeWrite and with the regular SerDe...
+ */
+ private static String[] possibleHiveTypeNames = {
+ "boolean",
+ "tinyint",
+ "smallint",
+ "int",
+ "bigint",
+ "date",
+ "float",
+ "double",
+ "string",
+// "char",
+ "varchar",
+ "binary",
+ "timestamp",
+ "interval_year_month",
+ "interval_day_time",
+ "decimal"
+ };
+
+ private void chooseSchema() {
+ HashSet<Integer> hashSet = null;
+ boolean allTypes;
+ boolean onlyOne = (r.nextInt(100) == 7);
+ if (onlyOne) {
+ columnCount = 1;
+ allTypes = false;
+ } else {
+ allTypes = r.nextBoolean();
+ if (allTypes) {
+ // One of each type.
+ columnCount = possibleHiveTypeNames.length;
+ hashSet = new HashSet<Integer>();
+ } else {
+ columnCount = 1 + r.nextInt(20);
+ }
+ }
+ typeNames = new ArrayList<String>(columnCount);
+ primitiveCategories = new PrimitiveCategory[columnCount];
+ primitiveTypeInfos = new PrimitiveTypeInfo[columnCount];
+ primitiveObjectInspectorList = new ArrayList<ObjectInspector>(columnCount);
+ List<String> columnNames = new ArrayList<String>(columnCount);
+ for (int c = 0; c < columnCount; c++) {
+ columnNames.add(String.format("col%d", c));
+ String typeName;
+
+ if (onlyOne) {
+ typeName = possibleHiveTypeNames[r.nextInt(possibleHiveTypeNames.length)];
+ } else {
+ int typeNum;
+ if (allTypes) {
+ while (true) {
+ typeNum = r.nextInt(possibleHiveTypeNames.length);
+ Integer typeNumInteger = Integer.valueOf(typeNum);
+ if (!hashSet.contains(typeNumInteger)) {
+ hashSet.add(typeNumInteger);
+ break;
+ }
+ }
+ } else {
+ typeNum = r.nextInt(possibleHiveTypeNames.length);
+ }
+ typeName = possibleHiveTypeNames[typeNum];
+ }
+ if (typeName.equals("char")) {
+ int maxLength = 1 + r.nextInt(100);
+ typeName = String.format("char(%d)", maxLength);
+ } else if (typeName.equals("varchar")) {
+ int maxLength = 1 + r.nextInt(100);
+ typeName = String.format("varchar(%d)", maxLength);
+ } else if (typeName.equals("decimal")) {
+ typeName = String.format("decimal(%d,%d)", HiveDecimal.SYSTEM_DEFAULT_PRECISION, HiveDecimal.SYSTEM_DEFAULT_SCALE);
+ }
+ PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString(typeName);
+ primitiveTypeInfos[c] = primitiveTypeInfo;
+ PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory();
+ primitiveCategories[c] = primitiveCategory;
+ primitiveObjectInspectorList.add(PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(primitiveTypeInfo));
+ typeNames.add(typeName);
+ }
+ rowStructObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, primitiveObjectInspectorList);
+ }
+
+ public Object[][] randomRows(int n) {
+ Object[][] result = new Object[n][];
+ for (int i = 0; i < n; i++) {
+ result[i] = randomRow();
+ }
+ return result;
+ }
+
+ public Object[] randomRow() {
+ Object[] row = new Object[columnCount];
+ for (int c = 0; c < columnCount; c++) {
+ Object object = randomObject(c);
+ if (object == null) {
+ throw new Error("Unexpected null for column " + c);
+ }
+ row[c] = getWritableObject(c, object);
+ if (row[c] == null) {
+ throw new Error("Unexpected null for writable for column " + c);
+ }
+ }
+ return row;
+ }
+
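+ /*
+ * Simple in-place O(n^2) exchange sort; ObjectInspectorUtils.compare supplies
+ * the ordering over rows.
+ */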
+ public static void sort(Object[][] rows, ObjectInspector oi) {
+ for (int i = 0; i < rows.length; i++) {
+ for (int j = i + 1; j < rows.length; j++) {
+ if (ObjectInspectorUtils.compare(rows[i], oi, rows[j], oi) > 0) {
+ Object[] t = rows[i];
+ rows[i] = rows[j];
+ rows[j] = t;
+ }
+ }
+ }
+ }
+
+ public void sort(Object[][] rows) {
+ SerdeRandomRowSource.sort(rows, rowStructObjectInspector);
+ }
+
+ public Object getWritableObject(int column, Object object) {
+ ObjectInspector objectInspector = primitiveObjectInspectorList.get(column);
+ PrimitiveCategory primitiveCategory = primitiveCategories[column];
+ PrimitiveTypeInfo primitiveTypeInfo = primitiveTypeInfos[column];
+ switch (primitiveCategory) {
+ case BOOLEAN:
+ return ((WritableBooleanObjectInspector) objectInspector).create((boolean) object);
+ case BYTE:
+ return ((WritableByteObjectInspector) objectInspector).create((byte) object);
+ case SHORT:
+ return ((WritableShortObjectInspector) objectInspector).create((short) object);
+ case INT:
+ return ((WritableIntObjectInspector) objectInspector).create((int) object);
+ case LONG:
+ return ((WritableLongObjectInspector) objectInspector).create((long) object);
+ case DATE:
+ return ((WritableDateObjectInspector) objectInspector).create((Date) object);
+ case FLOAT:
+ return ((WritableFloatObjectInspector) objectInspector).create((float) object);
+ case DOUBLE:
+ return ((WritableDoubleObjectInspector) objectInspector).create((double) object);
+ case STRING:
+ return ((WritableStringObjectInspector) objectInspector).create((String) object);
+ case CHAR:
+ {
+ WritableHiveCharObjectInspector writableCharObjectInspector =
+ new WritableHiveCharObjectInspector( (CharTypeInfo) primitiveTypeInfo);
+ return writableCharObjectInspector.create((HiveChar) object);
+ }
+ case VARCHAR:
+ {
+ WritableHiveVarcharObjectInspector writableVarcharObjectInspector =
+ new WritableHiveVarcharObjectInspector( (VarcharTypeInfo) primitiveTypeInfo);
+ return writableVarcharObjectInspector.create((HiveVarchar) object);
+ }
+ case BINARY:
+ return PrimitiveObjectInspectorFactory.writableBinaryObjectInspector.create((byte[]) object);
+ case TIMESTAMP:
+ return ((WritableTimestampObjectInspector) objectInspector).create((Timestamp) object);
+ case INTERVAL_YEAR_MONTH:
+ return ((WritableHiveIntervalYearMonthObjectInspector) objectInspector).create((HiveIntervalYearMonth) object);
+ case INTERVAL_DAY_TIME:
+ return ((WritableHiveIntervalDayTimeObjectInspector) objectInspector).create((HiveIntervalDayTime) object);
+ case DECIMAL:
+ {
+ WritableHiveDecimalObjectInspector writableDecimalObjectInspector =
+ new WritableHiveDecimalObjectInspector((DecimalTypeInfo) primitiveTypeInfo);
+ return writableDecimalObjectInspector.create((HiveDecimal) object);
+ }
+ default:
+ throw new Error("Unknown primitive category " + primitiveCategory);
+ }
+ }
+
+ public Object randomObject(int column) {
+ PrimitiveCategory primitiveCategory = primitiveCategories[column];
+ PrimitiveTypeInfo primitiveTypeInfo = primitiveTypeInfos[column];
+ switch (primitiveCategory) {
+ case BOOLEAN:
+ return Boolean.valueOf(r.nextInt(2) == 1);
+ case BYTE:
+ return Byte.valueOf((byte) r.nextInt());
+ case SHORT:
+ return Short.valueOf((short) r.nextInt());
+ case INT:
+ return Integer.valueOf(r.nextInt());
+ case LONG:
+ return Long.valueOf(r.nextLong());
+ case DATE:
+ return RandomTypeUtil.getRandDate(r);
+ case FLOAT:
+ return Float.valueOf(r.nextFloat() * 10 - 5);
+ case DOUBLE:
+ return Double.valueOf(r.nextDouble() * 10 - 5);
+ case STRING:
+ return RandomTypeUtil.getRandString(r);
+ case CHAR:
+ return getRandHiveChar(r, (CharTypeInfo) primitiveTypeInfo);
+ case VARCHAR:
+ return getRandHiveVarchar(r, (VarcharTypeInfo) primitiveTypeInfo);
+ case BINARY:
+ return getRandBinary(r, 1 + r.nextInt(100));
+ case TIMESTAMP:
+ return RandomTypeUtil.getRandTimestamp(r);
+ case INTERVAL_YEAR_MONTH:
+ return getRandIntervalYearMonth(r);
+ case INTERVAL_DAY_TIME:
+ return getRandIntervalDayTime(r);
+ case DECIMAL:
+ return getRandHiveDecimal(r, (DecimalTypeInfo) primitiveTypeInfo);
+ default:
+ throw new Error("Unknown primitive category " + primitiveCategory);
+ }
+ }
+
+ public static HiveChar getRandHiveChar(Random r, CharTypeInfo charTypeInfo) {
+ int maxLength = 1 + r.nextInt(charTypeInfo.getLength());
+ String randomString = RandomTypeUtil.getRandString(r, "abcdefghijklmnopqrstuvwxyz", 100);
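+ // HiveChar enforces maxLength, truncating the 100-character random string as needed.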
+ HiveChar hiveChar = new HiveChar(randomString, maxLength);
+ return hiveChar;
+ }
+
+ public static HiveVarchar getRandHiveVarchar(Random r, VarcharTypeInfo varcharTypeInfo) {
+ int maxLength = 1 + r.nextInt(varcharTypeInfo.getLength());
+ String randomString = RandomTypeUtil.getRandString(r, "abcdefghijklmnopqrstuvwxyz", 100);
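+ // HiveVarchar likewise truncates the random string to maxLength.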
+ HiveVarchar hiveVarchar = new HiveVarchar(randomString, maxLength);
+ return hiveVarchar;
+ }
+
+ public static byte[] getRandBinary(Random r, int len){
+ byte[] bytes = new byte[len];
+ for (int j = 0; j < len; j++){
+ bytes[j] = (byte) r.nextInt();
+ }
+ return bytes;
+ }
+
+ private static final String DECIMAL_CHARS = "0123456789";
+
+ public static HiveDecimal getRandHiveDecimal(Random r, DecimalTypeInfo decimalTypeInfo) {
+ while (true) {
+ StringBuilder sb = new StringBuilder();
+ int precision = 1 + r.nextInt(18);
+ int scale = 0 + r.nextInt(precision + 1);
+
+ int integerDigits = precision - scale;
+
+ if (r.nextBoolean()) {
+ sb.append("-");
+ }
+
+ if (integerDigits == 0) {
+ sb.append("0");
+ } else {
+ sb.append(RandomTypeUtil.getRandString(r, DECIMAL_CHARS, integerDigits));
+ }
+ if (scale != 0) {
+ sb.append(".");
+ sb.append(RandomTypeUtil.getRandString(r, DECIMAL_CHARS, scale));
+ }
+
+ HiveDecimal bd = HiveDecimal.create(sb.toString());
+ if (bd.scale() > bd.precision()) {
+ // HiveDecimal can normalize small fractions (e.g. 0.001) to a scale greater
+ // than the precision; retry until the value fits.
+ continue;
+ }
+
+ return bd;
+ }
+ }
+
+ public static HiveIntervalYearMonth getRandIntervalYearMonth(Random r) {
+ String yearMonthSignStr = r.nextInt(2) == 0 ? "" : "-";
+ String intervalYearMonthStr = String.format("%s%d-%d",
+ yearMonthSignStr,
+ Integer.valueOf(1800 + r.nextInt(500)), // year
+ Integer.valueOf(0 + r.nextInt(12))); // month
+ HiveIntervalYearMonth intervalYearMonthVal = HiveIntervalYearMonth.valueOf(intervalYearMonthStr);
+ return intervalYearMonthVal;
+ }
+
+ public static HiveIntervalDayTime getRandIntervalDayTime(Random r) {
+ String optionalNanos = "";
+ if (r.nextInt(2) == 1) {
+ optionalNanos = String.format(".%09d",
+ Integer.valueOf(0 + r.nextInt(DateUtils.NANOS_PER_SEC)));
+ }
+ String yearMonthSignStr = r.nextInt(2) == 0 ? "" : "-";
+ String dayTimeStr = String.format("%s%d %02d:%02d:%02d%s",
+ yearMonthSignStr,
+ Integer.valueOf(1 + r.nextInt(28)), // day
+ Integer.valueOf(0 + r.nextInt(24)), // hour
+ Integer.valueOf(0 + r.nextInt(60)), // minute
+ Integer.valueOf(0 + r.nextInt(60)), // second
+ optionalNanos);
+ HiveIntervalDayTime intervalDayTimeVal = HiveIntervalDayTime.valueOf(dayTimeStr);
+ return intervalDayTimeVal;
+ }
+}
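For reviewers, a minimal sketch of how a test drives the new SerdeRandomRowSource
(the seed, row count, and class name here are illustrative, not part of this commit):

    import java.util.Random;

    import org.apache.hadoop.hive.serde2.SerdeRandomRowSource;
    import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;

    public class SerdeRandomRowSourceSketch {
      public static void main(String[] args) {
        Random r = new Random(9999);                 // illustrative seed
        SerdeRandomRowSource source = new SerdeRandomRowSource();
        source.init(r);                              // chooses a random schema

        // Object inspector matching the chosen schema, and rows of writables.
        StructObjectInspector rowOI = source.rowStructObjectInspector();
        Object[][] rows = source.randomRows(1000);

        // Order rows using ObjectInspectorUtils.compare over the row inspector.
        source.sort(rows);
        System.out.println("Generated " + rows.length + " rows of types " + source.typeNames());
      }
    }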
http://git-wip-us.apache.org/repos/asf/hive/blob/2d1f403d/serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/TestBinarySortableFast.java
----------------------------------------------------------------------
diff --git a/serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/TestBinarySortableFast.java b/serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/TestBinarySortableFast.java
index 58937db..7babf7a 100644
--- a/serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/TestBinarySortableFast.java
+++ b/serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/TestBinarySortableFast.java
@@ -25,10 +25,10 @@ import java.util.Random;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.hive.serde2.ByteStream.Output;
import org.apache.hadoop.hive.serde2.SerDe;
+import org.apache.hadoop.hive.serde2.SerdeRandomRowSource;
import org.apache.hadoop.hive.serde2.VerifyFast;
import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead;
import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite;
-import org.apache.hadoop.hive.serde2.fast.RandomRowObjectSource;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
@@ -39,8 +39,11 @@ import junit.framework.TestCase;
public class TestBinarySortableFast extends TestCase {
+ private static String debugDetailedReadPositionString;
+ private static StackTraceElement[] debugStackTrace;
+
private void testBinarySortableFast(
- RandomRowObjectSource source, Object[][] rows,
+ SerdeRandomRowSource source, Object[][] rows,
boolean[] columnSortOrderIsDesc, byte[] columnNullMarker, byte[] columnNotNullMarker,
SerDe serde, StructObjectInspector rowOI,
SerDe serde_fewer, StructObjectInspector writeRowOI,
@@ -134,11 +137,6 @@ public class TestBinarySortableFast extends TestCase {
}
binarySortableDeserializeRead.extraFieldsCheck();
TestCase.assertTrue(!binarySortableDeserializeRead.readBeyondConfiguredFieldsWarned());
- if (doWriteFewerColumns) {
- TestCase.assertTrue(binarySortableDeserializeRead.readBeyondBufferRangeWarned());
- } else {
- TestCase.assertTrue(!binarySortableDeserializeRead.readBeyondBufferRangeWarned());
- }
TestCase.assertTrue(!binarySortableDeserializeRead.bufferRangeHasExtraDataWarned());
/*
@@ -161,6 +159,8 @@ public class TestBinarySortableFast extends TestCase {
try {
VerifyFast.verifyDeserializeRead(binarySortableDeserializeRead2, primitiveTypeInfos[index], writable);
} catch (EOFException e) {
+// debugDetailedReadPositionString = binarySortableDeserializeRead2.getDetailedReadPositionString();
+// debugStackTrace = e.getStackTrace();
threw = true;
}
TestCase.assertTrue(threw);
@@ -268,11 +268,6 @@ public class TestBinarySortableFast extends TestCase {
}
binarySortableDeserializeRead.extraFieldsCheck();
TestCase.assertTrue(!binarySortableDeserializeRead.readBeyondConfiguredFieldsWarned());
- if (doWriteFewerColumns) {
- TestCase.assertTrue(binarySortableDeserializeRead.readBeyondBufferRangeWarned());
- } else {
- TestCase.assertTrue(!binarySortableDeserializeRead.readBeyondBufferRangeWarned());
- }
TestCase.assertTrue(!binarySortableDeserializeRead.bufferRangeHasExtraDataWarned());
}
}
@@ -280,7 +275,7 @@ public class TestBinarySortableFast extends TestCase {
private void testBinarySortableFastCase(int caseNum, boolean doNonRandomFill, Random r)
throws Throwable {
- RandomRowObjectSource source = new RandomRowObjectSource();
+ SerdeRandomRowSource source = new SerdeRandomRowSource();
source.init(r);
int rowCount = 1000;
http://git-wip-us.apache.org/repos/asf/hive/blob/2d1f403d/serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazySimpleFast.java
----------------------------------------------------------------------
diff --git a/serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazySimpleFast.java b/serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazySimpleFast.java
index 76b93c6..66c6203 100644
--- a/serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazySimpleFast.java
+++ b/serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazySimpleFast.java
@@ -17,7 +17,6 @@
*/
package org.apache.hadoop.hive.serde2.lazy;
-import java.io.EOFException;
import java.util.Arrays;
import java.util.Properties;
import java.util.Random;
@@ -25,19 +24,14 @@ import java.util.Random;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.ByteStream.Output;
-import org.apache.hadoop.hive.serde2.SerDe;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.SerDeUtils;
+import org.apache.hadoop.hive.serde2.SerdeRandomRowSource;
import org.apache.hadoop.hive.serde2.VerifyFast;
import org.apache.hadoop.hive.serde2.binarysortable.MyTestClass;
-import org.apache.hadoop.hive.serde2.binarysortable.MyTestPrimitiveClass;
-import org.apache.hadoop.hive.serde2.binarysortable.MyTestPrimitiveClass.ExtraTypeInfo;
-import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead;
-import org.apache.hadoop.hive.serde2.fast.RandomRowObjectSource;
import org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleDeserializeRead;
import org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleSerializeWrite;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.io.BytesWritable;
@@ -49,7 +43,7 @@ import junit.framework.TestCase;
public class TestLazySimpleFast extends TestCase {
private void testLazySimpleFast(
- RandomRowObjectSource source, Object[][] rows,
+ SerdeRandomRowSource source, Object[][] rows,
LazySimpleSerDe serde, StructObjectInspector rowOI,
LazySimpleSerDe serde_fewer, StructObjectInspector writeRowOI,
byte separator, LazySerDeParameters serdeParams, LazySerDeParameters serdeParams_fewer,
@@ -131,11 +125,6 @@ public class TestLazySimpleFast extends TestCase {
}
lazySimpleDeserializeRead.extraFieldsCheck();
TestCase.assertTrue(!lazySimpleDeserializeRead.readBeyondConfiguredFieldsWarned());
- if (doWriteFewerColumns) {
- TestCase.assertTrue(lazySimpleDeserializeRead.readBeyondBufferRangeWarned());
- } else {
- TestCase.assertTrue(!lazySimpleDeserializeRead.readBeyondBufferRangeWarned());
- }
TestCase.assertTrue(!lazySimpleDeserializeRead.bufferRangeHasExtraDataWarned());
}
@@ -219,11 +208,6 @@ public class TestLazySimpleFast extends TestCase {
}
lazySimpleDeserializeRead.extraFieldsCheck();
TestCase.assertTrue(!lazySimpleDeserializeRead.readBeyondConfiguredFieldsWarned());
- if (doWriteFewerColumns) {
- TestCase.assertTrue(lazySimpleDeserializeRead.readBeyondBufferRangeWarned());
- } else {
- TestCase.assertTrue(!lazySimpleDeserializeRead.readBeyondBufferRangeWarned());
- }
TestCase.assertTrue(!lazySimpleDeserializeRead.bufferRangeHasExtraDataWarned());
}
}
@@ -266,7 +250,7 @@ public class TestLazySimpleFast extends TestCase {
public void testLazySimpleFastCase(int caseNum, boolean doNonRandomFill, Random r)
throws Throwable {
- RandomRowObjectSource source = new RandomRowObjectSource();
+ SerdeRandomRowSource source = new SerdeRandomRowSource();
source.init(r);
int rowCount = 1000;
http://git-wip-us.apache.org/repos/asf/hive/blob/2d1f403d/serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/TestLazyBinaryFast.java
----------------------------------------------------------------------
diff --git a/serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/TestLazyBinaryFast.java b/serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/TestLazyBinaryFast.java
index d7c4999..5af11cd 100644
--- a/serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/TestLazyBinaryFast.java
+++ b/serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/TestLazyBinaryFast.java
@@ -17,43 +17,28 @@
*/
package org.apache.hadoop.hive.serde2.lazybinary;
-import java.io.EOFException;
-import java.util.ArrayList;
import java.util.Arrays;
-import java.util.List;
import java.util.Random;
import junit.framework.TestCase;
import org.apache.hadoop.hive.serde2.ByteStream.Output;
import org.apache.hadoop.hive.serde2.SerDe;
+import org.apache.hadoop.hive.serde2.SerdeRandomRowSource;
import org.apache.hadoop.hive.serde2.VerifyFast;
import org.apache.hadoop.hive.serde2.binarysortable.MyTestClass;
-import org.apache.hadoop.hive.serde2.binarysortable.MyTestPrimitiveClass;
-import org.apache.hadoop.hive.serde2.binarysortable.MyTestPrimitiveClass.ExtraTypeInfo;
-import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead;
-import org.apache.hadoop.hive.serde2.fast.RandomRowObjectSource;
-import org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters;
-import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead;
import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinarySerializeWrite;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
-import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Writable;
public class TestLazyBinaryFast extends TestCase {
private void testLazyBinaryFast(
- RandomRowObjectSource source, Object[][] rows,
+ SerdeRandomRowSource source, Object[][] rows,
SerDe serde, StructObjectInspector rowOI,
SerDe serde_fewer, StructObjectInspector writeRowOI,
PrimitiveTypeInfo[] primitiveTypeInfos,
@@ -131,7 +116,6 @@ public class TestLazyBinaryFast extends TestCase {
} else {
TestCase.assertTrue(!lazyBinaryDeserializeRead.readBeyondConfiguredFieldsWarned());
}
- TestCase.assertTrue(!lazyBinaryDeserializeRead.readBeyondBufferRangeWarned());
TestCase.assertTrue(!lazyBinaryDeserializeRead.bufferRangeHasExtraDataWarned());
}
@@ -228,19 +212,13 @@ public class TestLazyBinaryFast extends TestCase {
}
lazyBinaryDeserializeRead.extraFieldsCheck();
TestCase.assertTrue(!lazyBinaryDeserializeRead.readBeyondConfiguredFieldsWarned());
- if (doWriteFewerColumns) {
- // The nullByte may cause this to not be true...
- // TestCase.assertTrue(lazyBinaryDeserializeRead.readBeyondBufferRangeWarned());
- } else {
- TestCase.assertTrue(!lazyBinaryDeserializeRead.readBeyondBufferRangeWarned());
- }
TestCase.assertTrue(!lazyBinaryDeserializeRead.bufferRangeHasExtraDataWarned());
}
}
public void testLazyBinaryFastCase(int caseNum, boolean doNonRandomFill, Random r) throws Throwable {
- RandomRowObjectSource source = new RandomRowObjectSource();
+ SerdeRandomRowSource source = new SerdeRandomRowSource();
source.init(r);
int rowCount = 1000;
[3/3] hive git commit: HIVE-13874: Tighten up EOF checking in Fast
DeserializeRead classes; display better exception information;
add new Unit Tests (Matt McCline, reviewed by Sergey Shelukhin)
Posted by mm...@apache.org.
HIVE-13874: Tighten up EOF checking in Fast DeserializeRead classes; display better exception information; add new Unit Tests (Matt McCline, reviewed by Sergey Shelukhin)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/2d1f403d
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/2d1f403d
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/2d1f403d
Branch: refs/heads/master
Commit: 2d1f403dd9df751136c1b77a9cdccd3212d20c24
Parents: 64e9162
Author: Matt McCline <mm...@hortonworks.com>
Authored: Sat Aug 20 01:32:45 2016 -0700
Committer: Matt McCline <mm...@hortonworks.com>
Committed: Sat Aug 20 01:32:45 2016 -0700
----------------------------------------------------------------------
.../persistence/BytesBytesMultiHashMap.java | 29 +-
.../hive/ql/exec/tez/ReduceRecordSource.java | 10 +-
.../ql/exec/vector/VectorDeserializeRow.java | 32 +-
.../hive/ql/exec/vector/VectorMapOperator.java | 10 +-
.../VectorMapJoinGenerateResultOperator.java | 64 +-
.../fast/VectorMapJoinFastLongHashTable.java | 11 +-
.../fast/VectorMapJoinFastStringCommon.java | 14 +-
.../fast/VectorMapJoinFastValueStore.java | 56 +-
.../hashtable/VectorMapJoinHashMapResult.java | 4 +-
.../VectorMapJoinOptimizedHashMap.java | 10 +-
.../persistence/TestBytesBytesMultiHashMap.java | 2 -
.../ql/exec/vector/TestVectorRowObject.java | 3 +-
.../hive/ql/exec/vector/TestVectorSerDeRow.java | 12 +-
.../ql/exec/vector/VectorRandomRowSource.java | 458 ++++++++++++
.../vector/mapjoin/fast/CheckFastHashTable.java | 19 +-
.../mapjoin/fast/CheckFastRowHashMap.java | 387 ++++++++++
.../fast/TestVectorMapJoinFastBytesHashMap.java | 15 +
.../fast/TestVectorMapJoinFastRowHashMap.java | 718 +++++++++++++++++++
.../exec/vector/mapjoin/fast/VerifyFastRow.java | 397 ++++++++++
.../fast/BinarySortableDeserializeRead.java | 65 +-
.../hive/serde2/fast/DeserializeRead.java | 6 +-
.../hive/serde2/fast/RandomRowObjectSource.java | 423 -----------
.../hadoop/hive/serde2/fast/SerializeWrite.java | 2 +-
.../lazy/fast/LazySimpleDeserializeRead.java | 61 +-
.../fast/LazyBinaryDeserializeRead.java | 146 ++--
.../hive/serde2/SerdeRandomRowSource.java | 423 +++++++++++
.../binarysortable/TestBinarySortableFast.java | 21 +-
.../hive/serde2/lazy/TestLazySimpleFast.java | 22 +-
.../serde2/lazybinary/TestLazyBinaryFast.java | 28 +-
29 files changed, 2744 insertions(+), 704 deletions(-)
----------------------------------------------------------------------
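The ql-side diffs below repeat one pattern: each fast deserialize call is wrapped so
that a failure is rethrown with the detailed read position attached. A minimal sketch
of that pattern (the field and method names around the call are illustrative; see the
diffs for the real call sites):

    // Sketch only: assumes a VectorDeserializeRow field named vectorDeserializeRow.
    private void deserializeWithDetail(VectorizedRowBatch batch, int batchIndex)
        throws HiveException {
      try {
        vectorDeserializeRow.deserialize(batch, batchIndex);
      } catch (Exception e) {
        // getDetailedReadPositionString() reports where in the byte range the
        // fast DeserializeRead class was positioned when the failure occurred.
        throw new HiveException(
            "\nDeserializeRead detail: " +
                vectorDeserializeRow.getDetailedReadPositionString(),
            e);
      }
    }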
http://git-wip-us.apache.org/repos/asf/hive/blob/2d1f403d/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/BytesBytesMultiHashMap.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/BytesBytesMultiHashMap.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/BytesBytesMultiHashMap.java
index dd88461..6b89e98 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/BytesBytesMultiHashMap.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/BytesBytesMultiHashMap.java
@@ -298,8 +298,8 @@ public final class BytesBytesMultiHashMap {
}
/**
- * Read the current value.
- *
+ * Read the current value.
+ *
* @return
* The ByteSegmentRef to the current value read.
*/
@@ -380,29 +380,6 @@ public final class BytesBytesMultiHashMap {
}
/**
- * @return Whether we have read all the values or not.
- */
- public boolean isEof() {
- // LOG.info("BytesBytesMultiHashMap isEof hasRows " + hasRows + " hasList " + hasList + " readIndex " + readIndex + " nextTailOffset " + nextTailOffset);
- if (!hasRows) {
- return true;
- }
-
- if (!hasList) {
- return (readIndex > 0);
- } else {
- // Multiple values.
- if (readIndex <= 1) {
- // Careful: We have not read the list record and 2nd value yet, so nextTailOffset
- // is not valid yet.
- return false;
- } else {
- return (nextTailOffset <= 0);
- }
- }
- }
-
- /**
* Lets go of any references to a hash map.
*/
public void forget() {
@@ -741,7 +718,7 @@ public final class BytesBytesMultiHashMap {
long capacity = refs.length << 1;
expandAndRehashImpl(capacity);
}
-
+
private void expandAndRehashImpl(long capacity) {
long expandTime = System.currentTimeMillis();
final long[] oldRefs = refs;
http://git-wip-us.apache.org/repos/asf/hive/blob/2d1f403d/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java
index e966ff1..f4c3b81 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java
@@ -22,6 +22,7 @@ import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
+
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.hive.ql.exec.CommonMergeJoinOperator;
@@ -413,7 +414,14 @@ public class ReduceRecordSource implements RecordSource {
// VectorizedBatchUtil.displayBytes(keyBytes, 0, keyLength));
keyBinarySortableDeserializeToRow.setBytes(keyBytes, 0, keyLength);
- keyBinarySortableDeserializeToRow.deserialize(batch, 0);
+ try {
+ keyBinarySortableDeserializeToRow.deserialize(batch, 0);
+ } catch (Exception e) {
+ throw new HiveException(
+ "\nDeserializeRead details: " +
+ keyBinarySortableDeserializeToRow.getDetailedReadPositionString(),
+ e);
+ }
for(int i = 0; i < firstValueColumnOffset; i++) {
VectorizedBatchUtil.setRepeatingColumn(batch, i);
}
http://git-wip-us.apache.org/repos/asf/hive/blob/2d1f403d/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java
index fca1882..f66916b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java
@@ -650,38 +650,26 @@ public final class VectorDeserializeRow<T extends DeserializeRead> {
/**
* Deserialize a row from the range of bytes specified by setBytes.
*
+ * Use getDetailedReadPositionString to get detailed read position information to help
+ * diagnose exceptions that are thrown...
+ *
* @param batch
* @param batchIndex
* @throws IOException
*/
public void deserialize(VectorizedRowBatch batch, int batchIndex) throws IOException {
final int count = isConvert.length;
- int i = 0;
- try {
- while (i < count) {
- if (isConvert[i]) {
- deserializeConvertRowColumn(batch, batchIndex, i);
- } else {
- deserializeRowColumn(batch, batchIndex, i);
- }
- i++; // Increment after the apply which could throw an exception.
+ for (int i = 0; i < count; i++) {
+ if (isConvert[i]) {
+ deserializeConvertRowColumn(batch, batchIndex, i);
+ } else {
+ deserializeRowColumn(batch, batchIndex, i);
}
- } catch (EOFException e) {
- throwMoreDetailedException(e, i);
}
deserializeRead.extraFieldsCheck();
}
- private void throwMoreDetailedException(IOException e, int index) throws EOFException {
- StringBuilder sb = new StringBuilder();
- sb.append("Detail: \"" + e.toString() + "\" occured for field " + index + " of " + sourceTypeInfos.length + " fields (");
- for (int i = 0; i < sourceTypeInfos.length; i++) {
- if (i > 0) {
- sb.append(", ");
- }
- sb.append(((PrimitiveTypeInfo) sourceTypeInfos[i]).getPrimitiveCategory().name());
- }
- sb.append(")");
- throw new EOFException(sb.toString());
+ public String getDetailedReadPositionString() {
+ return deserializeRead.getDetailedReadPositionString();
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/2d1f403d/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java
index a65cac0..2bdc59b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java
@@ -820,7 +820,15 @@ public class VectorMapOperator extends AbstractMapOperator {
currentDeserializeRead.set(binComp.getBytes(), 0, binComp.getLength());
// Deserialize and append new row using the current batch size as the index.
- currentVectorDeserializeRow.deserialize(deserializerBatch, deserializerBatch.size++);
+ try {
+ currentVectorDeserializeRow.deserialize(
+ deserializerBatch, deserializerBatch.size++);
+ } catch (Exception e) {
+ throw new HiveException(
+ "\nDeserializeRead detail: " +
+ currentVectorDeserializeRow.getDetailedReadPositionString(),
+ e);
+ }
}
break;
http://git-wip-us.apache.org/repos/asf/hive/blob/2d1f403d/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java
index 22b2a17..469f86a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java
@@ -134,6 +134,27 @@ public abstract class VectorMapJoinGenerateResultOperator extends VectorMapJoinC
batch.selectedInUse = saveSelectedInUse;
}
+ protected void doSmallTableDeserializeRow(VectorizedRowBatch batch, int batchIndex,
+ ByteSegmentRef byteSegmentRef, VectorMapJoinHashMapResult hashMapResult)
+ throws HiveException {
+
+ byte[] bytes = byteSegmentRef.getBytes();
+ int offset = (int) byteSegmentRef.getOffset();
+ int length = byteSegmentRef.getLength();
+ smallTableVectorDeserializeRow.setBytes(bytes, offset, length);
+
+ try {
+ smallTableVectorDeserializeRow.deserialize(batch, batchIndex);
+ } catch (Exception e) {
+ throw new HiveException(
+ "\nHashMapResult detail: " +
+ hashMapResult.getDetailedHashMapResultPositionString() +
+ "\nDeserializeRead detail: " +
+ smallTableVectorDeserializeRow.getDetailedReadPositionString(),
+ e);
+ }
+ }
+
//------------------------------------------------------------------------------------------------
/*
@@ -180,13 +201,8 @@ public abstract class VectorMapJoinGenerateResultOperator extends VectorMapJoinC
}
if (smallTableVectorDeserializeRow != null) {
-
- byte[] bytes = byteSegmentRef.getBytes();
- int offset = (int) byteSegmentRef.getOffset();
- int length = byteSegmentRef.getLength();
- smallTableVectorDeserializeRow.setBytes(bytes, offset, length);
-
- smallTableVectorDeserializeRow.deserialize(batch, batchIndex);
+ doSmallTableDeserializeRow(batch, batchIndex,
+ byteSegmentRef, hashMapResult);
}
// VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, "generateHashMapResultSingleValue big table");
@@ -248,12 +264,8 @@ public abstract class VectorMapJoinGenerateResultOperator extends VectorMapJoinC
if (smallTableVectorDeserializeRow != null) {
- byte[] bytes = byteSegmentRef.getBytes();
- int offset = (int) byteSegmentRef.getOffset();
- int length = byteSegmentRef.getLength();
- smallTableVectorDeserializeRow.setBytes(bytes, offset, length);
-
- smallTableVectorDeserializeRow.deserialize(overflowBatch, overflowBatch.size);
+ doSmallTableDeserializeRow(overflowBatch, overflowBatch.size,
+ byteSegmentRef, hashMapResult);
}
// VectorizedBatchUtil.debugDisplayOneRow(overflowBatch, overflowBatch.size, "generateHashMapResultMultiValue overflow");
@@ -298,13 +310,8 @@ public abstract class VectorMapJoinGenerateResultOperator extends VectorMapJoinC
while (byteSegmentRef != null) {
if (smallTableVectorDeserializeRow != null) {
-
- byte[] bytes = byteSegmentRef.getBytes();
- int offset = (int) byteSegmentRef.getOffset();
- int length = byteSegmentRef.getLength();
- smallTableVectorDeserializeRow.setBytes(bytes, offset, length);
-
- smallTableVectorDeserializeRow.deserialize(overflowBatch, overflowBatch.size);
+ doSmallTableDeserializeRow(overflowBatch, overflowBatch.size,
+ byteSegmentRef, hashMapResult);
}
overflowBatch.size++;
@@ -348,10 +355,10 @@ public abstract class VectorMapJoinGenerateResultOperator extends VectorMapJoinC
}
}
- if (hashMapResult.isEof()) {
+ byteSegmentRef = hashMapResult.next();
+ if (byteSegmentRef == null) {
break;
}
- byteSegmentRef = hashMapResult.next();
// Get ready for a another round of small table values.
overflowBatch.reset();
@@ -543,14 +550,18 @@ public abstract class VectorMapJoinGenerateResultOperator extends VectorMapJoinC
int offset = bigTable.currentOffset();
int length = bigTable.currentLength();
-// LOG.debug(CLASS_NAME + " reProcessBigTable serialized row #" + rowCount + ", offset " + offset + ", length " + length);
-
bigTableVectorDeserializeRow.setBytes(bytes, offset, length);
- bigTableVectorDeserializeRow.deserialize(spillReplayBatch, spillReplayBatch.size);
+ try {
+ bigTableVectorDeserializeRow.deserialize(spillReplayBatch, spillReplayBatch.size);
+ } catch (Exception e) {
+ throw new HiveException(
+ "\nDeserializeRead detail: " +
+ bigTableVectorDeserializeRow.getDetailedReadPositionString(),
+ e);
+ }
spillReplayBatch.size++;
if (spillReplayBatch.size == VectorizedRowBatch.DEFAULT_SIZE) {
- // LOG.debug("reProcessBigTable going to call process with spillReplayBatch.size " + spillReplayBatch.size + " rows");
process(spillReplayBatch, posBigTable); // call process once we have a full batch
spillReplayBatch.reset();
batchCount++;
@@ -558,7 +569,6 @@ public abstract class VectorMapJoinGenerateResultOperator extends VectorMapJoinC
}
// Process the row batch that has less than DEFAULT_SIZE rows
if (spillReplayBatch.size > 0) {
- // LOG.debug("reProcessBigTable going to call process with spillReplayBatch.size " + spillReplayBatch.size + " rows");
process(spillReplayBatch, posBigTable);
spillReplayBatch.reset();
batchCount++;
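Note the loop-control change above: instead of consulting isEof() before fetching,
callers now treat a null return from next() as the end of the value list. Roughly:

    // Sketch of the revised consumer loop over a VectorMapJoinHashMapResult.
    ByteSegmentRef byteSegmentRef = hashMapResult.first();
    while (byteSegmentRef != null) {
      // ... deserialize and forward the small-table value ...
      byteSegmentRef = hashMapResult.next();  // null once all values are read
    }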
http://git-wip-us.apache.org/repos/asf/hive/blob/2d1f403d/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java
index 5373aad..ee66d5b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java
@@ -78,8 +78,15 @@ public abstract class VectorMapJoinFastLongHashTable
byte[] keyBytes = currentKey.getBytes();
int keyLength = currentKey.getLength();
keyBinarySortableDeserializeRead.set(keyBytes, 0, keyLength);
- if (keyBinarySortableDeserializeRead.readCheckNull()) {
- return;
+ try {
+ if (keyBinarySortableDeserializeRead.readCheckNull()) {
+ return;
+ }
+ } catch (Exception e) {
+ throw new HiveException(
+ "\nDeserializeRead details: " +
+ keyBinarySortableDeserializeRead.getDetailedReadPositionString(),
+ e);
}
long key = VectorMapJoinFastLongHashUtil.deserializeLongKey(
http://git-wip-us.apache.org/repos/asf/hive/blob/2d1f403d/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringCommon.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringCommon.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringCommon.java
index 985fb1c..bf378ac 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringCommon.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringCommon.java
@@ -45,8 +45,15 @@ public class VectorMapJoinFastStringCommon {
byte[] keyBytes = currentKey.getBytes();
int keyLength = currentKey.getLength();
keyBinarySortableDeserializeRead.set(keyBytes, 0, keyLength);
- if (keyBinarySortableDeserializeRead.readCheckNull()) {
- return;
+ try {
+ if (keyBinarySortableDeserializeRead.readCheckNull()) {
+ return;
+ }
+ } catch (Exception e) {
+ throw new HiveException(
+ "\nDeserializeRead details: " +
+ keyBinarySortableDeserializeRead.getDetailedReadPositionString(),
+ e);
}
hashTable.add(
@@ -59,6 +66,7 @@ public class VectorMapJoinFastStringCommon {
public VectorMapJoinFastStringCommon(boolean isOuterJoin) {
this.isOuterJoin = isOuterJoin;
PrimitiveTypeInfo[] primitiveTypeInfos = { TypeInfoFactory.stringTypeInfo };
- keyBinarySortableDeserializeRead = new BinarySortableDeserializeRead(primitiveTypeInfos);
+ keyBinarySortableDeserializeRead =
+ new BinarySortableDeserializeRead(primitiveTypeInfos);
}
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/2d1f403d/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastValueStore.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastValueStore.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastValueStore.java
index f96e32b..f9c5b34 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastValueStore.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastValueStore.java
@@ -122,9 +122,7 @@ public class VectorMapJoinFastValueStore {
private boolean isSingleRow;
private int cappedCount;
- private boolean haveReadCurrent;
private int readIndex;
- private boolean isEof;
private boolean isNextEof;
private boolean isNextLast;
@@ -153,9 +151,48 @@ public class VectorMapJoinFastValueStore {
cappedCount =
(int) ((valueRefWord & CappedCount.bitMask) >> CappedCount.bitShift);
// Position to beginning.
- haveReadCurrent = false;
readIndex = 0;
- isEof = false;
+ }
+
+ /**
+ * Get detailed HashMap result position information to help diagnose exceptions.
+ */
+ @Override
+ public String getDetailedHashMapResultPositionString() {
+ StringBuilder sb = new StringBuilder();
+
+ sb.append("Read index ");
+ sb.append(readIndex);
+ if (isSingleRow) {
+ sb.append(" single row");
+ } else {
+ sb.append(" capped count ");
+ sb.append(cappedCount);
+ }
+
+ if (readIndex > 0) {
+ sb.append(" byteSegmentRef is byte[] of length ");
+ sb.append(byteSegmentRef.getBytes().length);
+ sb.append(" at offset ");
+ sb.append(byteSegmentRef.getOffset());
+ sb.append(" for length ");
+ sb.append(byteSegmentRef.getLength());
+ if (!isSingleRow) {
+ sb.append(" (isNextEof ");
+ sb.append(isNextEof);
+ sb.append(" isNextLast ");
+ sb.append(isNextLast);
+ sb.append(" nextAbsoluteValueOffset ");
+ sb.append(nextAbsoluteValueOffset);
+ sb.append(" isNextValueLengthSmall ");
+ sb.append(isNextValueLengthSmall);
+ sb.append(" nextSmallValueLength ");
+ sb.append(nextSmallValueLength);
+ sb.append(")");
+ }
+ }
+
+ return sb.toString();
}
@Override
@@ -193,9 +230,7 @@ public class VectorMapJoinFastValueStore {
}
// Position to beginning.
- haveReadCurrent = false;
readIndex = 0;
- isEof = false;
return internalRead();
}
@@ -363,18 +398,9 @@ public class VectorMapJoinFastValueStore {
}
@Override
- public boolean isEof() {
- if (!hasRows) {
- return true;
- }
- return isEof;
- }
-
- @Override
public void forget() {
}
-
@Override
public String toString() {
StringBuilder sb = new StringBuilder();
http://git-wip-us.apache.org/repos/asf/hive/blob/2d1f403d/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinHashMapResult.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinHashMapResult.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinHashMapResult.java
index fa6dedb..a5dfba8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinHashMapResult.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinHashMapResult.java
@@ -57,7 +57,7 @@ public abstract class VectorMapJoinHashMapResult extends VectorMapJoinHashTableR
public abstract ByteSegmentRef next();
/**
- * @return Whether reading is at the end.
+ * Get detailed HashMap result position information to help diagnose exceptions.
*/
- public abstract boolean isEof();
+ public abstract String getDetailedHashMapResultPositionString();
}
http://git-wip-us.apache.org/repos/asf/hive/blob/2d1f403d/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedHashMap.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedHashMap.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedHashMap.java
index b7da976..eada694 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedHashMap.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedHashMap.java
@@ -89,11 +89,6 @@ public class VectorMapJoinOptimizedHashMap
}
@Override
- public boolean isEof() {
- return bytesBytesMultiHashMapResult.isEof();
- }
-
- @Override
public void forget() {
bytesBytesMultiHashMapResult.forget();
super.forget();
@@ -106,6 +101,11 @@ public class VectorMapJoinOptimizedHashMap
sb.append("isSingleRow " + (joinResult() == JoinUtil.JoinResult.MATCH ? isSingleRow() : "<none>") + ")");
return sb.toString();
}
+
+ @Override
+ public String getDetailedHashMapResultPositionString() {
+ return "(Not supported yet)";
+ }
}
@Override
http://git-wip-us.apache.org/repos/asf/hive/blob/2d1f403d/ql/src/test/org/apache/hadoop/hive/ql/exec/persistence/TestBytesBytesMultiHashMap.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/persistence/TestBytesBytesMultiHashMap.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/persistence/TestBytesBytesMultiHashMap.java
index aed9214..c1d7c72 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/persistence/TestBytesBytesMultiHashMap.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/persistence/TestBytesBytesMultiHashMap.java
@@ -133,8 +133,6 @@ public class TestBytesBytesMultiHashMap {
hs.add(ref.copy());
ref = hashMapResult.next();
}
- } else {
- assertTrue(hashMapResult.isEof());
}
assertEquals(state, count);
assertEquals(values.length, count);
http://git-wip-us.apache.org/repos/asf/hive/blob/2d1f403d/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorRowObject.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorRowObject.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorRowObject.java
index c55d951..9c4a973 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorRowObject.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorRowObject.java
@@ -22,7 +22,6 @@ import java.util.Arrays;
import java.util.Random;
import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.serde2.fast.RandomRowObjectSource;
import junit.framework.TestCase;
@@ -51,7 +50,7 @@ public class TestVectorRowObject extends TestCase {
String[] emptyScratchTypeNames = new String[0];
- RandomRowObjectSource source = new RandomRowObjectSource();
+ VectorRandomRowSource source = new VectorRandomRowSource();
source.init(r);
VectorizedRowBatchCtx batchContext = new VectorizedRowBatchCtx();
http://git-wip-us.apache.org/repos/asf/hive/blob/2d1f403d/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSerDeRow.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSerDeRow.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSerDeRow.java
index da69ee3..c6704f9 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSerDeRow.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSerDeRow.java
@@ -48,7 +48,6 @@ import org.apache.hadoop.hive.serde2.ByteStream.Output;
import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead;
import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite;
import org.apache.hadoop.hive.serde2.fast.DeserializeRead;
-import org.apache.hadoop.hive.serde2.fast.RandomRowObjectSource;
import org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters;
import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
import org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleDeserializeRead;
@@ -85,7 +84,7 @@ public class TestVectorSerDeRow extends TestCase {
}
void deserializeAndVerify(Output output, DeserializeRead deserializeRead,
- RandomRowObjectSource source, Object[] expectedRow)
+ VectorRandomRowSource source, Object[] expectedRow)
throws HiveException, IOException {
deserializeRead.set(output.getData(), 0, output.getLength());
PrimitiveCategory[] primitiveCategories = source.primitiveCategories();
@@ -281,12 +280,11 @@ public class TestVectorSerDeRow extends TestCase {
}
deserializeRead.extraFieldsCheck();
TestCase.assertTrue(!deserializeRead.readBeyondConfiguredFieldsWarned());
- TestCase.assertTrue(!deserializeRead.readBeyondBufferRangeWarned());
TestCase.assertTrue(!deserializeRead.bufferRangeHasExtraDataWarned());
}
void serializeBatch(VectorizedRowBatch batch, VectorSerializeRow vectorSerializeRow,
- DeserializeRead deserializeRead, RandomRowObjectSource source, Object[][] randomRows,
+ DeserializeRead deserializeRead, VectorRandomRowSource source, Object[][] randomRows,
int firstRandomRowIndex) throws HiveException, IOException {
Output output = new Output();
@@ -311,7 +309,7 @@ public class TestVectorSerDeRow extends TestCase {
String[] emptyScratchTypeNames = new String[0];
- RandomRowObjectSource source = new RandomRowObjectSource();
+ VectorRandomRowSource source = new VectorRandomRowSource();
source.init(r);
VectorizedRowBatchCtx batchContext = new VectorizedRowBatchCtx();
@@ -389,7 +387,7 @@ public class TestVectorSerDeRow extends TestCase {
}
}
- private Output serializeRow(Object[] row, RandomRowObjectSource source, SerializeWrite serializeWrite) throws HiveException, IOException {
+ private Output serializeRow(Object[] row, VectorRandomRowSource source, SerializeWrite serializeWrite) throws HiveException, IOException {
Output output = new Output();
serializeWrite.set(output);
PrimitiveTypeInfo[] primitiveTypeInfos = source.primitiveTypeInfos();
@@ -542,7 +540,7 @@ public class TestVectorSerDeRow extends TestCase {
String[] emptyScratchTypeNames = new String[0];
- RandomRowObjectSource source = new RandomRowObjectSource();
+ VectorRandomRowSource source = new VectorRandomRowSource();
source.init(r);
VectorizedRowBatchCtx batchContext = new VectorizedRowBatchCtx();
http://git-wip-us.apache.org/repos/asf/hive/blob/2d1f403d/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java
new file mode 100644
index 0000000..349c76a
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java
@@ -0,0 +1,458 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector;
+
+import java.sql.Date;
+import java.sql.Timestamp;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Random;
+
+import org.apache.commons.lang.ArrayUtils;
+import org.apache.commons.lang.StringUtils;
+import org.apache.hadoop.hive.common.type.HiveChar;
+import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.common.type.HiveIntervalDayTime;
+import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth;
+import org.apache.hadoop.hive.common.type.HiveVarchar;
+import org.apache.hadoop.hive.common.type.RandomTypeUtil;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBooleanObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableByteObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDateObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDoubleObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableFloatObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveCharObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveDecimalObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveIntervalDayTimeObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveIntervalYearMonthObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveVarcharObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableIntObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableLongObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableShortObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableTimestampObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
+import org.apache.hive.common.util.DateUtils;
+
+/**
+ * Generate object inspector and random row object[].
+ */
+public class VectorRandomRowSource {
+
+ private Random r;
+
+ private int columnCount;
+
+ private List<String> typeNames;
+
+ private PrimitiveCategory[] primitiveCategories;
+
+ private PrimitiveTypeInfo[] primitiveTypeInfos;
+
+ private List<ObjectInspector> primitiveObjectInspectorList;
+
+ private StructObjectInspector rowStructObjectInspector;
+
+ public List<String> typeNames() {
+ return typeNames;
+ }
+
+ public PrimitiveCategory[] primitiveCategories() {
+ return primitiveCategories;
+ }
+
+ public PrimitiveTypeInfo[] primitiveTypeInfos() {
+ return primitiveTypeInfos;
+ }
+
+ public StructObjectInspector rowStructObjectInspector() {
+ return rowStructObjectInspector;
+ }
+
+ public StructObjectInspector partialRowStructObjectInspector(int partialFieldCount) {
+ ArrayList<ObjectInspector> partialPrimitiveObjectInspectorList =
+ new ArrayList<ObjectInspector>(partialFieldCount);
+ List<String> columnNames = new ArrayList<String>(partialFieldCount);
+ for (int i = 0; i < partialFieldCount; i++) {
+ columnNames.add(String.format("partial%d", i));
+ partialPrimitiveObjectInspectorList.add(
+ PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(
+ primitiveTypeInfos[i]));
+ }
+
+ return ObjectInspectorFactory.getStandardStructObjectInspector(
+ columnNames, partialPrimitiveObjectInspectorList);
+ }
+
+ public void init(Random r) {
+ this.r = r;
+ chooseSchema();
+ }
+
+ /*
+ * For now, exclude CHAR until we determine why there is a difference (blank padding)
+ * serializing with LazyBinarySerializeWrite and the regular SerDe...
+ */
+ private static String[] possibleHiveTypeNames = {
+ "boolean",
+ "tinyint",
+ "smallint",
+ "int",
+ "bigint",
+ "date",
+ "float",
+ "double",
+ "string",
+// "char",
+ "varchar",
+ "binary",
+ "date",
+ "timestamp",
+ "interval_year_month",
+ "interval_day_time",
+ "decimal"
+ };
+
+ private void chooseSchema() {
+ HashSet<Integer> hashSet = null;
+ boolean allTypes;
+ boolean onlyOne = (r.nextInt(100) == 7);
+ if (onlyOne) {
+ columnCount = 1;
+ allTypes = false;
+ } else {
+ allTypes = r.nextBoolean();
+ if (allTypes) {
+ // One of each type.
+ columnCount = possibleHiveTypeNames.length;
+ hashSet = new HashSet<Integer>();
+ } else {
+ columnCount = 1 + r.nextInt(20);
+ }
+ }
+ typeNames = new ArrayList<String>(columnCount);
+ primitiveCategories = new PrimitiveCategory[columnCount];
+ primitiveTypeInfos = new PrimitiveTypeInfo[columnCount];
+ primitiveObjectInspectorList = new ArrayList<ObjectInspector>(columnCount);
+ List<String> columnNames = new ArrayList<String>(columnCount);
+ for (int c = 0; c < columnCount; c++) {
+ columnNames.add(String.format("col%d", c));
+ String typeName;
+
+ if (onlyOne) {
+ typeName = possibleHiveTypeNames[r.nextInt(possibleHiveTypeNames.length)];
+ } else {
+ int typeNum;
+ if (allTypes) {
+ while (true) {
+ typeNum = r.nextInt(possibleHiveTypeNames.length);
+ Integer typeNumInteger = Integer.valueOf(typeNum);
+ if (!hashSet.contains(typeNumInteger)) {
+ hashSet.add(typeNumInteger);
+ break;
+ }
+ }
+ } else {
+ typeNum = r.nextInt(possibleHiveTypeNames.length);
+ }
+ typeName = possibleHiveTypeNames[typeNum];
+ }
+ if (typeName.equals("char")) {
+ int maxLength = 1 + r.nextInt(100);
+ typeName = String.format("char(%d)", maxLength);
+ } else if (typeName.equals("varchar")) {
+ int maxLength = 1 + r.nextInt(100);
+ typeName = String.format("varchar(%d)", maxLength);
+ } else if (typeName.equals("decimal")) {
+ typeName = String.format("decimal(%d,%d)", HiveDecimal.SYSTEM_DEFAULT_PRECISION, HiveDecimal.SYSTEM_DEFAULT_SCALE);
+ }
+ PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString(typeName);
+ primitiveTypeInfos[c] = primitiveTypeInfo;
+ PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory();
+ primitiveCategories[c] = primitiveCategory;
+ primitiveObjectInspectorList.add(PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(primitiveTypeInfo));
+ typeNames.add(typeName);
+ }
+ rowStructObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, primitiveObjectInspectorList);
+ }
+
+ public Object[][] randomRows(int n) {
+ Object[][] result = new Object[n][];
+ for (int i = 0; i < n; i++) {
+ result[i] = randomRow();
+ }
+ return result;
+ }
+
+ public Object[] randomRow() {
+ Object[] row = new Object[columnCount];
+ for (int c = 0; c < columnCount; c++) {
+ Object object = randomObject(c);
+ if (object == null) {
+ throw new Error("Unexpected null for column " + c);
+ }
+ row[c] = getWritableObject(c, object);
+ if (row[c] == null) {
+ throw new Error("Unexpected null for writable for column " + c);
+ }
+ }
+ return row;
+ }
+
+ public Object[] randomRow(int columnCount) {
+ return randomRow(columnCount, r, primitiveObjectInspectorList, primitiveCategories,
+ primitiveTypeInfos);
+ }
+
+ public static Object[] randomRow(int columnCount, Random r,
+ List<ObjectInspector> primitiveObjectInspectorList, PrimitiveCategory[] primitiveCategories,
+ PrimitiveTypeInfo[] primitiveTypeInfos) {
+ Object[] row = new Object[columnCount];
+ for (int c = 0; c < columnCount; c++) {
+ Object object = randomObject(c, r, primitiveCategories, primitiveTypeInfos);
+ if (object == null) {
+ throw new Error("Unexpected null for column " + c);
+ }
+ row[c] = getWritableObject(c, object, primitiveObjectInspectorList,
+ primitiveCategories, primitiveTypeInfos);
+ if (row[c] == null) {
+ throw new Error("Unexpected null for writable for column " + c);
+ }
+ }
+ return row;
+ }
+
+ public static void sort(Object[][] rows, ObjectInspector oi) {
+ for (int i = 0; i < rows.length; i++) {
+ for (int j = i + 1; j < rows.length; j++) {
+ if (ObjectInspectorUtils.compare(rows[i], oi, rows[j], oi) > 0) {
+ Object[] t = rows[i];
+ rows[i] = rows[j];
+ rows[j] = t;
+ }
+ }
+ }
+ }
+
+ public void sort(Object[][] rows) {
+ VectorRandomRowSource.sort(rows, rowStructObjectInspector);
+ }
+
+ public Object getWritableObject(int column, Object object) {
+ return getWritableObject(column, object, primitiveObjectInspectorList,
+ primitiveCategories, primitiveTypeInfos);
+ }
+
+ public static Object getWritableObject(int column, Object object,
+ List<ObjectInspector> primitiveObjectInspectorList, PrimitiveCategory[] primitiveCategories,
+ PrimitiveTypeInfo[] primitiveTypeInfos) {
+ ObjectInspector objectInspector = primitiveObjectInspectorList.get(column);
+ PrimitiveCategory primitiveCategory = primitiveCategories[column];
+ PrimitiveTypeInfo primitiveTypeInfo = primitiveTypeInfos[column];
+ switch (primitiveCategory) {
+ case BOOLEAN:
+ return ((WritableBooleanObjectInspector) objectInspector).create((boolean) object);
+ case BYTE:
+ return ((WritableByteObjectInspector) objectInspector).create((byte) object);
+ case SHORT:
+ return ((WritableShortObjectInspector) objectInspector).create((short) object);
+ case INT:
+ return ((WritableIntObjectInspector) objectInspector).create((int) object);
+ case LONG:
+ return ((WritableLongObjectInspector) objectInspector).create((long) object);
+ case DATE:
+ return ((WritableDateObjectInspector) objectInspector).create((Date) object);
+ case FLOAT:
+ return ((WritableFloatObjectInspector) objectInspector).create((float) object);
+ case DOUBLE:
+ return ((WritableDoubleObjectInspector) objectInspector).create((double) object);
+ case STRING:
+ return ((WritableStringObjectInspector) objectInspector).create((String) object);
+ case CHAR:
+ {
+ WritableHiveCharObjectInspector writableCharObjectInspector =
+ new WritableHiveCharObjectInspector((CharTypeInfo) primitiveTypeInfo);
+ return writableCharObjectInspector.create((HiveChar) object);
+ }
+ case VARCHAR:
+ {
+ WritableHiveVarcharObjectInspector writableVarcharObjectInspector =
+ new WritableHiveVarcharObjectInspector((VarcharTypeInfo) primitiveTypeInfo);
+ return writableVarcharObjectInspector.create((HiveVarchar) object);
+ }
+ case BINARY:
+ return PrimitiveObjectInspectorFactory.writableBinaryObjectInspector.create((byte[]) object);
+ case TIMESTAMP:
+ return ((WritableTimestampObjectInspector) objectInspector).create((Timestamp) object);
+ case INTERVAL_YEAR_MONTH:
+ return ((WritableHiveIntervalYearMonthObjectInspector) objectInspector).create((HiveIntervalYearMonth) object);
+ case INTERVAL_DAY_TIME:
+ return ((WritableHiveIntervalDayTimeObjectInspector) objectInspector).create((HiveIntervalDayTime) object);
+ case DECIMAL:
+ {
+ WritableHiveDecimalObjectInspector writableDecimalObjectInspector =
+ new WritableHiveDecimalObjectInspector((DecimalTypeInfo) primitiveTypeInfo);
+ return writableDecimalObjectInspector.create((HiveDecimal) object);
+ }
+ default:
+ throw new Error("Unknown primitive category " + primitiveCategory);
+ }
+ }
+
+ public Object randomObject(int column) {
+ return randomObject(column, r, primitiveCategories, primitiveTypeInfos);
+ }
+
+ public static Object randomObject(int column, Random r, PrimitiveCategory[] primitiveCategories,
+ PrimitiveTypeInfo[] primitiveTypeInfos) {
+ PrimitiveCategory primitiveCategory = primitiveCategories[column];
+ PrimitiveTypeInfo primitiveTypeInfo = primitiveTypeInfos[column];
+ switch (primitiveCategory) {
+ case BOOLEAN:
+ return Boolean.valueOf(r.nextInt(2) == 1);
+ case BYTE:
+ return Byte.valueOf((byte) r.nextInt());
+ case SHORT:
+ return Short.valueOf((short) r.nextInt());
+ case INT:
+ return Integer.valueOf(r.nextInt());
+ case LONG:
+ return Long.valueOf(r.nextLong());
+ case DATE:
+ return RandomTypeUtil.getRandDate(r);
+ case FLOAT:
+ return Float.valueOf(r.nextFloat() * 10 - 5);
+ case DOUBLE:
+ return Double.valueOf(r.nextDouble() * 10 - 5);
+ case STRING:
+ return RandomTypeUtil.getRandString(r);
+ case CHAR:
+ return getRandHiveChar(r, (CharTypeInfo) primitiveTypeInfo);
+ case VARCHAR:
+ return getRandHiveVarchar(r, (VarcharTypeInfo) primitiveTypeInfo);
+ case BINARY:
+ return getRandBinary(r, 1 + r.nextInt(100));
+ case TIMESTAMP:
+ return RandomTypeUtil.getRandTimestamp(r);
+ case INTERVAL_YEAR_MONTH:
+ return getRandIntervalYearMonth(r);
+ case INTERVAL_DAY_TIME:
+ return getRandIntervalDayTime(r);
+ case DECIMAL:
+ return getRandHiveDecimal(r, (DecimalTypeInfo) primitiveTypeInfo);
+ default:
+ throw new Error("Unknown primitive category " + primitiveCategory);
+ }
+ }
+
+ public static HiveChar getRandHiveChar(Random r, CharTypeInfo charTypeInfo) {
+ int maxLength = 1 + r.nextInt(charTypeInfo.getLength());
+ String randomString = RandomTypeUtil.getRandString(r, "abcdefghijklmnopqrstuvwxyz", 100);
+ HiveChar hiveChar = new HiveChar(randomString, maxLength);
+ return hiveChar;
+ }
+
+ public static HiveVarchar getRandHiveVarchar(Random r, VarcharTypeInfo varcharTypeInfo) {
+ int maxLength = 1 + r.nextInt(varcharTypeInfo.getLength());
+ String randomString = RandomTypeUtil.getRandString(r, "abcdefghijklmnopqrstuvwxyz", 100);
+ HiveVarchar hiveVarchar = new HiveVarchar(randomString, maxLength);
+ return hiveVarchar;
+ }
+
+ public static byte[] getRandBinary(Random r, int len){
+ byte[] bytes = new byte[len];
+ for (int j = 0; j < len; j++){
+ bytes[j] = Byte.valueOf((byte) r.nextInt());
+ }
+ return bytes;
+ }
+
+ private static final String DECIMAL_CHARS = "0123456789";
+
+ public static HiveDecimal getRandHiveDecimal(Random r, DecimalTypeInfo decimalTypeInfo) {
+ while (true) {
+ StringBuilder sb = new StringBuilder();
+ int precision = 1 + r.nextInt(18);
+ int scale = 0 + r.nextInt(precision + 1);
+
+ int integerDigits = precision - scale;
+
+ if (r.nextBoolean()) {
+ sb.append("-");
+ }
+
+ if (integerDigits == 0) {
+ sb.append("0");
+ } else {
+ sb.append(RandomTypeUtil.getRandString(r, DECIMAL_CHARS, integerDigits));
+ }
+ if (scale != 0) {
+ sb.append(".");
+ sb.append(RandomTypeUtil.getRandString(r, DECIMAL_CHARS, scale));
+ }
+
+ HiveDecimal bd = HiveDecimal.create(sb.toString());
+ if (bd.scale() > bd.precision()) {
+ // HiveDecimal.create normalization can yield scale > precision; retry with new digits.
+ continue;
+ }
+
+ return bd;
+ }
+ }
+
+ public static HiveIntervalYearMonth getRandIntervalYearMonth(Random r) {
+ String yearMonthSignStr = r.nextInt(2) == 0 ? "" : "-";
+ String intervalYearMonthStr = String.format("%s%d-%d",
+ yearMonthSignStr,
+ Integer.valueOf(1800 + r.nextInt(500)), // year
+ Integer.valueOf(0 + r.nextInt(12))); // month
+ HiveIntervalYearMonth intervalYearMonthVal = HiveIntervalYearMonth.valueOf(intervalYearMonthStr);
+ return intervalYearMonthVal;
+ }
+
+ public static HiveIntervalDayTime getRandIntervalDayTime(Random r) {
+ String optionalNanos = "";
+ if (r.nextInt(2) == 1) {
+ optionalNanos = String.format(".%09d",
+ Integer.valueOf(0 + r.nextInt(DateUtils.NANOS_PER_SEC)));
+ }
+ String yearMonthSignStr = r.nextInt(2) == 0 ? "" : "-";
+ String dayTimeStr = String.format("%s%d %02d:%02d:%02d%s",
+ yearMonthSignStr,
+ Integer.valueOf(1 + r.nextInt(28)), // day
+ Integer.valueOf(0 + r.nextInt(24)), // hour
+ Integer.valueOf(0 + r.nextInt(60)), // minute
+ Integer.valueOf(0 + r.nextInt(60)), // second
+ optionalNanos);
+ HiveIntervalDayTime intervalDayTimeVal = HiveIntervalDayTime.valueOf(dayTimeStr);
+ return intervalDayTimeVal;
+ }
+}
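As a quick orientation, here is a minimal sketch of how a test typically drives VectorRandomRowSource; the seed, row count, and printout are illustrative only and not part of this commit:

    import java.util.Random;
    import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource;
    import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;

    public class VectorRandomRowSourceSketch {
      public static void main(String[] args) {
        Random r = new Random(12345);                // fixed seed so failures reproduce
        VectorRandomRowSource source = new VectorRandomRowSource();
        source.init(r);                              // chooses a random primitive schema
        Object[][] rows = source.randomRows(100);    // rows of writable objects, never null
        StructObjectInspector rowOI = source.rowStructObjectInspector();
        System.out.println(source.typeNames() + " -> " + rows.length
            + " rows with inspector " + rowOI.getTypeName());
      }
    }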
http://git-wip-us.apache.org/repos/asf/hive/blob/2d1f403d/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CheckFastHashTable.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CheckFastHashTable.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CheckFastHashTable.java
index 3a23584..28a4dc6 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CheckFastHashTable.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CheckFastHashTable.java
@@ -23,6 +23,7 @@ import java.util.Arrays;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
+import java.util.Properties;
import java.util.Random;
import java.util.TreeMap;
@@ -43,12 +44,14 @@ import static org.junit.Assert.*;
public class CheckFastHashTable {
- public static boolean findMatch(byte[] valueBytes, List<byte[]> actualValues, int actualCount, boolean[] taken) {
+ public static boolean findMatch(int valueIndex, byte[] valueBytes, List<byte[]> actualValues,
+ int actualCount, boolean[] actualTaken, int[] actualToValueMap) {
for (int i = 0; i < actualCount; i++) {
- if (!taken[i]) {
+ if (!actualTaken[i]) {
byte[] actualBytes = actualValues.get(i);
if (StringExpr.compare(valueBytes, 0, valueBytes.length, actualBytes, 0, actualBytes.length) == 0) {
- taken[i] = true;
+ actualToValueMap[i] = valueIndex;
+ actualTaken[i] = true;
return true;
}
}
@@ -56,7 +59,7 @@ public class CheckFastHashTable {
return false;
}
- public static void verifyHashMapValues(VectorMapJoinHashMapResult hashMapResult,
+ public static int[] verifyHashMapValues(VectorMapJoinHashMapResult hashMapResult,
List<byte[]> values) {
int valueCount = values.size();
@@ -87,15 +90,16 @@ public class CheckFastHashTable {
TestCase.fail("values.size() " + valueCount + " does not match actualCount " + actualCount);
}
- boolean[] taken = new boolean[actualCount];
+ boolean[] actualTaken = new boolean[actualCount];
+ int[] actualToValueMap = new int[actualCount];
for (int i = 0; i < actualCount; i++) {
byte[] valueBytes = values.get(i);
- if (!findMatch(valueBytes, actualValues, actualCount, taken)) {
+ if (!findMatch(i, valueBytes, actualValues, actualCount, actualTaken, actualToValueMap)) {
List<Integer> availableLengths = new ArrayList<Integer>();
for (int a = 0; a < actualCount; a++) {
- if (!taken[a]) {
+ if (!actualTaken[a]) {
availableLengths.add(actualValues.get(a).length);
}
}
@@ -103,6 +107,7 @@ public class CheckFastHashTable {
", availableLengths " + availableLengths.toString() + " of " + actualCount + " total)");
}
}
+ return actualToValueMap;
}
/*
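The actualToValueMap returned above is what lets the row-level checks pair each value the hash map hands back (in arbitrary order) with the original row it came from. The same greedy multiset matching in condensed form, as a sketch that uses Arrays.equals where the test uses StringExpr.compare:

    import java.util.Arrays;
    import java.util.List;

    public class MatchSketch {
      // For each expected value, claim the first untaken actual slot holding
      // equal bytes, and remember which expected index that slot matched.
      public static int[] matchUnordered(List<byte[]> expected, List<byte[]> actual) {
        boolean[] taken = new boolean[actual.size()];
        int[] actualToValueMap = new int[actual.size()];
        for (int v = 0; v < expected.size(); v++) {
          for (int i = 0; i < actual.size(); i++) {
            if (!taken[i] && Arrays.equals(expected.get(v), actual.get(i))) {
              taken[i] = true;
              actualToValueMap[i] = v;
              break;
            }
          }
        }
        return actualToValueMap;
      }
    }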
http://git-wip-us.apache.org/repos/asf/hive/blob/2d1f403d/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CheckFastRowHashMap.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CheckFastRowHashMap.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CheckFastRowHashMap.java
new file mode 100644
index 0000000..0bcfb56
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CheckFastRowHashMap.java
@@ -0,0 +1,387 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast;
+
+import static org.junit.Assert.assertTrue;
+
+import java.io.EOFException;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Random;
+import java.util.TreeMap;
+
+import junit.framework.TestCase;
+
+import org.apache.hadoop.hive.ql.exec.JoinUtil;
+import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult;
+import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType;
+import org.apache.hadoop.hive.serde2.WriteBuffers;
+import org.apache.hadoop.hive.serde2.io.ByteWritable;
+import org.apache.hadoop.hive.serde2.io.ShortWritable;
+import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.io.BooleanWritable;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
+
+import com.google.common.base.Preconditions;
+
+public class CheckFastRowHashMap extends CheckFastHashTable {
+
+ public static void verifyHashMapRows(List<Object[]> rows, int[] actualToValueMap,
+ VectorMapJoinHashMapResult hashMapResult, TypeInfo[] typeInfos) throws IOException {
+
+ final int count = rows.size();
+ final int columnCount = typeInfos.length;
+
+ WriteBuffers.ByteSegmentRef ref = hashMapResult.first();
+
+ for (int a = 0; a < count; a++) {
+
+ int valueIndex = actualToValueMap[a];
+
+ Object[] row = rows.get(valueIndex);
+
+ byte[] bytes = ref.getBytes();
+ int offset = (int) ref.getOffset();
+ int length = ref.getLength();
+
+ LazyBinaryDeserializeRead lazyBinaryDeserializeRead =
+ new LazyBinaryDeserializeRead(typeInfos);
+
+ lazyBinaryDeserializeRead.set(bytes, offset, length);
+
+ for (int index = 0; index < columnCount; index++) {
+ Writable writable = (Writable) row[index];
+ VerifyFastRow.verifyDeserializeRead(lazyBinaryDeserializeRead, (PrimitiveTypeInfo) typeInfos[index], writable);
+ }
+ lazyBinaryDeserializeRead.extraFieldsCheck();
+ TestCase.assertTrue(!lazyBinaryDeserializeRead.readBeyondConfiguredFieldsWarned());
+
+ TestCase.assertTrue(!lazyBinaryDeserializeRead.bufferRangeHasExtraDataWarned());
+
+ ref = hashMapResult.next();
+ if (a == count - 1) {
+ TestCase.assertTrue(ref == null);
+ } else {
+ TestCase.assertTrue(ref != null);
+ }
+ }
+ }
+
+ private static String debugDetailedReadPositionString;
+
+ private static String debugDetailedHashMapResultPositionString;
+
+ private static String debugExceptionMessage;
+ private static StackTraceElement[] debugStackTrace;
+
+ public static void verifyHashMapRowsMore(List<Object[]> rows, int[] actualToValueMap,
+ VectorMapJoinHashMapResult hashMapResult, TypeInfo[] typeInfos,
+ int clipIndex, boolean useExactBytes) throws IOException {
+
+ final int count = rows.size();
+ final int columnCount = typeInfos.length;
+
+ WriteBuffers.ByteSegmentRef ref = hashMapResult.first();
+
+ for (int a = 0; a < count; a++) {
+
+ int valueIndex = actualToValueMap[a];
+
+ Object[] row = rows.get(valueIndex);
+
+ byte[] bytes = ref.getBytes();
+ int offset = (int) ref.getOffset();
+ int length = ref.getLength();
+ if (a == clipIndex) {
+ length--;
+ }
+
+ if (useExactBytes) {
+ // Use exact byte array which might generate array out of bounds...
+ bytes = Arrays.copyOfRange(bytes, offset, offset + length);
+ offset = 0;
+ }
+
+ LazyBinaryDeserializeRead lazyBinaryDeserializeRead =
+ new LazyBinaryDeserializeRead(typeInfos);
+
+ lazyBinaryDeserializeRead.set(bytes, offset, length);
+
+ boolean thrown = false;
+ Exception saveException = null;
+ boolean notExpected = false;
+ int index = 0;
+ try {
+ for (index = 0; index < columnCount; index++) {
+ Writable writable = (Writable) row[index];
+ VerifyFastRow.verifyDeserializeRead(lazyBinaryDeserializeRead, (PrimitiveTypeInfo) typeInfos[index], writable);
+ }
+ } catch (Exception e) {
+ thrown = true;
+ saveException = e;
+ debugDetailedReadPositionString = lazyBinaryDeserializeRead.getDetailedReadPositionString();
+
+ debugDetailedHashMapResultPositionString = hashMapResult.getDetailedHashMapResultPositionString();
+
+ debugExceptionMessage = saveException.getMessage();
+ debugStackTrace = saveException.getStackTrace();
+ }
+ if (a == clipIndex) {
+ if (!thrown) {
+ TestCase.fail("Expecting an exception to be thrown for the clipped case...");
+ } else {
+ TestCase.assertTrue(saveException != null);
+ if (saveException instanceof EOFException) {
+ // This is the one we are expecting.
+ } else if (saveException instanceof ArrayIndexOutOfBoundsException) {
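+ // Tolerated: with useExactBytes the value sits in an exact-size array, so a
+ // clipped read can step out of bounds before the EOF check fires.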
+ notExpected = true;
+ } else {
+ TestCase.fail("Expecting an EOFException to be thrown for the clipped case...");
+ }
+ }
+ } else {
+ if (thrown) {
+ TestCase.fail("Not expecting an exception to be thrown for the non-clipped case...");
+ }
+ lazyBinaryDeserializeRead.extraFieldsCheck();
+ TestCase.assertTrue(!lazyBinaryDeserializeRead.readBeyondConfiguredFieldsWarned());
+
+ TestCase.assertTrue(!lazyBinaryDeserializeRead.bufferRangeHasExtraDataWarned());
+ }
+
+ ref = hashMapResult.next();
+ if (a == count - 1) {
+ TestCase.assertTrue(ref == null);
+ } else {
+ TestCase.assertTrue(ref != null);
+ }
+ }
+ }
+
+ /*
+ * Element for Key: row and byte[] x Hash Table: HashMap
+ */
+ public static class FastRowHashMapElement {
+ private byte[] key;
+ private Object[] keyRow;
+ private List<byte[]> values;
+ private List<Object[]> valueRows;
+
+ public FastRowHashMapElement(byte[] key, Object[] keyRow, byte[] firstValue,
+ Object[] valueRow) {
+ this.key = key;
+ this.keyRow = keyRow;
+ values = new ArrayList<byte[]>();
+ values.add(firstValue);
+ valueRows = new ArrayList<Object[]>();
+ valueRows.add(valueRow);
+ }
+
+ public byte[] getKey() {
+ return key;
+ }
+
+ public Object[] getKeyRow() {
+ return keyRow;
+ }
+
+ public int getCount() {
+ return values.size();
+ }
+
+ public List<byte[]> getValues() {
+ return values;
+ }
+
+ public List<Object[]> getValueRows() {
+ return valueRows;
+ }
+
+ public void add(byte[] value, Object[] valueRow) {
+ values.add(value);
+ valueRows.add(valueRow);
+ }
+ }
+
+ /*
+ * Verify table for Key: row and byte[] x Hash Table: HashMap
+ */
+ public static class VerifyFastRowHashMap {
+
+ private int count;
+
+ private FastRowHashMapElement[] array;
+
+ private TreeMap<BytesWritable, Integer> keyValueMap;
+
+ public VerifyFastRowHashMap() {
+ count = 0;
+ array = new FastRowHashMapElement[50];
+
+ // We use BytesWritable because it supports Comparable for our TreeMap.
+ keyValueMap = new TreeMap<BytesWritable, Integer>();
+ }
+
+ public int getCount() {
+ return count;
+ }
+
+ public boolean contains(byte[] key) {
+ BytesWritable keyBytesWritable = new BytesWritable(key, key.length);
+ return keyValueMap.containsKey(keyBytesWritable);
+ }
+
+ public void add(byte[] key, Object[] keyRow, byte[] value, Object[] valueRow) {
+ BytesWritable keyBytesWritable = new BytesWritable(key, key.length);
+ if (keyValueMap.containsKey(keyBytesWritable)) {
+ int index = keyValueMap.get(keyBytesWritable);
+ array[index].add(value, valueRow);
+ } else {
+ if (count >= array.length) {
+ // Grow.
+ FastRowHashMapElement[] newArray = new FastRowHashMapElement[array.length * 2];
+ System.arraycopy(array, 0, newArray, 0, count);
+ array = newArray;
+ }
+ array[count] = new FastRowHashMapElement(key, keyRow, value, valueRow);
+ keyValueMap.put(keyBytesWritable, count);
+ count++;
+ }
+ }
+
+ public byte[] addRandomExisting(byte[] value, Object[] valueRow, Random r) {
+ Preconditions.checkState(count > 0);
+ int index = r.nextInt(count);
+ array[index].add(value, valueRow);
+ return array[index].getKey();
+ }
+
+ public byte[] getKey(int index) {
+ return array[index].getKey();
+ }
+
+ public List<byte[]> getValues(int index) {
+ return array[index].getValues();
+ }
+
+ public void verify(VectorMapJoinFastHashTable map,
+ HashTableKeyType hashTableKeyType,
+ PrimitiveTypeInfo[] valuePrimitiveTypeInfos, boolean doClipping,
+ boolean useExactBytes, Random random) throws IOException {
+ int mapSize = map.size();
+ if (mapSize != count) {
+ TestCase.fail("map.size() does not match expected count");
+ }
+
+ for (int index = 0; index < count; index++) {
+ FastRowHashMapElement element = array[index];
+
+ List<byte[]> values = element.getValues();
+
+ VectorMapJoinHashMapResult hashMapResult = null;
+ JoinUtil.JoinResult joinResult = JoinUtil.JoinResult.NOMATCH;
+ switch (hashTableKeyType) {
+ case BOOLEAN:
+ case BYTE:
+ case SHORT:
+ case INT:
+ case LONG:
+ {
+ Object[] keyRow = element.getKeyRow();
+ Object keyObject = keyRow[0];
+ VectorMapJoinFastLongHashMap longHashMap = (VectorMapJoinFastLongHashMap) map;
+ hashMapResult = longHashMap.createHashMapResult();
+ long longKey;
+ switch (hashTableKeyType) {
+ case BOOLEAN:
+ longKey = ((BooleanWritable) keyObject).get() ? 1 : 0;
+ break;
+ case BYTE:
+ longKey = ((ByteWritable) keyObject).get();
+ break;
+ case SHORT:
+ longKey = ((ShortWritable) keyObject).get();
+ break;
+ case INT:
+ longKey = ((IntWritable) keyObject).get();
+ break;
+ case LONG:
+ longKey = ((LongWritable) keyObject).get();
+ break;
+ default:
+ throw new RuntimeException("Unexpected hash table key type " + hashTableKeyType.name());
+ }
+ joinResult = longHashMap.lookup(longKey, hashMapResult);
+ if (joinResult != JoinUtil.JoinResult.MATCH) {
+ assertTrue(false);
+ }
+ }
+ break;
+ case STRING:
+ {
+ Object[] keyRow = element.getKeyRow();
+ Object keyObject = keyRow[0];
+ VectorMapJoinFastStringHashMap stringHashMap = (VectorMapJoinFastStringHashMap) map;
+ hashMapResult = stringHashMap.createHashMapResult();
+ Text text = (Text) keyObject;
+ byte[] bytes = text.getBytes();
+ int length = text.getLength();
+ joinResult = stringHashMap.lookup(bytes, 0, length, hashMapResult);
+ if (joinResult != JoinUtil.JoinResult.MATCH) {
+ assertTrue(false);
+ }
+ }
+ break;
+ case MULTI_KEY:
+ {
+ byte[] keyBytes = element.getKey();
+ VectorMapJoinFastMultiKeyHashMap stringHashMap = (VectorMapJoinFastMultiKeyHashMap) map;
+ hashMapResult = stringHashMap.createHashMapResult();
+ joinResult = stringHashMap.lookup(keyBytes, 0, keyBytes.length, hashMapResult);
+ if (joinResult != JoinUtil.JoinResult.MATCH) {
+ assertTrue(false);
+ }
+ }
+ break;
+ default:
+ throw new RuntimeException("Unexpected hash table key type " + hashTableKeyType.name());
+ }
+
+ int[] actualToValueMap = verifyHashMapValues(hashMapResult, values);
+
+ List<Object[]> rows = element.getValueRows();
+ if (!doClipping && !useExactBytes) {
+ verifyHashMapRows(rows, actualToValueMap, hashMapResult, valuePrimitiveTypeInfos);
+ } else {
+ int clipIndex = random.nextInt(rows.size());
+ verifyHashMapRowsMore(rows, actualToValueMap, hashMapResult, valuePrimitiveTypeInfos,
+ clipIndex, useExactBytes);
+ }
+ }
+ }
+ }
+}
\ No newline at end of file
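The clipping path above is the heart of HIVE-13874: a buffer truncated by even one byte must surface as an EOFException instead of a silent read past the end. Condensed into a sketch, with bytes, offset, length, typeInfos, and row as in verifyHashMapRowsMore above:

    LazyBinaryDeserializeRead read = new LazyBinaryDeserializeRead(typeInfos);
    read.set(bytes, offset, length - 1);   // clip one byte off the serialized value
    try {
      for (int i = 0; i < typeInfos.length; i++) {
        VerifyFastRow.verifyDeserializeRead(read, (PrimitiveTypeInfo) typeInfos[i], (Writable) row[i]);
      }
      TestCase.fail("expected an EOFException on the clipped buffer");
    } catch (EOFException expected) {
      // The tightened EOF checking reports truncation instead of over-reading.
    }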
http://git-wip-us.apache.org/repos/asf/hive/blob/2d1f403d/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMap.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMap.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMap.java
index bbfa65f..8525e99 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMap.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMap.java
@@ -269,4 +269,19 @@ public class TestVectorMapJoinFastBytesHashMap extends CommonFastHashTable {
int keyCount = 1000;
addAndVerifyMultipleKeyMultipleValue(keyCount, map, verifyTable);
}
+
+ @Test
+ public void testReallyBig() throws Exception {
+ random = new Random(42662);
+
+ // Use a large capacity that doesn't require expansion, yet.
+ VectorMapJoinFastMultiKeyHashMap map =
+ new VectorMapJoinFastMultiKeyHashMap(
+ false, LARGE_CAPACITY, LOAD_FACTOR, MODERATE_WB_SIZE);
+
+ VerifyFastBytesHashMap verifyTable = new VerifyFastBytesHashMap();
+
+ int keyCount = 1000000;
+ addAndVerifyMultipleKeyMultipleValue(keyCount, map, verifyTable);
+ }
}
[2/3] hive git commit: HIVE-13874: Tighten up EOF checking in Fast
DeserializeRead classes; display better exception information;
add new Unit Tests (Matt McCline, reviewed by Sergey Shelukhin)
Posted by mm...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/2d1f403d/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastRowHashMap.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastRowHashMap.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastRowHashMap.java
new file mode 100644
index 0000000..3f02eb3
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastRowHashMap.java
@@ -0,0 +1,718 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Random;
+
+import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource;
+import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.CheckFastRowHashMap.VerifyFastRowHashMap;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType;
+import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.ByteStream.Output;
+import org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe;
+import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite;
+import org.apache.hadoop.hive.serde2.fast.SerializeWrite;
+import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinarySerializeWrite;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.Writable;
+import org.junit.Test;
+
+/*
+ * Tests for the fast row-mode hash maps used by vectorized map join
+ * (long, string, and multi-key variants; the multi-key variants treat
+ * the key as uninterpreted bytes). The shared driver pattern is
+ * sketched after this file's diff.
+ */
+public class TestVectorMapJoinFastRowHashMap extends CommonFastHashTable {
+
+ private void addAndVerifyRows(VectorRandomRowSource valueSource, Object[][] rows,
+ VectorMapJoinFastHashTable map, HashTableKeyType hashTableKeyType,
+ VerifyFastRowHashMap verifyTable, String[] keyTypeNames,
+ boolean doClipping, boolean useExactBytes) throws HiveException, IOException, SerDeException {
+
+ final int keyCount = keyTypeNames.length;
+ PrimitiveTypeInfo[] keyPrimitiveTypeInfos = new PrimitiveTypeInfo[keyCount];
+ PrimitiveCategory[] keyPrimitiveCategories = new PrimitiveCategory[keyCount];
+ ArrayList<ObjectInspector> keyPrimitiveObjectInspectorList =
+ new ArrayList<ObjectInspector>(keyCount);
+
+ for (int i = 0; i < keyCount; i++) {
+ PrimitiveTypeInfo primitiveTypeInfo =
+ (PrimitiveTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString(keyTypeNames[i]);
+ keyPrimitiveTypeInfos[i] = primitiveTypeInfo;
+ PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory();
+ keyPrimitiveCategories[i] = primitiveCategory;
+ keyPrimitiveObjectInspectorList.add(
+ PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(primitiveTypeInfo));
+ }
+
+ boolean[] keyColumnSortOrderIsDesc = new boolean[keyCount];
+ Arrays.fill(keyColumnSortOrderIsDesc, false);
+ byte[] keyColumnNullMarker = new byte[keyCount];
+ Arrays.fill(keyColumnNullMarker, BinarySortableSerDe.ZERO);
+ byte[] keyColumnNotNullMarker = new byte[keyCount];
+ Arrays.fill(keyColumnNotNullMarker, BinarySortableSerDe.ONE);
+
+ BinarySortableSerializeWrite keySerializeWrite =
+ new BinarySortableSerializeWrite(keyColumnSortOrderIsDesc,
+ keyColumnNullMarker, keyColumnNotNullMarker);
+
+
+ PrimitiveTypeInfo[] valuePrimitiveTypeInfos = valueSource.primitiveTypeInfos();
+ final int columnCount = valuePrimitiveTypeInfos.length;
+
+ SerializeWrite valueSerializeWrite = new LazyBinarySerializeWrite(columnCount);
+
+ final int count = rows.length;
+ for (int i = 0; i < count; i++) {
+
+ Object[] valueRow = rows[i];
+
+ Output valueOutput = new Output();
+ ((LazyBinarySerializeWrite) valueSerializeWrite).set(valueOutput);
+
+ for (int index = 0; index < columnCount; index++) {
+
+ Writable writable = (Writable) valueRow[index];
+
+ VerifyFastRow.serializeWrite(valueSerializeWrite, valuePrimitiveTypeInfos[index], writable);
+ }
+
+ byte[] value = Arrays.copyOf(valueOutput.getData(), valueOutput.getLength());
+
+ // Add a new key or add a value to an existing key?
+ byte[] key;
+ if (random.nextBoolean() || verifyTable.getCount() == 0) {
+ Object[] keyRow =
+ VectorRandomRowSource.randomRow(keyCount, random, keyPrimitiveObjectInspectorList,
+ keyPrimitiveCategories, keyPrimitiveTypeInfos);
+
+ Output keyOutput = new Output();
+ keySerializeWrite.set(keyOutput);
+
+ for (int index = 0; index < keyCount; index++) {
+
+ Writable writable = (Writable) keyRow[index];
+
+ VerifyFastRow.serializeWrite(keySerializeWrite, keyPrimitiveTypeInfos[index], writable);
+ }
+
+ key = Arrays.copyOf(keyOutput.getData(), keyOutput.getLength());
+
+ verifyTable.add(key, keyRow, value, valueRow);
+ } else {
+ key = verifyTable.addRandomExisting(value, valueRow, random);
+ }
+
+ // Serialize keyRow into key bytes.
+ BytesWritable keyWritable = new BytesWritable(key);
+ BytesWritable valueWritable = new BytesWritable(value);
+ map.putRow(keyWritable, valueWritable);
+ // verifyTable.verify(map);
+ }
+ verifyTable.verify(map, hashTableKeyType, valuePrimitiveTypeInfos,
+ doClipping, useExactBytes, random);
+ }
+
+ @Test
+ public void testBigIntRows() throws Exception {
+ random = new Random(927337);
+
+ // Use a large capacity that doesn't require expansion, yet.
+ VectorMapJoinFastLongHashMap map =
+ new VectorMapJoinFastLongHashMap(
+ false, false, HashTableKeyType.LONG,
+ LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+ VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+ VectorRandomRowSource valueSource = new VectorRandomRowSource();
+ valueSource.init(random);
+
+ int rowCount = 10000;
+ Object[][] rows = valueSource.randomRows(rowCount);
+
+ addAndVerifyRows(valueSource, rows,
+ map, HashTableKeyType.LONG, verifyTable,
+ new String[] { "bigint" },
+ /* doClipping */ false, /* useExactBytes */ false);
+ }
+
+ @Test
+ public void testIntRows() throws Exception {
+ random = new Random(927337);
+
+ // Use a large capacity that doesn't require expansion, yet.
+ VectorMapJoinFastLongHashMap map =
+ new VectorMapJoinFastLongHashMap(
+ false, false, HashTableKeyType.INT,
+ LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+ VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+ VectorRandomRowSource valueSource = new VectorRandomRowSource();
+ valueSource.init(random);
+
+ int rowCount = 10000;
+ Object[][] rows = valueSource.randomRows(rowCount);
+
+ addAndVerifyRows(valueSource, rows,
+ map, HashTableKeyType.INT, verifyTable,
+ new String[] { "int" },
+ /* doClipping */ false, /* useExactBytes */ false);
+ }
+
+ @Test
+ public void testStringRows() throws Exception {
+ random = new Random(927337);
+
+ // Use a large capacity that doesn't require expansion, yet.
+ VectorMapJoinFastStringHashMap map =
+ new VectorMapJoinFastStringHashMap(
+ false,
+ LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+ VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+ VectorRandomRowSource valueSource = new VectorRandomRowSource();
+ valueSource.init(random);
+
+ int rowCount = 10000;
+ Object[][] rows = valueSource.randomRows(rowCount);
+
+ addAndVerifyRows(valueSource, rows,
+ map, HashTableKeyType.STRING, verifyTable,
+ new String[] { "string" },
+ /* doClipping */ false, /* useExactBytes */ false);
+ }
+
+ @Test
+ public void testMultiKeyRows1() throws Exception {
+ random = new Random(833);
+
+ // Use a large capacity that doesn't require expansion, yet.
+ VectorMapJoinFastMultiKeyHashMap map =
+ new VectorMapJoinFastMultiKeyHashMap(
+ false,
+ LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+ VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+ VectorRandomRowSource valueSource = new VectorRandomRowSource();
+ valueSource.init(random);
+
+ int rowCount = 10000;
+ Object[][] rows = valueSource.randomRows(rowCount);
+
+ addAndVerifyRows(valueSource, rows,
+ map, HashTableKeyType.MULTI_KEY, verifyTable,
+ new String[] { "int", "int" },
+ /* doClipping */ false, /* useExactBytes */ false);
+ }
+
+ @Test
+ public void testMultiKeyRows2() throws Exception {
+ random = new Random(833099);
+
+ // Use a large capacity that doesn't require expansion, yet.
+ VectorMapJoinFastMultiKeyHashMap map =
+ new VectorMapJoinFastMultiKeyHashMap(
+ false,
+ LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+ VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+ VectorRandomRowSource valueSource = new VectorRandomRowSource();
+ valueSource.init(random);
+
+ int rowCount = 10000;
+ Object[][] rows = valueSource.randomRows(rowCount);
+
+ addAndVerifyRows(valueSource, rows,
+ map, HashTableKeyType.MULTI_KEY, verifyTable,
+ new String[] { "string", "string" },
+ /* doClipping */ false, /* useExactBytes */ false);
+ }
+
+ @Test
+ public void testMultiKeyRows3() throws Exception {
+ random = new Random(833099);
+
+ // Use a large capacity that doesn't require expansion, yet.
+ VectorMapJoinFastMultiKeyHashMap map =
+ new VectorMapJoinFastMultiKeyHashMap(
+ false,
+ LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+ VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+ VectorRandomRowSource valueSource = new VectorRandomRowSource();
+ valueSource.init(random);
+
+ int rowCount = 10000;
+ Object[][] rows = valueSource.randomRows(rowCount);
+
+ addAndVerifyRows(valueSource, rows,
+ map, HashTableKeyType.MULTI_KEY, verifyTable,
+ new String[] { "bigint", "timestamp", "double" },
+ /* doClipping */ false, /* useExactBytes */ false);
+ }
+
+ @Test
+ public void testBigIntRowsClipped() throws Exception {
+ random = new Random(326232);
+
+ // Use a large capacity that doesn't require expansion, yet.
+ VectorMapJoinFastLongHashMap map =
+ new VectorMapJoinFastLongHashMap(
+ false, false, HashTableKeyType.LONG,
+ LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+ VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+ VectorRandomRowSource valueSource = new VectorRandomRowSource();
+ valueSource.init(random);
+
+ int rowCount = 10000;
+ Object[][] rows = valueSource.randomRows(rowCount);
+
+ addAndVerifyRows(valueSource, rows,
+ map, HashTableKeyType.LONG, verifyTable,
+ new String[] { "bigint" },
+ /* doClipping */ true, /* useExactBytes */ false);
+ }
+
+ @Test
+ public void testIntRowsClipped() throws Exception {
+ random = new Random(326232);
+
+ // Use a large capacity that doesn't require expansion, yet.
+ VectorMapJoinFastLongHashMap map =
+ new VectorMapJoinFastLongHashMap(
+ false, false, HashTableKeyType.INT,
+ LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+ VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+ VectorRandomRowSource valueSource = new VectorRandomRowSource();
+ valueSource.init(random);
+
+ int rowCount = 10000;
+ Object[][] rows = valueSource.randomRows(rowCount);
+
+ addAndVerifyRows(valueSource, rows,
+ map, HashTableKeyType.INT, verifyTable,
+ new String[] { "int" },
+ /* doClipping */ true, /* useExactBytes */ false);
+ }
+
+ @Test
+ public void testStringRowsClipped() throws Exception {
+ random = new Random(326232);
+
+ // Use a large capacity that doesn't require expansion, yet.
+ VectorMapJoinFastStringHashMap map =
+ new VectorMapJoinFastStringHashMap(
+ false,
+ LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+ VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+ VectorRandomRowSource valueSource = new VectorRandomRowSource();
+ valueSource.init(random);
+
+ int rowCount = 10000;
+ Object[][] rows = valueSource.randomRows(rowCount);
+
+ addAndVerifyRows(valueSource, rows,
+ map, HashTableKeyType.STRING, verifyTable,
+ new String[] { "string" },
+ /* doClipping */ true, /* useExactBytes */ false);
+ }
+
+ @Test
+ public void testMultiKeyRowsClipped1() throws Exception {
+ random = new Random(2331);
+
+ // Use a large capacity that doesn't require expansion, yet.
+ VectorMapJoinFastMultiKeyHashMap map =
+ new VectorMapJoinFastMultiKeyHashMap(
+ false,
+ LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+ VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+ VectorRandomRowSource valueSource = new VectorRandomRowSource();
+ valueSource.init(random);
+
+ int rowCount = 10000;
+ Object[][] rows = valueSource.randomRows(rowCount);
+
+ addAndVerifyRows(valueSource, rows,
+ map, HashTableKeyType.MULTI_KEY, verifyTable,
+ new String[] { "varchar(20)", "date", "interval_day_time" },
+ /* doClipping */ true, /* useExactBytes */ false);
+ }
+
+ @Test
+ public void testMultiKeyRowsClipped2() throws Exception {
+ random = new Random(7403);
+
+ // Use a large capacity that doesn't require expansion, yet.
+ VectorMapJoinFastMultiKeyHashMap map =
+ new VectorMapJoinFastMultiKeyHashMap(
+ false,
+ LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+ VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+ VectorRandomRowSource valueSource = new VectorRandomRowSource();
+ valueSource.init(random);
+
+ int rowCount = 10000;
+ Object[][] rows = valueSource.randomRows(rowCount);
+
+ addAndVerifyRows(valueSource, rows,
+ map, HashTableKeyType.MULTI_KEY, verifyTable,
+ new String[] { "varchar(20)", "varchar(40)" },
+ /* doClipping */ true, /* useExactBytes */ false);
+ }
+
+ @Test
+ public void testMultiKeyRowsClipped3() throws Exception {
+ random = new Random(99);
+
+ // Use a large capacity that doesn't require expansion, yet.
+ VectorMapJoinFastMultiKeyHashMap map =
+ new VectorMapJoinFastMultiKeyHashMap(
+ false,
+ LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+ VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+ VectorRandomRowSource valueSource = new VectorRandomRowSource();
+ valueSource.init(random);
+
+ int rowCount = 10000;
+ Object[][] rows = valueSource.randomRows(rowCount);
+
+ addAndVerifyRows(valueSource, rows,
+ map, HashTableKeyType.MULTI_KEY, verifyTable,
+ new String[] { "float", "tinyint" },
+ /* doClipping */ true, /* useExactBytes */ false);
+ }
+
+ @Test
+ public void testBigIntRowsExact() throws Exception {
+ random = new Random(27722);
+
+ // Use a large capacity that doesn't require expansion, yet.
+ VectorMapJoinFastLongHashMap map =
+ new VectorMapJoinFastLongHashMap(
+ false, false, HashTableKeyType.LONG,
+ LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+ VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+ VectorRandomRowSource valueSource = new VectorRandomRowSource();
+ valueSource.init(random);
+
+ int rowCount = 10000;
+ Object[][] rows = valueSource.randomRows(rowCount);
+
+ addAndVerifyRows(valueSource, rows,
+ map, HashTableKeyType.LONG, verifyTable,
+ new String[] { "bigint" },
+ /* doClipping */ false, /* useExactBytes */ true);
+ }
+
+ @Test
+ public void testIntRowsExact() throws Exception {
+ random = new Random(8238383);
+
+ // Use a large capacity that doesn't require expansion, yet.
+ VectorMapJoinFastLongHashMap map =
+ new VectorMapJoinFastLongHashMap(
+ false, false, HashTableKeyType.INT,
+ LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+ VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+ VectorRandomRowSource valueSource = new VectorRandomRowSource();
+ valueSource.init(random);
+
+ int rowCount = 10000;
+ Object[][] rows = valueSource.randomRows(rowCount);
+
+ addAndVerifyRows(valueSource, rows,
+ map, HashTableKeyType.INT, verifyTable,
+ new String[] { "int" },
+ /* doClipping */ false, /* useExactBytes */ true);
+ }
+
+ @Test
+ public void testStringRowsExact() throws Exception {
+ random = new Random(8235);
+
+ // Use a large capacity that doesn't require expansion, yet.
+ VectorMapJoinFastStringHashMap map =
+ new VectorMapJoinFastStringHashMap(
+ false,
+ LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+ VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+ VectorRandomRowSource valueSource = new VectorRandomRowSource();
+ valueSource.init(random);
+
+ int rowCount = 10000;
+ Object[][] rows = valueSource.randomRows(rowCount);
+
+ addAndVerifyRows(valueSource, rows,
+ map, HashTableKeyType.STRING, verifyTable,
+ new String[] { "string" },
+ /* doClipping */ false, /* useExactBytes */ true);
+ }
+
+ @Test
+ public void testMultiKeyRowsExact1() throws Exception {
+ random = new Random(8235);
+
+ // Use a large capacity that doesn't require expansion, yet.
+ VectorMapJoinFastMultiKeyHashMap map =
+ new VectorMapJoinFastMultiKeyHashMap(
+ false,
+ LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+ VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+ VectorRandomRowSource valueSource = new VectorRandomRowSource();
+ valueSource.init(random);
+
+ int rowCount = 10000;
+ Object[][] rows = valueSource.randomRows(rowCount);
+
+ addAndVerifyRows(valueSource, rows,
+ map, HashTableKeyType.MULTI_KEY, verifyTable,
+ new String[] { "string", "string", "string", "string" },
+ /* doClipping */ false, /* useExactBytes */ true);
+ }
+
+ @Test
+ public void testMultiKeyRowsExact2() throws Exception {
+ random = new Random(8235);
+
+ // Use a large capacity that doesn't require expansion, yet.
+ VectorMapJoinFastMultiKeyHashMap map =
+ new VectorMapJoinFastMultiKeyHashMap(
+ false,
+ LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+ VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+ VectorRandomRowSource valueSource = new VectorRandomRowSource();
+ valueSource.init(random);
+
+ int rowCount = 10000;
+ Object[][] rows = valueSource.randomRows(rowCount);
+
+ addAndVerifyRows(valueSource, rows,
+ map, HashTableKeyType.MULTI_KEY, verifyTable,
+ new String[] { "smallint" },
+ /* doClipping */ false, /* useExactBytes */ true);
+ }
+
+ @Test
+ public void testMultiKeyRowsExact3() throws Exception {
+ random = new Random(8235);
+
+ // Use a large capacity that doesn't require expansion, yet.
+ VectorMapJoinFastMultiKeyHashMap map =
+ new VectorMapJoinFastMultiKeyHashMap(
+ false,
+ LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+ VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+ VectorRandomRowSource valueSource = new VectorRandomRowSource();
+ valueSource.init(random);
+
+ int rowCount = 10000;
+ Object[][] rows = valueSource.randomRows(rowCount);
+
+ addAndVerifyRows(valueSource, rows,
+ map, HashTableKeyType.MULTI_KEY, verifyTable,
+ new String[] { "int", "binary" },
+ /* doClipping */ false, /* useExactBytes */ true);
+ }
+
+ @Test
+ public void testBigIntRowsClippedExact() throws Exception {
+ random = new Random(2122);
+
+ // Use a large capacity that doesn't require expansion, yet.
+ VectorMapJoinFastLongHashMap map =
+ new VectorMapJoinFastLongHashMap(
+ false, false, HashTableKeyType.LONG,
+ LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+ VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+ VectorRandomRowSource valueSource = new VectorRandomRowSource();
+ valueSource.init(random);
+
+ int rowCount = 10000;
+ Object[][] rows = valueSource.randomRows(rowCount);
+
+ addAndVerifyRows(valueSource, rows,
+ map, HashTableKeyType.LONG, verifyTable,
+ new String[] { "bigint" },
+ /* doClipping */ true, /* useExactBytes */ true);
+ }
+
+ @Test
+ public void testIntRowsClippedExact() throws Exception {
+ random = new Random(7520);
+
+ // Use a large capacity that doesn't require expansion, yet.
+ VectorMapJoinFastLongHashMap map =
+ new VectorMapJoinFastLongHashMap(
+ false, false, HashTableKeyType.INT,
+ LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+ VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+ VectorRandomRowSource valueSource = new VectorRandomRowSource();
+ valueSource.init(random);
+
+ int rowCount = 10000;
+ Object[][] rows = valueSource.randomRows(rowCount);
+
+ addAndVerifyRows(valueSource, rows,
+ map, HashTableKeyType.INT, verifyTable,
+ new String[] { "int" },
+ /* doClipping */ true, /* useExactBytes */ true);
+ }
+
+ @Test
+ public void testStringRowsClippedExact() throws Exception {
+ random = new Random(7539);
+
+ // Use a large capacity that doesn't require expansion, yet.
+ VectorMapJoinFastStringHashMap map =
+ new VectorMapJoinFastStringHashMap(
+ false,
+ LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+ VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+ VectorRandomRowSource valueSource = new VectorRandomRowSource();
+ valueSource.init(random);
+
+ int rowCount = 10000;
+ Object[][] rows = valueSource.randomRows(rowCount);
+
+ addAndVerifyRows(valueSource, rows,
+ map, HashTableKeyType.STRING, verifyTable,
+ new String[] { "string" },
+ /* doClipping */ true, /* useExactBytes */ true);
+ }
+
+ @Test
+ public void testMultiKeyRowsClippedExact1() throws Exception {
+ random = new Random(13);
+
+ // Use a large capacity that doesn't require expansion, yet.
+ VectorMapJoinFastMultiKeyHashMap map =
+ new VectorMapJoinFastMultiKeyHashMap(
+ false,
+ LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+ VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+ VectorRandomRowSource valueSource = new VectorRandomRowSource();
+ valueSource.init(random);
+
+ int rowCount = 10000;
+ Object[][] rows = valueSource.randomRows(rowCount);
+
+ addAndVerifyRows(valueSource, rows,
+ map, HashTableKeyType.MULTI_KEY, verifyTable,
+ new String[] { "interval_year_month", "decimal(12,8)" },
+ /* doClipping */ true, /* useExactBytes */ true);
+ }
+
+ @Test
+ public void testMultiKeyRowsClippedExact2() throws Exception {
+ random = new Random(12);
+
+ // Use a large capacity that doesn't require expansion, yet.
+ VectorMapJoinFastMultiKeyHashMap map =
+ new VectorMapJoinFastMultiKeyHashMap(
+ false,
+ LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+ VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+ VectorRandomRowSource valueSource = new VectorRandomRowSource();
+ valueSource.init(random);
+
+ int rowCount = 10000;
+ Object[][] rows = valueSource.randomRows(rowCount);
+
+ addAndVerifyRows(valueSource, rows,
+ map, HashTableKeyType.MULTI_KEY, verifyTable,
+ new String[] { "bigint", "string", "int" },
+ /* doClipping */ true, /* useExactBytes */ true);
+ }
+
+ @Test
+ public void testMultiKeyRowsClippedExact3() throws Exception {
+ random = new Random(7);
+
+ // Use a large capacity that doesn't require expansion, yet.
+ VectorMapJoinFastMultiKeyHashMap map =
+ new VectorMapJoinFastMultiKeyHashMap(
+ false,
+ LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+ VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+ VectorRandomRowSource valueSource = new VectorRandomRowSource();
+ valueSource.init(random);
+
+ int rowCount = 10000;
+ Object[][] rows = valueSource.randomRows(rowCount);
+
+ addAndVerifyRows(valueSource, rows,
+ map, HashTableKeyType.MULTI_KEY, verifyTable,
+ new String[] { "bigint", "string", "varchar(5000)" },
+ /* doClipping */ true, /* useExactBytes */ true);
+ }
+}
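
All of the tests above share one harness shape: seed a Random for reproducibility, build a fast hash map sized so it never needs expansion, generate random value rows, and let addAndVerifyRows(...) insert and re-read them under the clipping and exact-bytes options. For readers outside the Hive tree, a minimal self-contained sketch of that add-then-verify idea, using java.util.HashMap as a stand-in for the VectorMapJoinFast* classes (names and types here are illustrative only, not the Hive API):

import java.util.HashMap;
import java.util.Map;
import java.util.Random;

public class AddAndVerifySketch {
  public static void main(String[] args) {
    Random random = new Random(7520);            // fixed seed, as in the tests
    Map<Long, String> map = new HashMap<>(1024); // stand-in for VectorMapJoinFastLongHashMap

    // Insert random key/value rows, remembering what went in.
    Map<Long, String> expected = new HashMap<>();
    for (int i = 0; i < 10000; i++) {
      long key = random.nextLong();
      String value = "value-" + i;
      map.put(key, value);
      expected.put(key, value);
    }

    // Re-read every row and verify it survived intact
    // (the role VerifyFastRowHashMap plays above).
    for (Map.Entry<Long, String> e : expected.entrySet()) {
      if (!e.getValue().equals(map.get(e.getKey()))) {
        throw new AssertionError("Mismatch for key " + e.getKey());
      }
    }
  }
}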
http://git-wip-us.apache.org/repos/asf/hive/blob/2d1f403d/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VerifyFastRow.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VerifyFastRow.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VerifyFastRow.java
new file mode 100644
index 0000000..118e9e2
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VerifyFastRow.java
@@ -0,0 +1,397 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast;
+
+import java.io.IOException;
+import java.sql.Date;
+import java.sql.Timestamp;
+import java.util.Arrays;
+
+import junit.framework.TestCase;
+
+import org.apache.hadoop.hive.common.type.HiveChar;
+import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.common.type.HiveIntervalDayTime;
+import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth;
+import org.apache.hadoop.hive.common.type.HiveVarchar;
+import org.apache.hadoop.hive.serde2.fast.DeserializeRead;
+import org.apache.hadoop.hive.serde2.fast.SerializeWrite;
+import org.apache.hadoop.hive.serde2.io.ByteWritable;
+import org.apache.hadoop.hive.serde2.io.DateWritable;
+import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.hive.serde2.io.HiveCharWritable;
+import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
+import org.apache.hadoop.hive.serde2.io.HiveIntervalDayTimeWritable;
+import org.apache.hadoop.hive.serde2.io.HiveIntervalYearMonthWritable;
+import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable;
+import org.apache.hadoop.hive.serde2.io.ShortWritable;
+import org.apache.hadoop.hive.serde2.io.TimestampWritable;
+import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
+import org.apache.hadoop.io.BooleanWritable;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.FloatWritable;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
+
+/**
+ * A copy of VerifyFast from the serde module.
+ */
+public class VerifyFastRow {
+
+ public static void verifyDeserializeRead(DeserializeRead deserializeRead,
+ PrimitiveTypeInfo primitiveTypeInfo, Writable writable) throws IOException {
+
+ boolean isNull = deserializeRead.readCheckNull();
+ if (isNull) {
+ if (writable != null) {
+ TestCase.fail("Field reports null but object is not null");
+ }
+ return;
+ } else if (writable == null) {
+ TestCase.fail("Field report not null but object is null");
+ }
+ switch (primitiveTypeInfo.getPrimitiveCategory()) {
+ case BOOLEAN:
+ {
+ boolean value = deserializeRead.currentBoolean;
+ if (!(writable instanceof BooleanWritable)) {
+ TestCase.fail("Boolean expected writable not Boolean");
+ }
+ boolean expected = ((BooleanWritable) writable).get();
+ if (value != expected) {
+ TestCase.fail("Boolean field mismatch (expected " + expected + " found " + value + ")");
+ }
+ }
+ break;
+ case BYTE:
+ {
+ byte value = deserializeRead.currentByte;
+ if (!(writable instanceof ByteWritable)) {
+ TestCase.fail("Byte expected writable not Byte");
+ }
+ byte expected = ((ByteWritable) writable).get();
+ if (value != expected) {
+ TestCase.fail("Byte field mismatch (expected " + (int) expected + " found " + (int) value + ")");
+ }
+ }
+ break;
+ case SHORT:
+ {
+ short value = deserializeRead.currentShort;
+ if (!(writable instanceof ShortWritable)) {
+ TestCase.fail("Short expected writable not Short");
+ }
+ short expected = ((ShortWritable) writable).get();
+ if (value != expected) {
+ TestCase.fail("Short field mismatch (expected " + expected + " found " + value + ")");
+ }
+ }
+ break;
+ case INT:
+ {
+ int value = deserializeRead.currentInt;
+ if (!(writable instanceof IntWritable)) {
+ TestCase.fail("Integer expected writable not Integer");
+ }
+ int expected = ((IntWritable) writable).get();
+ if (value != expected) {
+ TestCase.fail("Int field mismatch (expected " + expected + " found " + value + ")");
+ }
+ }
+ break;
+ case LONG:
+ {
+ long value = deserializeRead.currentLong;
+ if (!(writable instanceof LongWritable)) {
+ TestCase.fail("Long expected writable not Long");
+ }
+ long expected = ((LongWritable) writable).get();
+ if (value != expected) {
+ TestCase.fail("Long field mismatch (expected " + expected + " found " + value + ")");
+ }
+ }
+ break;
+ case FLOAT:
+ {
+ float value = deserializeRead.currentFloat;
+ if (!(writable instanceof FloatWritable)) {
+ TestCase.fail("Float expected writable not Float");
+ }
+ float expected = ((FloatWritable) writable).get();
+ if (value != expected) {
+ TestCase.fail("Float field mismatch (expected " + expected + " found " + value + ")");
+ }
+ }
+ break;
+ case DOUBLE:
+ {
+ double value = deserializeRead.currentDouble;
+ if (!(writable instanceof DoubleWritable)) {
+ TestCase.fail("Double expected writable not Double");
+ }
+ double expected = ((DoubleWritable) writable).get();
+ if (value != expected) {
+ TestCase.fail("Double field mismatch (expected " + expected + " found " + value + ")");
+ }
+ }
+ break;
+ case STRING:
+ {
+ byte[] stringBytes = Arrays.copyOfRange(
+ deserializeRead.currentBytes,
+ deserializeRead.currentBytesStart,
+ deserializeRead.currentBytesStart + deserializeRead.currentBytesLength);
+ Text text = new Text(stringBytes);
+ String string = text.toString();
+ String expected = ((Text) writable).toString();
+ if (!string.equals(expected)) {
+ TestCase.fail("String field mismatch (expected '" + expected + "' found '" + string + "')");
+ }
+ }
+ break;
+ case CHAR:
+ {
+ byte[] stringBytes = Arrays.copyOfRange(
+ deserializeRead.currentBytes,
+ deserializeRead.currentBytesStart,
+ deserializeRead.currentBytesStart + deserializeRead.currentBytesLength);
+ Text text = new Text(stringBytes);
+ String string = text.toString();
+
+ HiveChar hiveChar = new HiveChar(string, ((CharTypeInfo) primitiveTypeInfo).getLength());
+
+ HiveChar expected = ((HiveCharWritable) writable).getHiveChar();
+ if (!hiveChar.equals(expected)) {
+ TestCase.fail("Char field mismatch (expected '" + expected + "' found '" + hiveChar + "')");
+ }
+ }
+ break;
+ case VARCHAR:
+ {
+ byte[] stringBytes = Arrays.copyOfRange(
+ deserializeRead.currentBytes,
+ deserializeRead.currentBytesStart,
+ deserializeRead.currentBytesStart + deserializeRead.currentBytesLength);
+ Text text = new Text(stringBytes);
+ String string = text.toString();
+
+ HiveVarchar hiveVarchar = new HiveVarchar(string, ((VarcharTypeInfo) primitiveTypeInfo).getLength());
+
+ HiveVarchar expected = ((HiveVarcharWritable) writable).getHiveVarchar();
+ if (!hiveVarchar.equals(expected)) {
+ TestCase.fail("Varchar field mismatch (expected '" + expected + "' found '" + hiveVarchar + "')");
+ }
+ }
+ break;
+ case DECIMAL:
+ {
+ HiveDecimal value = deserializeRead.currentHiveDecimalWritable.getHiveDecimal();
+ if (value == null) {
+ TestCase.fail("Decimal field evaluated to NULL");
+ }
+ HiveDecimal expected = ((HiveDecimalWritable) writable).getHiveDecimal();
+ if (!value.equals(expected)) {
+ DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) primitiveTypeInfo;
+ int precision = decimalTypeInfo.getPrecision();
+ int scale = decimalTypeInfo.getScale();
+ TestCase.fail("Decimal field mismatch (expected " + expected.toString() + " found " + value.toString() + ") precision " + precision + ", scale " + scale);
+ }
+ }
+ break;
+ case DATE:
+ {
+ Date value = deserializeRead.currentDateWritable.get();
+ Date expected = ((DateWritable) writable).get();
+ if (!value.equals(expected)) {
+ TestCase.fail("Date field mismatch (expected " + expected.toString() + " found " + value.toString() + ")");
+ }
+ }
+ break;
+ case TIMESTAMP:
+ {
+ Timestamp value = deserializeRead.currentTimestampWritable.getTimestamp();
+ Timestamp expected = ((TimestampWritable) writable).getTimestamp();
+ if (!value.equals(expected)) {
+ TestCase.fail("Timestamp field mismatch (expected " + expected.toString() + " found " + value.toString() + ")");
+ }
+ }
+ break;
+ case INTERVAL_YEAR_MONTH:
+ {
+ HiveIntervalYearMonth value = deserializeRead.currentHiveIntervalYearMonthWritable.getHiveIntervalYearMonth();
+ HiveIntervalYearMonth expected = ((HiveIntervalYearMonthWritable) writable).getHiveIntervalYearMonth();
+ if (!value.equals(expected)) {
+ TestCase.fail("HiveIntervalYearMonth field mismatch (expected " + expected.toString() + " found " + value.toString() + ")");
+ }
+ }
+ break;
+ case INTERVAL_DAY_TIME:
+ {
+ HiveIntervalDayTime value = deserializeRead.currentHiveIntervalDayTimeWritable.getHiveIntervalDayTime();
+ HiveIntervalDayTime expected = ((HiveIntervalDayTimeWritable) writable).getHiveIntervalDayTime();
+ if (!value.equals(expected)) {
+ TestCase.fail("HiveIntervalDayTime field mismatch (expected " + expected.toString() + " found " + value.toString() + ")");
+ }
+ }
+ break;
+ case BINARY:
+ {
+ byte[] byteArray = Arrays.copyOfRange(
+ deserializeRead.currentBytes,
+ deserializeRead.currentBytesStart,
+ deserializeRead.currentBytesStart + deserializeRead.currentBytesLength);
+ BytesWritable bytesWritable = (BytesWritable) writable;
+ byte[] expected = Arrays.copyOfRange(bytesWritable.getBytes(), 0, bytesWritable.getLength());
+ if (byteArray.length != expected.length) {
+ TestCase.fail("Byte Array field mismatch (expected " + Arrays.toString(expected)
+ + " found " + Arrays.toString(byteArray) + ")");
+ }
+ for (int b = 0; b < byteArray.length; b++) {
+ if (byteArray[b] != expected[b]) {
+ TestCase.fail("Byte Array field mismatch (expected " + Arrays.toString(expected)
+ + " found " + Arrays.toString(byteArray) + ")");
+ }
+ }
+ }
+ break;
+ default:
+ throw new Error("Unknown primitive category " + primitiveTypeInfo.getPrimitiveCategory());
+ }
+ }
+
+ public static void serializeWrite(SerializeWrite serializeWrite,
+ PrimitiveTypeInfo primitiveTypeInfo, Writable writable) throws IOException {
+ if (writable == null) {
+ serializeWrite.writeNull();
+ return;
+ }
+ switch (primitiveTypeInfo.getPrimitiveCategory()) {
+ case BOOLEAN:
+ {
+ boolean value = ((BooleanWritable) writable).get();
+ serializeWrite.writeBoolean(value);
+ }
+ break;
+ case BYTE:
+ {
+ byte value = ((ByteWritable) writable).get();
+ serializeWrite.writeByte(value);
+ }
+ break;
+ case SHORT:
+ {
+ short value = ((ShortWritable) writable).get();
+ serializeWrite.writeShort(value);
+ }
+ break;
+ case INT:
+ {
+ int value = ((IntWritable) writable).get();
+ serializeWrite.writeInt(value);
+ }
+ break;
+ case LONG:
+ {
+ long value = ((LongWritable) writable).get();
+ serializeWrite.writeLong(value);
+ }
+ break;
+ case FLOAT:
+ {
+ float value = ((FloatWritable) writable).get();
+ serializeWrite.writeFloat(value);
+ }
+ break;
+ case DOUBLE:
+ {
+ double value = ((DoubleWritable) writable).get();
+ serializeWrite.writeDouble(value);
+ }
+ break;
+ case STRING:
+ {
+ Text value = (Text) writable;
+ byte[] stringBytes = value.getBytes();
+ int stringLength = stringBytes.length;
+ serializeWrite.writeString(stringBytes, 0, stringLength);
+ }
+ break;
+ case CHAR:
+ {
+ HiveChar value = ((HiveCharWritable) writable).getHiveChar();
+ serializeWrite.writeHiveChar(value);
+ }
+ break;
+ case VARCHAR:
+ {
+ HiveVarchar value = ((HiveVarcharWritable) writable).getHiveVarchar();
+ serializeWrite.writeHiveVarchar(value);
+ }
+ break;
+ case DECIMAL:
+ {
+ HiveDecimal value = ((HiveDecimalWritable) writable).getHiveDecimal();
+ DecimalTypeInfo decTypeInfo = (DecimalTypeInfo) primitiveTypeInfo;
+ serializeWrite.writeHiveDecimal(value, decTypeInfo.scale());
+ }
+ break;
+ case DATE:
+ {
+ Date value = ((DateWritable) writable).get();
+ serializeWrite.writeDate(value);
+ }
+ break;
+ case TIMESTAMP:
+ {
+ Timestamp value = ((TimestampWritable) writable).getTimestamp();
+ serializeWrite.writeTimestamp(value);
+ }
+ break;
+ case INTERVAL_YEAR_MONTH:
+ {
+ HiveIntervalYearMonth value = ((HiveIntervalYearMonthWritable) writable).getHiveIntervalYearMonth();
+ serializeWrite.writeHiveIntervalYearMonth(value);
+ }
+ break;
+ case INTERVAL_DAY_TIME:
+ {
+ HiveIntervalDayTime value = ((HiveIntervalDayTimeWritable) writable).getHiveIntervalDayTime();
+ serializeWrite.writeHiveIntervalDayTime(value);
+ }
+ break;
+ case BINARY:
+ {
+ BytesWritable byteWritable = (BytesWritable) writable;
+ byte[] binaryBytes = byteWritable.getBytes();
+ int length = byteWritable.getLength();
+ serializeWrite.writeBinary(binaryBytes, 0, length);
+ }
+ break;
+ default:
+ throw new Error("Unknown primitive category " + primitiveTypeInfo.getPrimitiveCategory().name());
+ }
+ }
+}
\ No newline at end of file
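
VerifyFastRow's two halves are meant to be used as a round trip: serializeWrite(...) pushes a Writable through a SerializeWrite implementation, and verifyDeserializeRead(...) checks that a DeserializeRead implementation hands the same value back. A hedged sketch of that round trip for a single bigint field follows; the LazyBinaryDeserializeRead(TypeInfo[]) constructor and the set(...) methods appear in this patch, while the LazyBinarySerializeWrite constructor and ByteStream.Output accessors are assumed from the surrounding codebase:

import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VerifyFastRow;
import org.apache.hadoop.hive.serde2.ByteStream.Output;
import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead;
import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinarySerializeWrite;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.io.LongWritable;

public class RoundTripSketch {
  public static void main(String[] args) throws Exception {
    TypeInfo[] typeInfos = new TypeInfo[] {
        TypeInfoUtils.getTypeInfoFromTypeString("bigint") };
    LongWritable writable = new LongWritable(42L);

    // Serialize one field.
    Output output = new Output();
    LazyBinarySerializeWrite serializeWrite = new LazyBinarySerializeWrite(1);
    serializeWrite.set(output);
    VerifyFastRow.serializeWrite(serializeWrite,
        (PrimitiveTypeInfo) typeInfos[0], writable);

    // Deserialize it back and verify the value matches.
    LazyBinaryDeserializeRead deserializeRead =
        new LazyBinaryDeserializeRead(typeInfos);
    deserializeRead.set(output.getData(), 0, output.getLength());
    VerifyFastRow.verifyDeserializeRead(deserializeRead,
        (PrimitiveTypeInfo) typeInfos[0], writable);
  }
}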
http://git-wip-us.apache.org/repos/asf/hive/blob/2d1f403d/serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableDeserializeRead.java
----------------------------------------------------------------------
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableDeserializeRead.java b/serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableDeserializeRead.java
index be36ba4..003a2d4 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableDeserializeRead.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableDeserializeRead.java
@@ -42,7 +42,7 @@ import org.apache.hadoop.io.Text;
*
* Reading some fields require a results object to receive value information. A separate
* results object is created by the caller at initialization per different field even for the same
- * type.
+ * type.
*
* Some type values are by reference to either bytes in the deserialization buffer or to
* other type specific buffers. So, those references are only valid until the next time set is
@@ -61,6 +61,8 @@ public final class BinarySortableDeserializeRead extends DeserializeRead {
private int fieldCount;
private int start;
+ private int end;
+ private int fieldStart;
private byte[] tempTimestampBytes;
private Text tempText;
@@ -68,7 +70,6 @@ public final class BinarySortableDeserializeRead extends DeserializeRead {
private byte[] tempDecimalBuffer;
private boolean readBeyondConfiguredFieldsWarned;
- private boolean readBeyondBufferRangeWarned;
private boolean bufferRangeHasExtraDataWarned;
private InputByteBuffer inputByteBuffer = new InputByteBuffer();
@@ -92,7 +93,6 @@ public final class BinarySortableDeserializeRead extends DeserializeRead {
}
inputByteBuffer = new InputByteBuffer();
readBeyondConfiguredFieldsWarned = false;
- readBeyondBufferRangeWarned = false;
bufferRangeHasExtraDataWarned = false;
}
@@ -107,8 +107,40 @@ public final class BinarySortableDeserializeRead extends DeserializeRead {
@Override
public void set(byte[] bytes, int offset, int length) {
fieldIndex = -1;
- inputByteBuffer.reset(bytes, offset, offset + length);
start = offset;
+ end = offset + length;
+ inputByteBuffer.reset(bytes, start, end);
+ }
+
+ /*
+ * Get detailed read position information to help diagnose exceptions.
+ */
+ public String getDetailedReadPositionString() {
+ StringBuffer sb = new StringBuffer();
+
+ sb.append("Reading inputByteBuffer of length ");
+ sb.append(inputByteBuffer.getEnd());
+ sb.append(" at start offset ");
+ sb.append(start);
+ sb.append(" for length ");
+ sb.append(end - start);
+ sb.append(" to read ");
+ sb.append(fieldCount);
+ sb.append(" fields with types ");
+ sb.append(Arrays.toString(typeInfos));
+ sb.append(". ");
+ if (fieldIndex == -1) {
+ sb.append("Before first field?");
+ } else {
+ sb.append("Read field #");
+ sb.append(fieldIndex);
+ sb.append(" at field start position ");
+ sb.append(fieldStart);
+ sb.append(" current read offset ");
+ sb.append(inputByteBuffer.tell());
+ }
+
+ return sb.toString();
}
/*
@@ -133,12 +165,11 @@ public final class BinarySortableDeserializeRead extends DeserializeRead {
}
if (inputByteBuffer.isEof()) {
// Also, reading beyond our byte range produces NULL.
- if (!readBeyondBufferRangeWarned) {
- doReadBeyondBufferRangeWarned();
- }
- // We cannot read beyond so we must return NULL here.
return true;
}
+
+ fieldStart = inputByteBuffer.tell();
+
byte isNullByte = inputByteBuffer.read(columnSortOrderIsDesc[fieldIndex]);
if (isNullByte == 0) {
@@ -298,7 +329,7 @@ public final class BinarySortableDeserializeRead extends DeserializeRead {
factor = -factor;
}
- int start = inputByteBuffer.tell();
+ int decimalStart = inputByteBuffer.tell();
int length = 0;
do {
@@ -317,7 +348,7 @@ public final class BinarySortableDeserializeRead extends DeserializeRead {
tempDecimalBuffer = new byte[length];
}
- inputByteBuffer.seek(start);
+ inputByteBuffer.seek(decimalStart);
for (int i = 0; i < length; ++i) {
tempDecimalBuffer[i] = inputByteBuffer.read(positive ? invert : !invert);
}
@@ -392,10 +423,6 @@ public final class BinarySortableDeserializeRead extends DeserializeRead {
return readBeyondConfiguredFieldsWarned;
}
@Override
- public boolean readBeyondBufferRangeWarned() {
- return readBeyondBufferRangeWarned;
- }
- @Override
public boolean bufferRangeHasExtraDataWarned() {
return bufferRangeHasExtraDataWarned;
}
@@ -410,14 +437,4 @@ public final class BinarySortableDeserializeRead extends DeserializeRead {
+ " reading more (NULLs returned). Ignoring similar problems.");
readBeyondConfiguredFieldsWarned = true;
}
-
- private void doReadBeyondBufferRangeWarned() {
- // Warn only once.
- int length = inputByteBuffer.tell() - start;
- LOG.info("Reading beyond buffer range! Buffer range " + start
- + " for length " + length + " but reading more... "
- + "(total buffer length " + inputByteBuffer.getData().length + ")"
- + " Ignoring similar problems.");
- readBeyondBufferRangeWarned = true;
- }
}
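
The new getDetailedReadPositionString() exists so callers can report where the reader was when deserialization fails, which is the "display better exception information" half of this change. A sketch of the intended call-site pattern (the method framing here is illustrative, not part of this patch):

import java.io.IOException;
import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead;

// A sketch: read one long key, wrapping any failure with read-position detail.
static long readKeyOrExplain(BinarySortableDeserializeRead deserializeRead)
    throws IOException {
  try {
    if (deserializeRead.readCheckNull()) {
      return 0L;  // NULL key; sentinel chosen for illustration only
    }
    return deserializeRead.currentLong;
  } catch (Exception e) {
    // Attach buffer range, field index, and current read offset to the failure.
    throw new IOException("Deserializing key failed: "
        + deserializeRead.getDetailedReadPositionString(), e);
  }
}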
http://git-wip-us.apache.org/repos/asf/hive/blob/2d1f403d/serde/src/java/org/apache/hadoop/hive/serde2/fast/DeserializeRead.java
----------------------------------------------------------------------
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/fast/DeserializeRead.java b/serde/src/java/org/apache/hadoop/hive/serde2/fast/DeserializeRead.java
index 2fad2af..8f3e771 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/fast/DeserializeRead.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/fast/DeserializeRead.java
@@ -143,10 +143,14 @@ public abstract class DeserializeRead {
* Read integrity warning flags.
*/
public abstract boolean readBeyondConfiguredFieldsWarned();
- public abstract boolean readBeyondBufferRangeWarned();
public abstract boolean bufferRangeHasExtraDataWarned();
/*
+ * Get detailed read position information to help diagnose exceptions.
+ */
+ public abstract String getDetailedReadPositionString();
+
+ /*
* These members hold the current value that was read when readCheckNull return false.
*/
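
That last comment is the contract every fast reader follows: readCheckNull() advances one field, and when it returns false the caller picks the value out of the current* member matching that field's type; by-reference members such as currentBytes stay valid only until the next set(...). A hedged consumption sketch (process(...) is a hypothetical sink, not a Hive API):

for (TypeInfo typeInfo : typeInfos) {
  if (deserializeRead.readCheckNull()) {
    continue;  // this field is NULL
  }
  switch (((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory()) {
  case LONG:
    process(deserializeRead.currentLong);
    break;
  case STRING:
    // By-reference: the bytes are only valid until the next set(...) call,
    // so copy them out if the value must outlive this row.
    process(Arrays.copyOfRange(deserializeRead.currentBytes,
        deserializeRead.currentBytesStart,
        deserializeRead.currentBytesStart + deserializeRead.currentBytesLength));
    break;
  default:
    break;  // other categories elided
  }
}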
http://git-wip-us.apache.org/repos/asf/hive/blob/2d1f403d/serde/src/java/org/apache/hadoop/hive/serde2/fast/RandomRowObjectSource.java
----------------------------------------------------------------------
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/fast/RandomRowObjectSource.java b/serde/src/java/org/apache/hadoop/hive/serde2/fast/RandomRowObjectSource.java
deleted file mode 100644
index 1bb990c..0000000
--- a/serde/src/java/org/apache/hadoop/hive/serde2/fast/RandomRowObjectSource.java
+++ /dev/null
@@ -1,423 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.serde2.fast;
-
-import java.sql.Date;
-import java.sql.Timestamp;
-import java.util.ArrayList;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Random;
-
-import org.apache.commons.lang.ArrayUtils;
-import org.apache.commons.lang.StringUtils;
-import org.apache.hadoop.hive.common.type.HiveChar;
-import org.apache.hadoop.hive.common.type.HiveDecimal;
-import org.apache.hadoop.hive.common.type.HiveIntervalDayTime;
-import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth;
-import org.apache.hadoop.hive.common.type.HiveVarchar;
-import org.apache.hadoop.hive.common.type.RandomTypeUtil;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
-import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBooleanObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableByteObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDateObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDoubleObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableFloatObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveCharObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveDecimalObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveIntervalDayTimeObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveIntervalYearMonthObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveVarcharObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableIntObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableLongObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableShortObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableTimestampObjectInspector;
-import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
-import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
-import org.apache.hive.common.util.DateUtils;
-
-/**
- * Generate object inspector and random row object[].
- */
-public class RandomRowObjectSource {
-
- private Random r;
-
- private int columnCount;
-
- private List<String> typeNames;
-
- private PrimitiveCategory[] primitiveCategories;
-
- private PrimitiveTypeInfo[] primitiveTypeInfos;
-
- private List<ObjectInspector> primitiveObjectInspectorList;
-
- private StructObjectInspector rowStructObjectInspector;
-
- public List<String> typeNames() {
- return typeNames;
- }
-
- public PrimitiveCategory[] primitiveCategories() {
- return primitiveCategories;
- }
-
- public PrimitiveTypeInfo[] primitiveTypeInfos() {
- return primitiveTypeInfos;
- }
-
- public StructObjectInspector rowStructObjectInspector() {
- return rowStructObjectInspector;
- }
-
- public StructObjectInspector partialRowStructObjectInspector(int partialFieldCount) {
- ArrayList<ObjectInspector> partialPrimitiveObjectInspectorList =
- new ArrayList<ObjectInspector>(partialFieldCount);
- List<String> columnNames = new ArrayList<String>(partialFieldCount);
- for (int i = 0; i < partialFieldCount; i++) {
- columnNames.add(String.format("partial%d", i));
- partialPrimitiveObjectInspectorList.add(
- PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(
- primitiveTypeInfos[i]));
- }
-
- return ObjectInspectorFactory.getStandardStructObjectInspector(
- columnNames, primitiveObjectInspectorList);
- }
-
- public void init(Random r) {
- this.r = r;
- chooseSchema();
- }
-
- /*
- * For now, exclude CHAR until we determine why there is a difference (blank padding)
- * serializing with LazyBinarySerializeWrite and the regular SerDe...
- */
- private static String[] possibleHiveTypeNames = {
- "boolean",
- "tinyint",
- "smallint",
- "int",
- "bigint",
- "date",
- "float",
- "double",
- "string",
-// "char",
- "varchar",
- "binary",
- "date",
- "timestamp",
- "interval_year_month",
- "interval_day_time",
- "decimal"
- };
-
- private void chooseSchema() {
- HashSet hashSet = null;
- boolean allTypes;
- boolean onlyOne = (r.nextInt(100) == 7);
- if (onlyOne) {
- columnCount = 1;
- allTypes = false;
- } else {
- allTypes = r.nextBoolean();
- if (allTypes) {
- // One of each type.
- columnCount = possibleHiveTypeNames.length;
- hashSet = new HashSet<Integer>();
- } else {
- columnCount = 1 + r.nextInt(20);
- }
- }
- typeNames = new ArrayList<String>(columnCount);
- primitiveCategories = new PrimitiveCategory[columnCount];
- primitiveTypeInfos = new PrimitiveTypeInfo[columnCount];
- primitiveObjectInspectorList = new ArrayList<ObjectInspector>(columnCount);
- List<String> columnNames = new ArrayList<String>(columnCount);
- for (int c = 0; c < columnCount; c++) {
- columnNames.add(String.format("col%d", c));
- String typeName;
-
- if (onlyOne) {
- typeName = possibleHiveTypeNames[r.nextInt(possibleHiveTypeNames.length)];
- } else {
- int typeNum;
- if (allTypes) {
- while (true) {
- typeNum = r.nextInt(possibleHiveTypeNames.length);
- Integer typeNumInteger = new Integer(typeNum);
- if (!hashSet.contains(typeNumInteger)) {
- hashSet.add(typeNumInteger);
- break;
- }
- }
- } else {
- typeNum = r.nextInt(possibleHiveTypeNames.length);
- }
- typeName = possibleHiveTypeNames[typeNum];
- }
- if (typeName.equals("char")) {
- int maxLength = 1 + r.nextInt(100);
- typeName = String.format("char(%d)", maxLength);
- } else if (typeName.equals("varchar")) {
- int maxLength = 1 + r.nextInt(100);
- typeName = String.format("varchar(%d)", maxLength);
- } else if (typeName.equals("decimal")) {
- typeName = String.format("decimal(%d,%d)", HiveDecimal.SYSTEM_DEFAULT_PRECISION, HiveDecimal.SYSTEM_DEFAULT_SCALE);
- }
- PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString(typeName);
- primitiveTypeInfos[c] = primitiveTypeInfo;
- PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory();
- primitiveCategories[c] = primitiveCategory;
- primitiveObjectInspectorList.add(PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(primitiveTypeInfo));
- typeNames.add(typeName);
- }
- rowStructObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, primitiveObjectInspectorList);
- }
-
- public Object[][] randomRows(int n) {
- Object[][] result = new Object[n][];
- for (int i = 0; i < n; i++) {
- result[i] = randomRow();
- }
- return result;
- }
-
- public Object[] randomRow() {
- Object row[] = new Object[columnCount];
- for (int c = 0; c < columnCount; c++) {
- Object object = randomObject(c);
- if (object == null) {
- throw new Error("Unexpected null for column " + c);
- }
- row[c] = getWritableObject(c, object);
- if (row[c] == null) {
- throw new Error("Unexpected null for writable for column " + c);
- }
- }
- return row;
- }
-
- public static void sort(Object[][] rows, ObjectInspector oi) {
- for (int i = 0; i < rows.length; i++) {
- for (int j = i + 1; j < rows.length; j++) {
- if (ObjectInspectorUtils.compare(rows[i], oi, rows[j], oi) > 0) {
- Object[] t = rows[i];
- rows[i] = rows[j];
- rows[j] = t;
- }
- }
- }
- }
-
- public void sort(Object[][] rows) {
- RandomRowObjectSource.sort(rows, rowStructObjectInspector);
- }
-
- public Object getWritableObject(int column, Object object) {
- ObjectInspector objectInspector = primitiveObjectInspectorList.get(column);
- PrimitiveCategory primitiveCategory = primitiveCategories[column];
- PrimitiveTypeInfo primitiveTypeInfo = primitiveTypeInfos[column];
- switch (primitiveCategory) {
- case BOOLEAN:
- return ((WritableBooleanObjectInspector) objectInspector).create((boolean) object);
- case BYTE:
- return ((WritableByteObjectInspector) objectInspector).create((byte) object);
- case SHORT:
- return ((WritableShortObjectInspector) objectInspector).create((short) object);
- case INT:
- return ((WritableIntObjectInspector) objectInspector).create((int) object);
- case LONG:
- return ((WritableLongObjectInspector) objectInspector).create((long) object);
- case DATE:
- return ((WritableDateObjectInspector) objectInspector).create((Date) object);
- case FLOAT:
- return ((WritableFloatObjectInspector) objectInspector).create((float) object);
- case DOUBLE:
- return ((WritableDoubleObjectInspector) objectInspector).create((double) object);
- case STRING:
- return ((WritableStringObjectInspector) objectInspector).create((String) object);
- case CHAR:
- {
- WritableHiveCharObjectInspector writableCharObjectInspector =
- new WritableHiveCharObjectInspector( (CharTypeInfo) primitiveTypeInfo);
- return writableCharObjectInspector.create(new HiveChar(StringUtils.EMPTY, -1));
- }
- case VARCHAR:
- {
- WritableHiveVarcharObjectInspector writableVarcharObjectInspector =
- new WritableHiveVarcharObjectInspector( (VarcharTypeInfo) primitiveTypeInfo);
- return writableVarcharObjectInspector.create(new HiveVarchar(StringUtils.EMPTY, -1));
- }
- case BINARY:
- return PrimitiveObjectInspectorFactory.writableBinaryObjectInspector.create(ArrayUtils.EMPTY_BYTE_ARRAY);
- case TIMESTAMP:
- return ((WritableTimestampObjectInspector) objectInspector).create(new Timestamp(0));
- case INTERVAL_YEAR_MONTH:
- return ((WritableHiveIntervalYearMonthObjectInspector) objectInspector).create(new HiveIntervalYearMonth(0));
- case INTERVAL_DAY_TIME:
- return ((WritableHiveIntervalDayTimeObjectInspector) objectInspector).create(new HiveIntervalDayTime(0, 0));
- case DECIMAL:
- {
- WritableHiveDecimalObjectInspector writableDecimalObjectInspector =
- new WritableHiveDecimalObjectInspector((DecimalTypeInfo) primitiveTypeInfo);
- return writableDecimalObjectInspector.create(HiveDecimal.ZERO);
- }
- default:
- throw new Error("Unknown primitive category " + primitiveCategory);
- }
- }
-
- public Object randomObject(int column) {
- PrimitiveCategory primitiveCategory = primitiveCategories[column];
- PrimitiveTypeInfo primitiveTypeInfo = primitiveTypeInfos[column];
- switch (primitiveCategory) {
- case BOOLEAN:
- return Boolean.valueOf(r.nextInt(1) == 1);
- case BYTE:
- return Byte.valueOf((byte) r.nextInt());
- case SHORT:
- return Short.valueOf((short) r.nextInt());
- case INT:
- return Integer.valueOf(r.nextInt());
- case LONG:
- return Long.valueOf(r.nextLong());
- case DATE:
- return RandomTypeUtil.getRandDate(r);
- case FLOAT:
- return Float.valueOf(r.nextFloat() * 10 - 5);
- case DOUBLE:
- return Double.valueOf(r.nextDouble() * 10 - 5);
- case STRING:
- return RandomTypeUtil.getRandString(r);
- case CHAR:
- return getRandHiveChar(r, (CharTypeInfo) primitiveTypeInfo);
- case VARCHAR:
- return getRandHiveVarchar(r, (VarcharTypeInfo) primitiveTypeInfo);
- case BINARY:
- return getRandBinary(r, 1 + r.nextInt(100));
- case TIMESTAMP:
- return RandomTypeUtil.getRandTimestamp(r);
- case INTERVAL_YEAR_MONTH:
- return getRandIntervalYearMonth(r);
- case INTERVAL_DAY_TIME:
- return getRandIntervalDayTime(r);
- case DECIMAL:
- return getRandHiveDecimal(r, (DecimalTypeInfo) primitiveTypeInfo);
- default:
- throw new Error("Unknown primitive category " + primitiveCategory);
- }
- }
-
- public static HiveChar getRandHiveChar(Random r, CharTypeInfo charTypeInfo) {
- int maxLength = 1 + r.nextInt(charTypeInfo.getLength());
- String randomString = RandomTypeUtil.getRandString(r, "abcdefghijklmnopqrstuvwxyz", 100);
- HiveChar hiveChar = new HiveChar(randomString, maxLength);
- return hiveChar;
- }
-
- public static HiveVarchar getRandHiveVarchar(Random r, VarcharTypeInfo varcharTypeInfo) {
- int maxLength = 1 + r.nextInt(varcharTypeInfo.getLength());
- String randomString = RandomTypeUtil.getRandString(r, "abcdefghijklmnopqrstuvwxyz", 100);
- HiveVarchar hiveVarchar = new HiveVarchar(randomString, maxLength);
- return hiveVarchar;
- }
-
- public static byte[] getRandBinary(Random r, int len){
- byte[] bytes = new byte[len];
- for (int j = 0; j < len; j++){
- bytes[j] = Byte.valueOf((byte) r.nextInt());
- }
- return bytes;
- }
-
- private static final String DECIMAL_CHARS = "0123456789";
-
- public static HiveDecimal getRandHiveDecimal(Random r, DecimalTypeInfo decimalTypeInfo) {
- while (true) {
- StringBuilder sb = new StringBuilder();
- int precision = 1 + r.nextInt(18);
- int scale = 0 + r.nextInt(precision + 1);
-
- int integerDigits = precision - scale;
-
- if (r.nextBoolean()) {
- sb.append("-");
- }
-
- if (integerDigits == 0) {
- sb.append("0");
- } else {
- sb.append(RandomTypeUtil.getRandString(r, DECIMAL_CHARS, integerDigits));
- }
- if (scale != 0) {
- sb.append(".");
- sb.append(RandomTypeUtil.getRandString(r, DECIMAL_CHARS, scale));
- }
-
- HiveDecimal bd = HiveDecimal.create(sb.toString());
- if (bd.scale() > bd.precision()) {
- // Sometimes weird decimals are produced?
- continue;
- }
-
- return bd;
- }
- }
-
- public static HiveIntervalYearMonth getRandIntervalYearMonth(Random r) {
- String yearMonthSignStr = r.nextInt(2) == 0 ? "" : "-";
- String intervalYearMonthStr = String.format("%s%d-%d",
- yearMonthSignStr,
- Integer.valueOf(1800 + r.nextInt(500)), // year
- Integer.valueOf(0 + r.nextInt(12))); // month
- HiveIntervalYearMonth intervalYearMonthVal = HiveIntervalYearMonth.valueOf(intervalYearMonthStr);
- return intervalYearMonthVal;
- }
-
- public static HiveIntervalDayTime getRandIntervalDayTime(Random r) {
- String optionalNanos = "";
- if (r.nextInt(2) == 1) {
- optionalNanos = String.format(".%09d",
- Integer.valueOf(0 + r.nextInt(DateUtils.NANOS_PER_SEC)));
- }
- String yearMonthSignStr = r.nextInt(2) == 0 ? "" : "-";
- String dayTimeStr = String.format("%s%d %02d:%02d:%02d%s",
- yearMonthSignStr,
- Integer.valueOf(1 + r.nextInt(28)), // day
- Integer.valueOf(0 + r.nextInt(24)), // hour
- Integer.valueOf(0 + r.nextInt(60)), // minute
- Integer.valueOf(0 + r.nextInt(60)), // second
- optionalNanos);
- HiveIntervalDayTime intervalDayTimeVal = HiveIntervalDayTime.valueOf(dayTimeStr);
- return intervalDayTimeVal;
- }
-}
http://git-wip-us.apache.org/repos/asf/hive/blob/2d1f403d/serde/src/java/org/apache/hadoop/hive/serde2/fast/SerializeWrite.java
----------------------------------------------------------------------
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/fast/SerializeWrite.java b/serde/src/java/org/apache/hadoop/hive/serde2/fast/SerializeWrite.java
index e562ce3..fb41420 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/fast/SerializeWrite.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/fast/SerializeWrite.java
@@ -96,7 +96,7 @@ public interface SerializeWrite {
/*
* STRING.
- *
+ *
* Can be used to write CHAR and VARCHAR when the caller takes responsibility for
* truncation/padding issues.
*/
http://git-wip-us.apache.org/repos/asf/hive/blob/2d1f403d/serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleDeserializeRead.java
----------------------------------------------------------------------
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleDeserializeRead.java b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleDeserializeRead.java
index 765ba7e..ac44390 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleDeserializeRead.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleDeserializeRead.java
@@ -21,6 +21,7 @@ package org.apache.hadoop.hive.serde2.lazy.fast;
import java.io.UnsupportedEncodingException;
import java.nio.charset.CharacterCodingException;
import java.sql.Date;
+import java.util.Arrays;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -73,6 +74,7 @@ public final class LazySimpleDeserializeRead extends DeserializeRead {
private int end;
private int fieldCount;
private int fieldIndex;
+ private int parseFieldIndex;
private int fieldStart;
private int fieldLength;
@@ -124,6 +126,41 @@ public final class LazySimpleDeserializeRead extends DeserializeRead {
fieldIndex = -1;
}
+ /*
+ * Get detailed read position information to help diagnose exceptions.
+ */
+ public String getDetailedReadPositionString() {
+ StringBuffer sb = new StringBuffer();
+
+ sb.append("Reading byte[] of length ");
+ sb.append(bytes.length);
+ sb.append(" at start offset ");
+ sb.append(start);
+ sb.append(" for length ");
+ sb.append(end - start);
+ sb.append(" to read ");
+ sb.append(fieldCount);
+ sb.append(" fields with types ");
+ sb.append(Arrays.toString(typeInfos));
+ sb.append(". ");
+ if (fieldIndex == -1) {
+ sb.append("Error during field delimitor parsing of field #");
+ sb.append(parseFieldIndex);
+ } else {
+ sb.append("Read field #");
+ sb.append(fieldIndex);
+ sb.append(" at field start position ");
+ sb.append(startPosition[fieldIndex]);
+ int currentFieldLength = startPosition[fieldIndex + 1] - startPosition[fieldIndex] - 1;
+ sb.append(" for field length ");
+ sb.append(currentFieldLength);
+ sb.append(" current read offset ");
+ sb.append(offset);
+ }
+
+ return sb.toString();
+ }
+
/**
* Parse the byte[] and fill each field.
*
@@ -133,27 +170,29 @@ public final class LazySimpleDeserializeRead extends DeserializeRead {
private void parse() {
int structByteEnd = end;
- int fieldId = 0;
int fieldByteBegin = start;
int fieldByteEnd = start;
+ // Kept as a member variable to support getDetailedReadPositionString.
+ parseFieldIndex = 0;
+
// Go through all bytes in the byte[]
while (fieldByteEnd <= structByteEnd) {
if (fieldByteEnd == structByteEnd || bytes[fieldByteEnd] == separator) {
// Reached the end of a field?
- if (lastColumnTakesRest && fieldId == fieldCount - 1) {
+ if (lastColumnTakesRest && parseFieldIndex == fieldCount - 1) {
fieldByteEnd = structByteEnd;
}
- startPosition[fieldId] = fieldByteBegin;
- fieldId++;
- if (fieldId == fieldCount || fieldByteEnd == structByteEnd) {
+ startPosition[parseFieldIndex] = fieldByteBegin;
+ parseFieldIndex++;
+ if (parseFieldIndex == fieldCount || fieldByteEnd == structByteEnd) {
// All fields have been parsed, or bytes have been parsed.
// We need to set the startPosition of fields.length to ensure we
// can use the same formula to calculate the length of each field.
// For missing fields, their starting positions will all be the same,
// which will make their lengths to be -1 and uncheckedGetField will
// return these fields as NULLs.
- for (int i = fieldId; i <= fieldCount; i++) {
+ for (int i = parseFieldIndex; i <= fieldCount; i++) {
startPosition[i] = fieldByteEnd + 1;
}
break;
@@ -177,8 +216,8 @@ public final class LazySimpleDeserializeRead extends DeserializeRead {
}
// Missing fields?
- if (!missingFieldWarned && fieldId < fieldCount) {
- doMissingFieldWarned(fieldId);
+ if (!missingFieldWarned && parseFieldIndex < fieldCount) {
+ doMissingFieldWarned(parseFieldIndex);
}
}
@@ -518,12 +557,8 @@ public final class LazySimpleDeserializeRead extends DeserializeRead {
return missingFieldWarned;
}
@Override
- public boolean readBeyondBufferRangeWarned() {
- return extraFieldWarned;
- }
- @Override
public boolean bufferRangeHasExtraDataWarned() {
- return false; // UNDONE: Get rid of...
+ return false;
}
private void doExtraFieldWarned() {
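
The parse() loop above records each field's start offset in startPosition and writes sentinel entries through index fieldCount, so every field length, including the last field and any missing fields, falls out of the single formula startPosition[i + 1] - startPosition[i] - 1 (missing fields share a start and get length -1, which reads back as NULL). A self-contained sketch of that technique:

public class StartPositionSketch {
  public static void main(String[] args) {
    // Split "a,bb,,dddd" on ',' with one more configured field than the data has.
    byte[] bytes = "a,bb,,dddd".getBytes(java.nio.charset.StandardCharsets.UTF_8);
    int fieldCount = 5;
    int[] startPosition = new int[fieldCount + 1];

    int fieldId = 0;
    int begin = 0;
    int i = 0;
    while (i <= bytes.length && fieldId < fieldCount) {
      if (i == bytes.length || bytes[i] == ',') {
        startPosition[fieldId++] = begin;   // record where this field starts
        begin = i + 1;
      }
      i++;
    }
    // Sentinel entries: missing fields share one start, making their length -1.
    for (int j = fieldId; j <= fieldCount; j++) {
      startPosition[j] = bytes.length + 1;
    }

    for (int f = 0; f < fieldCount; f++) {
      int length = startPosition[f + 1] - startPosition[f] - 1;
      System.out.println("field " + f + " length " + length);  // prints 1, 2, 0, 4, -1
    }
  }
}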
http://git-wip-us.apache.org/repos/asf/hive/blob/2d1f403d/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinaryDeserializeRead.java
----------------------------------------------------------------------
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinaryDeserializeRead.java b/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinaryDeserializeRead.java
index bbb35c7..0df1d79 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinaryDeserializeRead.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinaryDeserializeRead.java
@@ -20,6 +20,8 @@ package org.apache.hadoop.hive.serde2.lazybinary.fast;
import java.io.EOFException;
import java.io.IOException;
+import java.util.Arrays;
+
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.hive.common.type.HiveDecimal;
@@ -31,6 +33,7 @@ import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.VInt;
import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.VLong;
import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.io.WritableUtils;
/*
* Directly deserialize with the caller reading field-by-field the LazyBinary serialization format.
@@ -54,6 +57,7 @@ public final class LazyBinaryDeserializeRead extends DeserializeRead {
private int offset;
private int end;
private int fieldCount;
+ private int fieldStart;
private int fieldIndex;
private byte nullByte;
@@ -62,7 +66,6 @@ public final class LazyBinaryDeserializeRead extends DeserializeRead {
private VLong tempVLong;
private boolean readBeyondConfiguredFieldsWarned;
- private boolean readBeyondBufferRangeWarned;
private boolean bufferRangeHasExtraDataWarned;
public LazyBinaryDeserializeRead(TypeInfo[] typeInfos) {
@@ -71,7 +74,6 @@ public final class LazyBinaryDeserializeRead extends DeserializeRead {
tempVInt = new VInt();
tempVLong = new VLong();
readBeyondConfiguredFieldsWarned = false;
- readBeyondBufferRangeWarned = false;
bufferRangeHasExtraDataWarned = false;
}
@@ -93,6 +95,32 @@ public final class LazyBinaryDeserializeRead extends DeserializeRead {
}
/*
+ * Get detailed read position information to help diagnose exceptions.
+ */
+ public String getDetailedReadPositionString() {
+ StringBuffer sb = new StringBuffer();
+
+ sb.append("Reading byte[] of length ");
+ sb.append(bytes.length);
+ sb.append(" at start offset ");
+ sb.append(start);
+ sb.append(" for length ");
+ sb.append(end - start);
+ sb.append(" to read ");
+ sb.append(fieldCount);
+ sb.append(" fields with types ");
+ sb.append(Arrays.toString(typeInfos));
+ sb.append(". Read field #");
+ sb.append(fieldIndex);
+ sb.append(" at field start position ");
+ sb.append(fieldStart);
+ sb.append(" current read offset ");
+ sb.append(offset);
+
+ return sb.toString();
+ }
+
+ /*
* Reads the NULL information for a field.
*
* @return Returns true when the field is NULL; reading is positioned to the next field.
@@ -111,11 +139,13 @@ public final class LazyBinaryDeserializeRead extends DeserializeRead {
return true;
}
+ fieldStart = offset;
+
if (fieldIndex == 0) {
// The rest of the range check for fields after the first is below after checking
// the NULL byte.
if (offset >= end) {
- warnBeyondEof();
+ throw new EOFException();
}
nullByte = bytes[offset++];
}
@@ -129,9 +159,7 @@ public final class LazyBinaryDeserializeRead extends DeserializeRead {
// Make sure there is at least one byte that can be read for a value.
if (offset >= end) {
- // Careful: since we may be dealing with NULLs in the final NULL byte, we check after
- // the NULL byte check..
- warnBeyondEof();
+ throw new EOFException();
}
/*
@@ -149,33 +177,33 @@ public final class LazyBinaryDeserializeRead extends DeserializeRead {
case SHORT:
// Last item -- ok to be at end.
if (offset + 2 > end) {
- warnBeyondEof();
+ throw new EOFException();
}
currentShort = LazyBinaryUtils.byteArrayToShort(bytes, offset);
offset += 2;
break;
case INT:
+ // Parse the first byte of a vint/vlong to determine the number of bytes.
+ if (offset + WritableUtils.decodeVIntSize(bytes[offset]) > end) {
+ throw new EOFException();
+ }
LazyBinaryUtils.readVInt(bytes, offset, tempVInt);
offset += tempVInt.length;
- // Last item -- ok to be at end.
- if (offset > end) {
- warnBeyondEof();
- }
currentInt = tempVInt.value;
break;
case LONG:
+ // Parse the first byte of a vint/vlong to determine the number of bytes.
+ if (offset + WritableUtils.decodeVIntSize(bytes[offset]) > end) {
+ throw new EOFException();
+ }
LazyBinaryUtils.readVLong(bytes, offset, tempVLong);
offset += tempVLong.length;
- // Last item -- ok to be at end.
- if (offset > end) {
- warnBeyondEof();
- }
currentLong = tempVLong.value;
break;
case FLOAT:
// Last item -- ok to be at end.
if (offset + 4 > end) {
- warnBeyondEof();
+ throw new EOFException();
}
currentFloat = Float.intBitsToFloat(LazyBinaryUtils.byteArrayToInt(bytes, offset));
offset += 4;
@@ -183,7 +211,7 @@ public final class LazyBinaryDeserializeRead extends DeserializeRead {
case DOUBLE:
// Last item -- ok to be at end.
if (offset + 8 > end) {
- warnBeyondEof();
+ throw new EOFException();
}
currentDouble = Double.longBitsToDouble(LazyBinaryUtils.byteArrayToLong(bytes, offset));
offset += 8;
@@ -195,18 +223,19 @@ public final class LazyBinaryDeserializeRead extends DeserializeRead {
case VARCHAR:
{
// using vint instead of 4 bytes
+ // Parse the first byte of a vint/vlong to determine the number of bytes.
+ if (offset + WritableUtils.decodeVIntSize(bytes[offset]) > end) {
+ throw new EOFException();
+ }
LazyBinaryUtils.readVInt(bytes, offset, tempVInt);
offset += tempVInt.length;
- // Could be last item for empty string -- ok to be at end.
- if (offset > end) {
- warnBeyondEof();
- }
+
int saveStart = offset;
int length = tempVInt.value;
offset += length;
// Last item -- ok to be at end.
if (offset > end) {
- warnBeyondEof();
+ throw new EOFException();
}
currentBytes = bytes;
@@ -215,12 +244,12 @@ public final class LazyBinaryDeserializeRead extends DeserializeRead {
}
break;
case DATE:
+ // Parse the first byte of a vint/vlong to determine the number of bytes.
+ if (offset + WritableUtils.decodeVIntSize(bytes[offset]) > end) {
+ throw new EOFException();
+ }
LazyBinaryUtils.readVInt(bytes, offset, tempVInt);
offset += tempVInt.length;
- // Last item -- ok to be at end.
- if (offset > end) {
- warnBeyondEof();
- }
currentDateWritable.set(tempVInt.value);
break;
@@ -231,34 +260,37 @@ public final class LazyBinaryDeserializeRead extends DeserializeRead {
offset += length;
// Last item -- ok to be at end.
if (offset > end) {
- warnBeyondEof();
+ throw new EOFException();
}
currentTimestampWritable.set(bytes, saveStart);
}
break;
case INTERVAL_YEAR_MONTH:
+ // Parse the first byte of a vint/vlong to determine the number of bytes.
+ if (offset + WritableUtils.decodeVIntSize(bytes[offset]) > end) {
+ throw new EOFException();
+ }
LazyBinaryUtils.readVInt(bytes, offset, tempVInt);
offset += tempVInt.length;
- // Last item -- ok to be at end.
- if (offset > end) {
- warnBeyondEof();
- }
+
currentHiveIntervalYearMonthWritable.set(tempVInt.value);
break;
case INTERVAL_DAY_TIME:
+ // The first bounds check requires at least one more byte beyond for 2nd int (hence >=).
+ // Parse the first byte of a vint/vlong to determine the number of bytes.
+ if (offset + WritableUtils.decodeVIntSize(bytes[offset]) >= end) {
+ throw new EOFException();
+ }
LazyBinaryUtils.readVLong(bytes, offset, tempVLong);
offset += tempVLong.length;
- if (offset >= end) {
- // Overshoot or not enough for next item.
- warnBeyondEof();
+
+ // Parse the first byte of a vint/vlong to determine the number of bytes.
+ if (offset + WritableUtils.decodeVIntSize(bytes[offset]) > end) {
+ throw new EOFException();
}
LazyBinaryUtils.readVInt(bytes, offset, tempVInt);
offset += tempVInt.length;
- // Last item -- ok to be at end.
- if (offset > end) {
- warnBeyondEof();
- }
currentHiveIntervalDayTimeWritable.set(tempVLong.value, tempVInt.value);
break;
@@ -269,23 +301,27 @@ public final class LazyBinaryDeserializeRead extends DeserializeRead {
// These calls are to see how much data there is. The setFromBytes call below will do the same
// readVInt reads but actually unpack the decimal.
+
+ // The first bounds check requires at least one more byte beyond for 2nd int (hence >=).
+ // Parse the first byte of a vint/vlong to determine the number of bytes.
+ if (offset + WritableUtils.decodeVIntSize(bytes[offset]) >= end) {
+ throw new EOFException();
+ }
LazyBinaryUtils.readVInt(bytes, offset, tempVInt);
int saveStart = offset;
offset += tempVInt.length;
- if (offset >= end) {
- // Overshoot or not enough for next item.
- warnBeyondEof();
+
+ // Parse the first byte of a vint/vlong to determine the number of bytes.
+ if (offset + WritableUtils.decodeVIntSize(bytes[offset]) > end) {
+ throw new EOFException();
}
LazyBinaryUtils.readVInt(bytes, offset, tempVInt);
offset += tempVInt.length;
- if (offset >= end) {
- // Overshoot or not enough for next item.
- warnBeyondEof();
- }
+
offset += tempVInt.value;
// Last item -- ok to be at end.
if (offset > end) {
- warnBeyondEof();
+ throw new EOFException();
}
int length = offset - saveStart;
@@ -327,7 +363,7 @@ public final class LazyBinaryDeserializeRead extends DeserializeRead {
if ((fieldIndex % 8) == 0) {
// Get next null byte.
if (offset >= end) {
- warnBeyondEof();
+ throw new EOFException();
}
nullByte = bytes[offset++];
}
@@ -363,23 +399,7 @@ public final class LazyBinaryDeserializeRead extends DeserializeRead {
return readBeyondConfiguredFieldsWarned;
}
@Override
- public boolean readBeyondBufferRangeWarned() {
- return readBeyondBufferRangeWarned;
- }
- @Override
public boolean bufferRangeHasExtraDataWarned() {
return bufferRangeHasExtraDataWarned;
}
-
- private void warnBeyondEof() throws EOFException {
- if (!readBeyondBufferRangeWarned) {
- // Warn only once.
- int length = end - start;
- LOG.info("Reading beyond buffer range! Buffer range " + start
- + " for length " + length + " but reading more... "
- + "(total buffer length " + bytes.length + ")"
- + " Ignoring similar problems.");
- readBeyondBufferRangeWarned = true;
- }
- }
}
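
Both the INT/LONG cases and the two-vint cases (INTERVAL_DAY_TIME, DECIMAL) above lean on the same property: a Hadoop vint/vlong announces its total byte length in its first byte, so the reader can bounds-check the whole varint before decoding it and throw EOFException instead of taking the old warn-and-return-NULL path. A sketch of the first-byte length rule (matching Hadoop's WritableUtils.decodeVIntSize, to the best of our reading) together with the tightened guard:

import java.io.EOFException;

public class VIntGuardSketch {

  // First-byte length rule for Hadoop vints/vlongs:
  //   value >= -112         -> the byte is the whole value: 1 byte total
  //   -120 <= value <= -113 -> positive multi-byte: (-111 - value) bytes total
  //   value <= -121         -> negative multi-byte: (-119 - value) bytes total
  static int decodeVIntSize(byte value) {
    if (value >= -112) {
      return 1;
    } else if (value < -120) {
      return -119 - value;
    }
    return -111 - value;
  }

  // The tightened pattern from this patch: prove the whole vint fits in
  // [offset, end) before decoding, and fail loudly rather than warn.
  static void checkVIntFits(byte[] bytes, int offset, int end) throws EOFException {
    if (offset + decodeVIntSize(bytes[offset]) > end) {
      throw new EOFException();
    }
  }

  public static void main(String[] args) throws EOFException {
    byte[] bytes = { -113, 42 };             // a 2-byte vint
    checkVIntFits(bytes, 0, bytes.length);   // fits: no exception
    try {
      checkVIntFits(bytes, 0, 1);            // truncated: throws
    } catch (EOFException expected) {
      System.out.println("EOF detected before decoding, as intended");
    }
  }
}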