Posted to commits@hive.apache.org by mm...@apache.org on 2016/05/15 03:51:42 UTC
[1/3] hive git commit: HIVE-13682: EOFException with fast hashtable
(Matt McCline, reviewed by Sergey Shelukhin)
Repository: hive
Updated Branches:
refs/heads/master fbeee6236 -> 4533d21b0
http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/serde/src/test/org/apache/hadoop/hive/serde2/VerifyFast.java
----------------------------------------------------------------------
diff --git a/serde/src/test/org/apache/hadoop/hive/serde2/VerifyFast.java b/serde/src/test/org/apache/hadoop/hive/serde2/VerifyFast.java
index e27c6b1..52dd5a3 100644
--- a/serde/src/test/org/apache/hadoop/hive/serde2/VerifyFast.java
+++ b/serde/src/test/org/apache/hadoop/hive/serde2/VerifyFast.java
@@ -32,12 +32,27 @@ import org.apache.hadoop.hive.common.type.HiveVarchar;
import org.apache.hadoop.hive.serde2.fast.DeserializeRead;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
import org.apache.hadoop.hive.serde2.fast.SerializeWrite;
+import org.apache.hadoop.hive.serde2.io.ByteWritable;
import org.apache.hadoop.hive.serde2.io.DateWritable;
+import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.hive.serde2.io.HiveCharWritable;
+import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
+import org.apache.hadoop.hive.serde2.io.HiveIntervalDayTimeWritable;
+import org.apache.hadoop.hive.serde2.io.HiveIntervalYearMonthWritable;
+import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable;
+import org.apache.hadoop.hive.serde2.io.ShortWritable;
+import org.apache.hadoop.hive.serde2.io.TimestampWritable;
import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
+import org.apache.hadoop.io.BooleanWritable;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.FloatWritable;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
/**
* VerifyFast.
@@ -45,27 +60,28 @@ import org.apache.hadoop.io.Text;
*/
public class VerifyFast {
- public static void verifyDeserializeRead(DeserializeRead deserializeRead, PrimitiveTypeInfo primitiveTypeInfo, Object object) throws IOException {
+ public static void verifyDeserializeRead(DeserializeRead deserializeRead,
+ PrimitiveTypeInfo primitiveTypeInfo, Writable writable) throws IOException {
boolean isNull;
isNull = deserializeRead.readCheckNull();
if (isNull) {
- if (object != null) {
+ if (writable != null) {
TestCase.fail("Field reports null but object is not null");
}
return;
- } else if (object == null) {
+ } else if (writable == null) {
TestCase.fail("Field report not null but object is null");
}
switch (primitiveTypeInfo.getPrimitiveCategory()) {
case BOOLEAN:
{
boolean value = deserializeRead.currentBoolean;
- if (!(object instanceof Boolean)) {
- TestCase.fail("Boolean expected object not Boolean");
+ if (!(writable instanceof BooleanWritable)) {
+ TestCase.fail("Boolean expected writable not Boolean");
}
- Boolean expected = (Boolean) object;
+ boolean expected = ((BooleanWritable) writable).get();
if (value != expected) {
TestCase.fail("Boolean field mismatch (expected " + expected + " found " + value + ")");
}
@@ -74,10 +90,10 @@ public class VerifyFast {
case BYTE:
{
byte value = deserializeRead.currentByte;
- if (!(object instanceof Byte)) {
- TestCase.fail("Byte expected object not Byte");
+ if (!(writable instanceof ByteWritable)) {
+ TestCase.fail("Byte expected writable not Byte");
}
- Byte expected = (Byte) object;
+ byte expected = ((ByteWritable) writable).get();
if (value != expected) {
TestCase.fail("Byte field mismatch (expected " + (int) expected + " found " + (int) value + ")");
}
@@ -86,10 +102,10 @@ public class VerifyFast {
case SHORT:
{
short value = deserializeRead.currentShort;
- if (!(object instanceof Short)) {
- TestCase.fail("Short expected object not Short");
+ if (!(writable instanceof ShortWritable)) {
+ TestCase.fail("Short expected writable not Short");
}
- Short expected = (Short) object;
+ short expected = ((ShortWritable) writable).get();
if (value != expected) {
TestCase.fail("Short field mismatch (expected " + expected + " found " + value + ")");
}
@@ -98,10 +114,10 @@ public class VerifyFast {
case INT:
{
int value = deserializeRead.currentInt;
- if (!(object instanceof Integer)) {
- TestCase.fail("Integer expected object not Integer");
+ if (!(writable instanceof IntWritable)) {
+ TestCase.fail("Integer expected writable not Integer");
}
- Integer expected = (Integer) object;
+ int expected = ((IntWritable) writable).get();
if (value != expected) {
TestCase.fail("Int field mismatch (expected " + expected + " found " + value + ")");
}
@@ -110,10 +126,10 @@ public class VerifyFast {
case LONG:
{
long value = deserializeRead.currentLong;
- if (!(object instanceof Long)) {
- TestCase.fail("Long expected object not Long");
+ if (!(writable instanceof LongWritable)) {
+ TestCase.fail("Long expected writable not Long");
}
- Long expected = (Long) object;
+ long expected = ((LongWritable) writable).get();
if (value != expected) {
TestCase.fail("Long field mismatch (expected " + expected + " found " + value + ")");
}
@@ -122,10 +138,10 @@ public class VerifyFast {
case FLOAT:
{
float value = deserializeRead.currentFloat;
- Float expected = (Float) object;
- if (!(object instanceof Float)) {
- TestCase.fail("Float expected object not Float");
+ if (!(writable instanceof FloatWritable)) {
+ TestCase.fail("Float expected writable not Float");
}
+ float expected = ((FloatWritable) writable).get();
if (value != expected) {
TestCase.fail("Float field mismatch (expected " + expected + " found " + value + ")");
}
@@ -134,10 +150,10 @@ public class VerifyFast {
case DOUBLE:
{
double value = deserializeRead.currentDouble;
- Double expected = (Double) object;
- if (!(object instanceof Double)) {
- TestCase.fail("Double expected object not Double");
+ if (!(writable instanceof DoubleWritable)) {
+ TestCase.fail("Double expected writable not Double");
}
+ double expected = ((DoubleWritable) writable).get();
if (value != expected) {
TestCase.fail("Double field mismatch (expected " + expected + " found " + value + ")");
}
@@ -151,7 +167,7 @@ public class VerifyFast {
deserializeRead.currentBytesStart + deserializeRead.currentBytesLength);
Text text = new Text(stringBytes);
String string = text.toString();
- String expected = (String) object;
+ String expected = ((Text) writable).toString();
if (!string.equals(expected)) {
TestCase.fail("String field mismatch (expected '" + expected + "' found '" + string + "')");
}
@@ -168,7 +184,7 @@ public class VerifyFast {
HiveChar hiveChar = new HiveChar(string, ((CharTypeInfo) primitiveTypeInfo).getLength());
- HiveChar expected = (HiveChar) object;
+ HiveChar expected = ((HiveCharWritable) writable).getHiveChar();
if (!hiveChar.equals(expected)) {
TestCase.fail("Char field mismatch (expected '" + expected + "' found '" + hiveChar + "')");
}
@@ -185,7 +201,7 @@ public class VerifyFast {
HiveVarchar hiveVarchar = new HiveVarchar(string, ((VarcharTypeInfo) primitiveTypeInfo).getLength());
- HiveVarchar expected = (HiveVarchar) object;
+ HiveVarchar expected = ((HiveVarcharWritable) writable).getHiveVarchar();
if (!hiveVarchar.equals(expected)) {
TestCase.fail("Varchar field mismatch (expected '" + expected + "' found '" + hiveVarchar + "')");
}
@@ -197,7 +213,7 @@ public class VerifyFast {
if (value == null) {
TestCase.fail("Decimal field evaluated to NULL");
}
- HiveDecimal expected = (HiveDecimal) object;
+ HiveDecimal expected = ((HiveDecimalWritable) writable).getHiveDecimal();
if (!value.equals(expected)) {
DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) primitiveTypeInfo;
int precision = decimalTypeInfo.getPrecision();
@@ -209,7 +225,7 @@ public class VerifyFast {
case DATE:
{
Date value = deserializeRead.currentDateWritable.get();
- Date expected = (Date) object;
+ Date expected = ((DateWritable) writable).get();
if (!value.equals(expected)) {
TestCase.fail("Date field mismatch (expected " + expected.toString() + " found " + value.toString() + ")");
}
@@ -218,7 +234,7 @@ public class VerifyFast {
case TIMESTAMP:
{
Timestamp value = deserializeRead.currentTimestampWritable.getTimestamp();
- Timestamp expected = (Timestamp) object;
+ Timestamp expected = ((TimestampWritable) writable).getTimestamp();
if (!value.equals(expected)) {
TestCase.fail("Timestamp field mismatch (expected " + expected.toString() + " found " + value.toString() + ")");
}
@@ -227,7 +243,7 @@ public class VerifyFast {
case INTERVAL_YEAR_MONTH:
{
HiveIntervalYearMonth value = deserializeRead.currentHiveIntervalYearMonthWritable.getHiveIntervalYearMonth();
- HiveIntervalYearMonth expected = (HiveIntervalYearMonth) object;
+ HiveIntervalYearMonth expected = ((HiveIntervalYearMonthWritable) writable).getHiveIntervalYearMonth();
if (!value.equals(expected)) {
TestCase.fail("HiveIntervalYearMonth field mismatch (expected " + expected.toString() + " found " + value.toString() + ")");
}
@@ -236,7 +252,7 @@ public class VerifyFast {
case INTERVAL_DAY_TIME:
{
HiveIntervalDayTime value = deserializeRead.currentHiveIntervalDayTimeWritable.getHiveIntervalDayTime();
- HiveIntervalDayTime expected = (HiveIntervalDayTime) object;
+ HiveIntervalDayTime expected = ((HiveIntervalDayTimeWritable) writable).getHiveIntervalDayTime();
if (!value.equals(expected)) {
TestCase.fail("HiveIntervalDayTime field mismatch (expected " + expected.toString() + " found " + value.toString() + ")");
}
@@ -248,7 +264,8 @@ public class VerifyFast {
deserializeRead.currentBytes,
deserializeRead.currentBytesStart,
deserializeRead.currentBytesStart + deserializeRead.currentBytesLength);
- byte[] expected = (byte[]) object;
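+ // BytesWritable.getBytes() returns the backing buffer, which may be longer than
+ // the logical contents, so compare only the first getLength() bytes.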
+ BytesWritable bytesWritable = (BytesWritable) writable;
+ byte[] expected = Arrays.copyOfRange(bytesWritable.getBytes(), 0, bytesWritable.getLength());
if (byteArray.length != expected.length){
TestCase.fail("Byte Array field mismatch (expected " + Arrays.toString(expected)
+ " found " + Arrays.toString(byteArray) + ")");
@@ -266,57 +283,58 @@ public class VerifyFast {
}
}
- public static void serializeWrite(SerializeWrite serializeWrite, PrimitiveTypeInfo primitiveTypeInfo, Object object) throws IOException {
- if (object == null) {
+ public static void serializeWrite(SerializeWrite serializeWrite,
+ PrimitiveTypeInfo primitiveTypeInfo, Writable writable) throws IOException {
+ if (writable == null) {
serializeWrite.writeNull();
return;
}
switch (primitiveTypeInfo.getPrimitiveCategory()) {
case BOOLEAN:
{
- boolean value = (Boolean) object;
+ boolean value = ((BooleanWritable) writable).get();
serializeWrite.writeBoolean(value);
}
break;
case BYTE:
{
- byte value = (Byte) object;
+ byte value = ((ByteWritable) writable).get();
serializeWrite.writeByte(value);
}
break;
case SHORT:
{
- short value = (Short) object;
+ short value = ((ShortWritable) writable).get();
serializeWrite.writeShort(value);
}
break;
case INT:
{
- int value = (Integer) object;
+ int value = ((IntWritable) writable).get();
serializeWrite.writeInt(value);
}
break;
case LONG:
{
- long value = (Long) object;
+ long value = ((LongWritable) writable).get();
serializeWrite.writeLong(value);
}
break;
case FLOAT:
{
- float value = (Float) object;
+ float value = ((FloatWritable) writable).get();
serializeWrite.writeFloat(value);
}
break;
case DOUBLE:
{
- double value = (Double) object;
+ double value = ((DoubleWritable) writable).get();
serializeWrite.writeDouble(value);
}
break;
case STRING:
{
- String value = (String) object;
+ Text value = (Text) writable;
byte[] stringBytes = value.getBytes();
int stringLength = stringBytes.length;
serializeWrite.writeString(stringBytes, 0, stringLength);
@@ -324,51 +342,52 @@ public class VerifyFast {
break;
case CHAR:
{
- HiveChar value = (HiveChar) object;
+ HiveChar value = ((HiveCharWritable) writable).getHiveChar();
serializeWrite.writeHiveChar(value);
}
break;
case VARCHAR:
{
- HiveVarchar value = (HiveVarchar) object;
+ HiveVarchar value = ((HiveVarcharWritable) writable).getHiveVarchar();
serializeWrite.writeHiveVarchar(value);
}
break;
case DECIMAL:
{
- HiveDecimal value = (HiveDecimal) object;
+ HiveDecimal value = ((HiveDecimalWritable) writable).getHiveDecimal();
DecimalTypeInfo decTypeInfo = (DecimalTypeInfo)primitiveTypeInfo;
serializeWrite.writeHiveDecimal(value, decTypeInfo.scale());
}
break;
case DATE:
{
- Date value = (Date) object;
+ Date value = ((DateWritable) writable).get();
serializeWrite.writeDate(value);
}
break;
case TIMESTAMP:
{
- Timestamp value = (Timestamp) object;
+ Timestamp value = ((TimestampWritable) writable).getTimestamp();
serializeWrite.writeTimestamp(value);
}
break;
case INTERVAL_YEAR_MONTH:
{
- HiveIntervalYearMonth value = (HiveIntervalYearMonth) object;
+ HiveIntervalYearMonth value = ((HiveIntervalYearMonthWritable) writable).getHiveIntervalYearMonth();
serializeWrite.writeHiveIntervalYearMonth(value);
}
break;
case INTERVAL_DAY_TIME:
{
- HiveIntervalDayTime value = (HiveIntervalDayTime) object;
+ HiveIntervalDayTime value = ((HiveIntervalDayTimeWritable) writable).getHiveIntervalDayTime();
serializeWrite.writeHiveIntervalDayTime(value);
}
break;
case BINARY:
{
- byte[] binaryBytes = (byte[]) object;
- int length = binaryBytes.length;
+ BytesWritable byteWritable = (BytesWritable) writable;
+ byte[] binaryBytes = byteWritable.getBytes();
+ int length = byteWritable.getLength();
serializeWrite.writeBinary(binaryBytes, 0, length);
}
break;
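
For orientation, the reworked VerifyFast API can be exercised round-trip in a few lines. The sketch below is illustrative only, not part of the patch: it assumes a single ascending int column, TypeInfoFactory.intTypeInfo from the serde2 type system, and ByteStream.Output's getData()/getLength() accessors, inside a test method declared to throw IOException.

    PrimitiveTypeInfo[] typeInfos = { TypeInfoFactory.intTypeInfo };
    boolean[] sortOrderIsDesc = { false };
    byte[] nullMarker = { BinarySortableSerDe.ZERO };
    byte[] notNullMarker = { BinarySortableSerDe.ONE };

    // Serialize one IntWritable field with the fast writer...
    BinarySortableSerializeWrite serializeWrite =
        new BinarySortableSerializeWrite(sortOrderIsDesc, nullMarker, notNullMarker);
    Output output = new Output();
    serializeWrite.set(output);
    VerifyFast.serializeWrite(serializeWrite, typeInfos[0], new IntWritable(42));

    // ...then read it back and verify the same Writable value is seen.
    BinarySortableDeserializeRead deserializeRead =
        new BinarySortableDeserializeRead(typeInfos, sortOrderIsDesc);
    deserializeRead.set(output.getData(), 0, output.getLength());
    VerifyFast.verifyDeserializeRead(deserializeRead, typeInfos[0], new IntWritable(42));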
http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/MyTestClass.java
----------------------------------------------------------------------
diff --git a/serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/MyTestClass.java b/serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/MyTestClass.java
index 14fc38e..1349f74 100644
--- a/serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/MyTestClass.java
+++ b/serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/MyTestClass.java
@@ -23,6 +23,8 @@ import java.util.ArrayList;
import java.util.List;
import java.util.Random;
+import org.apache.commons.lang.ArrayUtils;
+import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.hive.common.type.HiveChar;
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.common.type.HiveIntervalDayTime;
@@ -30,6 +32,28 @@ import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth;
import org.apache.hadoop.hive.common.type.HiveVarchar;
import org.apache.hadoop.hive.common.type.RandomTypeUtil;
import org.apache.hadoop.hive.serde2.binarysortable.MyTestPrimitiveClass.ExtraTypeInfo;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBooleanObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableByteObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDateObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDoubleObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableFloatObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveCharObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveDecimalObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveIntervalDayTimeObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveIntervalYearMonthObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveVarcharObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableIntObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableLongObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableShortObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableTimestampObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
+
public class MyTestClass {
@@ -202,4 +226,66 @@ public class MyTestClass {
static Object[] nrIntervalDayTime = {
HiveIntervalDayTime.valueOf("1 0:0:0")
};
+
+ public static void nonRandomRowFill(Object[][] rows, PrimitiveCategory[] primitiveCategories) {
+ int minCount = Math.min(rows.length, nrDecimal.length);
+ for (int i = 0; i < minCount; i++) {
+ Object[] row = rows[i];
+ for (int c = 0; c < primitiveCategories.length; c++) {
+ Object object = row[c]; // Current value.
+ switch (primitiveCategories[c]) {
+ case BOOLEAN:
+ // Use current for now.
+ break;
+ case BYTE:
+ object = nrByte;
+ break;
+ case SHORT:
+ object = nrShort;
+ break;
+ case INT:
+ object = nrInt;
+ break;
+ case LONG:
+ object = nrLong;
+ break;
+ case DATE:
+ object = nrDate;
+ break;
+ case FLOAT:
+ object = nrFloat;
+ break;
+ case DOUBLE:
+ object = nrDouble;
+ break;
+ case STRING:
+ object = nrString;
+ break;
+ case CHAR:
+ // Use current for now.
+ break;
+ case VARCHAR:
+ // Use current for now.
+ break;
+ case BINARY:
+ // Use current for now.
+ break;
+ case TIMESTAMP:
+ // Use current for now.
+ break;
+ case INTERVAL_YEAR_MONTH:
+ object = nrIntervalYearMonth;
+ break;
+ case INTERVAL_DAY_TIME:
+ object = nrIntervalDayTime;
+ break;
+ case DECIMAL:
+ object = nrDecimal[i];
+ break;
+ default:
+ throw new Error("Unknown primitive category " + primitiveCategories[c]);
+ }
+ row[c] = object;  // Presumed intent: write the chosen value back; otherwise the fill is a no-op.
+ }
+ }
+ }
}
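
As a usage note, nonRandomRowFill overwrites the leading randomly generated rows with fixed edge-case values before the tests sort and serialize them. A minimal illustrative call, mirroring testBinarySortableFastCase below (the seed and row count here are arbitrary):

    Random r = new Random(12345);  // Arbitrary seed for the sketch.
    RandomRowObjectSource source = new RandomRowObjectSource();
    source.init(r);
    Object[][] rows = source.randomRows(100);
    MyTestClass.nonRandomRowFill(rows, source.primitiveCategories());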
http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/TestBinarySortableFast.java
----------------------------------------------------------------------
diff --git a/serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/TestBinarySortableFast.java b/serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/TestBinarySortableFast.java
index 0be3213..58937db 100644
--- a/serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/TestBinarySortableFast.java
+++ b/serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/TestBinarySortableFast.java
@@ -17,53 +17,72 @@
*/
package org.apache.hadoop.hive.serde2.binarysortable;
+import java.io.EOFException;
import java.util.Arrays;
-import java.util.HashMap;
import java.util.List;
-import java.util.Map;
import java.util.Random;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.hive.serde2.ByteStream.Output;
import org.apache.hadoop.hive.serde2.SerDe;
import org.apache.hadoop.hive.serde2.VerifyFast;
-import org.apache.hadoop.hive.serde2.binarysortable.MyTestPrimitiveClass.ExtraTypeInfo;
import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead;
import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions;
+import org.apache.hadoop.hive.serde2.fast.RandomRowObjectSource;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.Writable;
import junit.framework.TestCase;
public class TestBinarySortableFast extends TestCase {
- private void testBinarySortableFast(MyTestPrimitiveClass[] myTestPrimitiveClasses,
+ private void testBinarySortableFast(
+ RandomRowObjectSource source, Object[][] rows,
boolean[] columnSortOrderIsDesc, byte[] columnNullMarker, byte[] columnNotNullMarker,
- SerDe serde, StructObjectInspector rowOI, boolean ascending,
- Map<Object, PrimitiveTypeInfo[]> primitiveTypeInfoMap) throws Throwable {
+ SerDe serde, StructObjectInspector rowOI,
+ SerDe serde_fewer, StructObjectInspector writeRowOI,
+ boolean ascending, PrimitiveTypeInfo[] primitiveTypeInfos,
+ boolean useIncludeColumns, boolean doWriteFewerColumns, Random r) throws Throwable {
+
+ int rowCount = rows.length;
+ int columnCount = primitiveTypeInfos.length;
+
+ boolean[] columnsToInclude = null;
+ if (useIncludeColumns) {
+ columnsToInclude = new boolean[columnCount];
+ for (int i = 0; i < columnCount; i++) {
+ columnsToInclude[i] = r.nextBoolean();
+ }
+ }
+
+ int writeColumnCount = columnCount;
+ if (doWriteFewerColumns) {
+ writeColumnCount = writeRowOI.getAllStructFieldRefs().size();
+ }
BinarySortableSerializeWrite binarySortableSerializeWrite =
- new BinarySortableSerializeWrite(columnSortOrderIsDesc, columnNullMarker, columnNotNullMarker);
+ new BinarySortableSerializeWrite(columnSortOrderIsDesc, columnNullMarker, columnNotNullMarker);
// Try to serialize
// One Writable per row.
- BytesWritable serializeWriteBytes[] = new BytesWritable[myTestPrimitiveClasses.length];
-
- int[][] perFieldWriteLengthsArray = new int[myTestPrimitiveClasses.length][];
- for (int i = 0; i < myTestPrimitiveClasses.length; i++) {
- MyTestPrimitiveClass t = myTestPrimitiveClasses[i];
+ BytesWritable serializeWriteBytes[] = new BytesWritable[rowCount];
+
+ int[][] perFieldWriteLengthsArray = new int[rowCount][];
+ for (int i = 0; i < rowCount; i++) {
+ Object[] row = rows[i];
Output output = new Output();
binarySortableSerializeWrite.set(output);
- int[] perFieldWriteLengths = new int[MyTestPrimitiveClass.primitiveCount];
- for (int index = 0; index < MyTestPrimitiveClass.primitiveCount; index++) {
- Object object = t.getPrimitiveObject(index);
- VerifyFast.serializeWrite(binarySortableSerializeWrite, primitiveTypeInfoMap.get(t)[index], object);
+ int[] perFieldWriteLengths = new int[columnCount];
+ for (int index = 0; index < writeColumnCount; index++) {
+
+ Writable writable = (Writable) row[index];
+
+ VerifyFast.serializeWrite(binarySortableSerializeWrite, primitiveTypeInfos[index], writable);
perFieldWriteLengths[index] = output.getLength();
}
perFieldWriteLengthsArray[i] = perFieldWriteLengths;
@@ -90,34 +109,87 @@ public class TestBinarySortableFast extends TestCase {
// Try to deserialize using DeserializeRead our Writable row objects created by SerializeWrite.
- for (int i = 0; i < myTestPrimitiveClasses.length; i++) {
- MyTestPrimitiveClass t = myTestPrimitiveClasses[i];
- PrimitiveTypeInfo[] primitiveTypeInfos = primitiveTypeInfoMap.get(t);
- BinarySortableDeserializeRead binarySortableDeserializeRead =
+ for (int i = 0; i < rowCount; i++) {
+ Object[] row = rows[i];
+ BinarySortableDeserializeRead binarySortableDeserializeRead =
new BinarySortableDeserializeRead(primitiveTypeInfos, columnSortOrderIsDesc);
+ if (useIncludeColumns) {
+ binarySortableDeserializeRead.setColumnsToInclude(columnsToInclude);
+ }
+
BytesWritable bytesWritable = serializeWriteBytes[i];
- binarySortableDeserializeRead.set(bytesWritable.getBytes(), 0, bytesWritable.getLength());
+ binarySortableDeserializeRead.set(
+ bytesWritable.getBytes(), 0, bytesWritable.getLength());
- for (int index = 0; index < MyTestPrimitiveClass.primitiveCount; index++) {
- Object object = t.getPrimitiveObject(index);
- VerifyFast.verifyDeserializeRead(binarySortableDeserializeRead, primitiveTypeInfos[index], object);
+ for (int index = 0; index < columnCount; index++) {
+ if (index >= writeColumnCount ||
+ (useIncludeColumns && !columnsToInclude[index])) {
+ // Should come back a null.
+ VerifyFast.verifyDeserializeRead(binarySortableDeserializeRead, primitiveTypeInfos[index], null);
+ } else {
+ Writable writable = (Writable) row[index];
+ VerifyFast.verifyDeserializeRead(binarySortableDeserializeRead, primitiveTypeInfos[index], writable);
+ }
}
binarySortableDeserializeRead.extraFieldsCheck();
TestCase.assertTrue(!binarySortableDeserializeRead.readBeyondConfiguredFieldsWarned());
- TestCase.assertTrue(!binarySortableDeserializeRead.readBeyondBufferRangeWarned());
+ if (doWriteFewerColumns) {
+ TestCase.assertTrue(binarySortableDeserializeRead.readBeyondBufferRangeWarned());
+ } else {
+ TestCase.assertTrue(!binarySortableDeserializeRead.readBeyondBufferRangeWarned());
+ }
TestCase.assertTrue(!binarySortableDeserializeRead.bufferRangeHasExtraDataWarned());
+
+ /*
+ * Clip off one byte and expect an EOFException on the last field written.
+ */
+ BinarySortableDeserializeRead binarySortableDeserializeRead2 =
+ new BinarySortableDeserializeRead(primitiveTypeInfos, columnSortOrderIsDesc);
+
+ if (useIncludeColumns) {
+ binarySortableDeserializeRead2.setColumnsToInclude(columnsToInclude);
+ }
+
+ binarySortableDeserializeRead2.set(
+ bytesWritable.getBytes(), 0, bytesWritable.getLength() - 1); // One fewer byte.
+
+ for (int index = 0; index < writeColumnCount; index++) {
+ Writable writable = (Writable) row[index];
+ if (index == writeColumnCount - 1) {
+ boolean threw = false;
+ try {
+ VerifyFast.verifyDeserializeRead(binarySortableDeserializeRead2, primitiveTypeInfos[index], writable);
+ } catch (EOFException e) {
+ threw = true;
+ }
+ TestCase.assertTrue(threw);
+ } else {
+ if (useIncludeColumns && !columnsToInclude[index]) {
+ VerifyFast.verifyDeserializeRead(binarySortableDeserializeRead2, primitiveTypeInfos[index], null);
+ } else {
+ VerifyFast.verifyDeserializeRead(binarySortableDeserializeRead2, primitiveTypeInfos[index], writable);
+ }
+ }
+ }
+
}
// Try to deserialize using SerDe class our Writable row objects created by SerializeWrite.
- for (int i = 0; i < myTestPrimitiveClasses.length; i++) {
+ for (int i = 0; i < rowCount; i++) {
BytesWritable bytesWritable = serializeWriteBytes[i];
- List<Object> deserializedRow = (List<Object>) serde.deserialize(bytesWritable);
- MyTestPrimitiveClass t = myTestPrimitiveClasses[i];
- PrimitiveTypeInfo[] primitiveTypeInfos = primitiveTypeInfoMap.get(t);
- for (int index = 0; index < MyTestPrimitiveClass.primitiveCount; index++) {
- Object expected = t.getPrimitiveWritableObject(index, primitiveTypeInfos[index]);
+ // Note that regular SerDe doesn't tolerate fewer columns.
+ List<Object> deserializedRow;
+ if (doWriteFewerColumns) {
+ deserializedRow = (List<Object>) serde_fewer.deserialize(bytesWritable);
+ } else {
+ deserializedRow = (List<Object>) serde.deserialize(bytesWritable);
+ }
+
+ Object[] row = rows[i];
+ for (int index = 0; index < writeColumnCount; index++) {
+ Object expected = row[index];
Object object = deserializedRow.get(index);
if (expected == null || object == null) {
if (expected != null || object != null) {
@@ -132,14 +204,19 @@ public class TestBinarySortableFast extends TestCase {
}
// One Writable per row.
- BytesWritable serdeBytes[] = new BytesWritable[myTestPrimitiveClasses.length];
-
+ BytesWritable serdeBytes[] = new BytesWritable[rowCount];
+
// Serialize using the SerDe, then below deserialize using DeserializeRead.
- for (int i = 0; i < myTestPrimitiveClasses.length; i++) {
- MyTestPrimitiveClass t = myTestPrimitiveClasses[i];
+ for (int i = 0; i < rowCount; i++) {
+ Object[] row = rows[i];
// Since SerDe reuses memory, we will need to make a copy.
- BytesWritable serialized = (BytesWritable) serde.serialize(t, rowOI);
+ BytesWritable serialized;
+ if (doWriteFewerColumns) {
+ serialized = (BytesWritable) serde_fewer.serialize(row, rowOI);
+ } else {
+ serialized = (BytesWritable) serde.serialize(row, rowOI);
+ }
BytesWritable bytesWritable = new BytesWritable();
bytesWritable.set(serialized);
byte[] serDeOutput = Arrays.copyOfRange(bytesWritable.getBytes(), 0, bytesWritable.getLength());
@@ -167,85 +244,188 @@ public class TestBinarySortableFast extends TestCase {
}
// Try to deserialize using DeserializeRead our Writable row objects created by SerDe.
- for (int i = 0; i < myTestPrimitiveClasses.length; i++) {
- MyTestPrimitiveClass t = myTestPrimitiveClasses[i];
- PrimitiveTypeInfo[] primitiveTypeInfos = primitiveTypeInfoMap.get(t);
- BinarySortableDeserializeRead binarySortableDeserializeRead =
+ for (int i = 0; i < rowCount; i++) {
+ Object[] row = rows[i];
+ BinarySortableDeserializeRead binarySortableDeserializeRead =
new BinarySortableDeserializeRead(primitiveTypeInfos, columnSortOrderIsDesc);
+ if (useIncludeColumns) {
+ binarySortableDeserializeRead.setColumnsToInclude(columnsToInclude);
+ }
+
BytesWritable bytesWritable = serdeBytes[i];
binarySortableDeserializeRead.set(bytesWritable.getBytes(), 0, bytesWritable.getLength());
- for (int index = 0; index < MyTestPrimitiveClass.primitiveCount; index++) {
- Object object = t.getPrimitiveObject(index);
- VerifyFast.verifyDeserializeRead(binarySortableDeserializeRead, primitiveTypeInfos[index], object);
+ for (int index = 0; index < columnCount; index++) {
+ if (index >= writeColumnCount ||
+ (useIncludeColumns && !columnsToInclude[index])) {
+ // Should come back a null.
+ VerifyFast.verifyDeserializeRead(binarySortableDeserializeRead, primitiveTypeInfos[index], null);
+ } else {
+ Writable writable = (Writable) row[index];
+ VerifyFast.verifyDeserializeRead(binarySortableDeserializeRead, primitiveTypeInfos[index], writable);
+ }
}
binarySortableDeserializeRead.extraFieldsCheck();
TestCase.assertTrue(!binarySortableDeserializeRead.readBeyondConfiguredFieldsWarned());
- TestCase.assertTrue(!binarySortableDeserializeRead.readBeyondBufferRangeWarned());
+ if (doWriteFewerColumns) {
+ TestCase.assertTrue(binarySortableDeserializeRead.readBeyondBufferRangeWarned());
+ } else {
+ TestCase.assertTrue(!binarySortableDeserializeRead.readBeyondBufferRangeWarned());
+ }
TestCase.assertTrue(!binarySortableDeserializeRead.bufferRangeHasExtraDataWarned());
}
}
+ private void testBinarySortableFastCase(int caseNum, boolean doNonRandomFill, Random r)
+ throws Throwable {
+
+ RandomRowObjectSource source = new RandomRowObjectSource();
+ source.init(r);
+
+ int rowCount = 1000;
+ Object[][] rows = source.randomRows(rowCount);
+
+ if (doNonRandomFill) {
+ MyTestClass.nonRandomRowFill(rows, source.primitiveCategories());
+ }
+
+ // We need to operate on sorted data to fully test BinarySortable.
+ source.sort(rows);
+
+ StructObjectInspector rowStructObjectInspector = source.rowStructObjectInspector();
+
+ PrimitiveTypeInfo[] primitiveTypeInfos = source.primitiveTypeInfos();
+ int columnCount = primitiveTypeInfos.length;
+
+ int writeColumnCount = columnCount;
+ StructObjectInspector writeRowStructObjectInspector = rowStructObjectInspector;
+ boolean doWriteFewerColumns = r.nextBoolean();
+ if (doWriteFewerColumns) {
+ writeColumnCount = 1 + r.nextInt(columnCount);
+ if (writeColumnCount == columnCount) {
+ doWriteFewerColumns = false;
+ } else {
+ writeRowStructObjectInspector = source.partialRowStructObjectInspector(writeColumnCount);
+ }
+ }
+
+ String fieldNames = ObjectInspectorUtils.getFieldNames(rowStructObjectInspector);
+ String fieldTypes = ObjectInspectorUtils.getFieldTypes(rowStructObjectInspector);
+ String order;
+ order = StringUtils.leftPad("", columnCount, '+');
+ String nullOrder;
+ nullOrder = StringUtils.leftPad("", columnCount, 'a');
+ SerDe serde_ascending = TestBinarySortableSerDe.getSerDe(fieldNames, fieldTypes, order, nullOrder);
+
+ SerDe serde_ascending_fewer = null;
+ if (doWriteFewerColumns) {
+ String partialFieldNames = ObjectInspectorUtils.getFieldNames(writeRowStructObjectInspector);
+ String partialFieldTypes = ObjectInspectorUtils.getFieldTypes(writeRowStructObjectInspector);
+
+ serde_ascending_fewer = TestBinarySortableSerDe.getSerDe(partialFieldNames, partialFieldTypes, order, nullOrder);
+ }
+
+ order = StringUtils.leftPad("", columnCount, '-');
+ nullOrder = StringUtils.leftPad("", columnCount, 'z');
+ SerDe serde_descending = TestBinarySortableSerDe.getSerDe(fieldNames, fieldTypes, order, nullOrder);
+
+ SerDe serde_descending_fewer = null;
+ if (doWriteFewerColumns) {
+ String partialFieldNames = ObjectInspectorUtils.getFieldNames(writeRowStructObjectInspector);
+ String partialFieldTypes = ObjectInspectorUtils.getFieldTypes(writeRowStructObjectInspector);
+
+ serde_descending_fewer = TestBinarySortableSerDe.getSerDe(partialFieldNames, partialFieldTypes, order, nullOrder);
+ }
+
+ boolean[] columnSortOrderIsDesc = new boolean[columnCount];
+ Arrays.fill(columnSortOrderIsDesc, false);
+ byte[] columnNullMarker = new byte[columnCount];
+ Arrays.fill(columnNullMarker, BinarySortableSerDe.ZERO);
+ byte[] columnNotNullMarker = new byte[columnCount];
+ Arrays.fill(columnNotNullMarker, BinarySortableSerDe.ONE);
+
+ /*
+ * Ascending.
+ */
+ testBinarySortableFast(source, rows,
+ columnSortOrderIsDesc, columnNullMarker, columnNotNullMarker,
+ serde_ascending, rowStructObjectInspector,
+ serde_ascending_fewer, writeRowStructObjectInspector,
+ /* ascending */ true, primitiveTypeInfos,
+ /* useIncludeColumns */ false, /* doWriteFewerColumns */ false, r);
+
+ testBinarySortableFast(source, rows,
+ columnSortOrderIsDesc, columnNullMarker, columnNotNullMarker,
+ serde_ascending, rowStructObjectInspector,
+ serde_ascending_fewer, writeRowStructObjectInspector,
+ /* ascending */ true, primitiveTypeInfos,
+ /* useIncludeColumns */ true, /* doWriteFewerColumns */ false, r);
+
+ if (doWriteFewerColumns) {
+ testBinarySortableFast(source, rows,
+ columnSortOrderIsDesc, columnNullMarker, columnNotNullMarker,
+ serde_ascending, rowStructObjectInspector,
+ serde_ascending_fewer, writeRowStructObjectInspector,
+ /* ascending */ true, primitiveTypeInfos,
+ /* useIncludeColumns */ false, /* doWriteFewerColumns */ true, r);
+
+ testBinarySortableFast(source, rows,
+ columnSortOrderIsDesc, columnNullMarker, columnNotNullMarker,
+ serde_ascending, rowStructObjectInspector,
+ serde_ascending_fewer, writeRowStructObjectInspector,
+ /* ascending */ true, primitiveTypeInfos,
+ /* useIncludeColumns */ true, /* doWriteFewerColumns */ true, r);
+ }
+
+ /*
+ * Descending.
+ */
+ Arrays.fill(columnSortOrderIsDesc, true);
+
+ testBinarySortableFast(source, rows,
+ columnSortOrderIsDesc, columnNullMarker, columnNotNullMarker,
+ serde_descending, rowStructObjectInspector,
+ serde_ascending_fewer, writeRowStructObjectInspector,
+ /* ascending */ false, primitiveTypeInfos,
+ /* useIncludeColumns */ false, /* doWriteFewerColumns */ false, r);
+
+ testBinarySortableFast(source, rows,
+ columnSortOrderIsDesc, columnNullMarker, columnNotNullMarker,
+ serde_descending, rowStructObjectInspector,
+ serde_ascending_fewer, writeRowStructObjectInspector,
+ /* ascending */ false, primitiveTypeInfos,
+ /* useIncludeColumns */ true, /* doWriteFewerColumns */ false, r);
+
+ if (doWriteFewerColumns) {
+ testBinarySortableFast(source, rows,
+ columnSortOrderIsDesc, columnNullMarker, columnNotNullMarker,
+ serde_descending, rowStructObjectInspector,
+ serde_descending_fewer, writeRowStructObjectInspector,
+ /* ascending */ false, primitiveTypeInfos,
+ /* useIncludeColumns */ false, /* doWriteFewerColumns */ true, r);
+
+ testBinarySortableFast(source, rows,
+ columnSortOrderIsDesc, columnNullMarker, columnNotNullMarker,
+ serde_descending, rowStructObjectInspector,
+ serde_descending_fewer, writeRowStructObjectInspector,
+ /* ascending */ false, primitiveTypeInfos,
+ /* useIncludeColumns */ true, /* doWriteFewerColumns */ true, r);
+ }
+
+ }
+
public void testBinarySortableFast() throws Throwable {
+
try {
+ Random r = new Random(35790);
+
+ int caseNum = 0;
+ for (int i = 0; i < 10; i++) {
+ testBinarySortableFastCase(caseNum, (i % 2 == 0), r);
+ caseNum++;
+ }
- int num = 1000;
- Random r = new Random(1234);
- MyTestPrimitiveClass myTestPrimitiveClasses[] = new MyTestPrimitiveClass[num];
- // Need a map because we sort.
- Map<Object, PrimitiveTypeInfo[]> primitiveTypeInfoMap = new HashMap<Object, PrimitiveTypeInfo[]>();
-
- int i;
- // First try non-random values
- for (i = 0; i < MyTestClass.nrDecimal.length; i++) {
- MyTestPrimitiveClass t = new MyTestPrimitiveClass();
- ExtraTypeInfo extraTypeInfo = new ExtraTypeInfo();
- t.nonRandomFill(i, extraTypeInfo);
- myTestPrimitiveClasses[i] = t;
- PrimitiveTypeInfo[] primitiveTypeInfos = MyTestPrimitiveClass.getPrimitiveTypeInfos(extraTypeInfo);
- primitiveTypeInfoMap.put(t, primitiveTypeInfos);
- }
-
- for ( ; i < num; i++) {
- int randField = r.nextInt(MyTestPrimitiveClass.primitiveCount);
- MyTestPrimitiveClass t = new MyTestPrimitiveClass();
- int field = 0;
- ExtraTypeInfo extraTypeInfo = new ExtraTypeInfo();
- t.randomFill(r, randField, field, extraTypeInfo);
- myTestPrimitiveClasses[i] = t;
- PrimitiveTypeInfo[] primitiveTypeInfos = MyTestPrimitiveClass.getPrimitiveTypeInfos(extraTypeInfo);
- primitiveTypeInfoMap.put(t, primitiveTypeInfos);
- }
-
- StructObjectInspector rowOI = (StructObjectInspector) ObjectInspectorFactory
- .getReflectionObjectInspector(MyTestPrimitiveClass.class,
- ObjectInspectorOptions.JAVA);
-
- TestBinarySortableSerDe.sort(myTestPrimitiveClasses, rowOI);
-
- String fieldNames = ObjectInspectorUtils.getFieldNames(rowOI);
- String fieldTypes = ObjectInspectorUtils.getFieldTypes(rowOI);
- String order;
- order = StringUtils.leftPad("", MyTestPrimitiveClass.primitiveCount, '+');
- String nullOrder;
- nullOrder = StringUtils.leftPad("", MyTestPrimitiveClass.primitiveCount, 'a');
- SerDe serde_ascending = TestBinarySortableSerDe.getSerDe(fieldNames, fieldTypes, order, nullOrder);
- order = StringUtils.leftPad("", MyTestPrimitiveClass.primitiveCount, '-');
- nullOrder = StringUtils.leftPad("", MyTestPrimitiveClass.primitiveCount, 'z');
- SerDe serde_descending = TestBinarySortableSerDe.getSerDe(fieldNames, fieldTypes, order, nullOrder);
-
- boolean[] columnSortOrderIsDesc = new boolean[MyTestPrimitiveClass.primitiveCount];
- Arrays.fill(columnSortOrderIsDesc, false);
- byte[] columnNullMarker = new byte[MyTestPrimitiveClass.primitiveCount];
- Arrays.fill(columnNullMarker, BinarySortableSerDe.ZERO);
- byte[] columnNotNullMarker = new byte[MyTestPrimitiveClass.primitiveCount];
- Arrays.fill(columnNotNullMarker, BinarySortableSerDe.ONE);
- testBinarySortableFast(myTestPrimitiveClasses, columnSortOrderIsDesc, columnNullMarker,
- columnNotNullMarker, serde_ascending, rowOI, true, primitiveTypeInfoMap);
- Arrays.fill(columnSortOrderIsDesc, true);
- testBinarySortableFast(myTestPrimitiveClasses, columnSortOrderIsDesc, columnNullMarker,
- columnNotNullMarker, serde_descending, rowOI, false, primitiveTypeInfoMap);
} catch (Throwable e) {
e.printStackTrace();
throw e;
http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazySimpleFast.java
----------------------------------------------------------------------
diff --git a/serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazySimpleFast.java b/serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazySimpleFast.java
index 7ebe7ae..76b93c6 100644
--- a/serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazySimpleFast.java
+++ b/serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazySimpleFast.java
@@ -17,6 +17,7 @@
*/
package org.apache.hadoop.hive.serde2.lazy;
+import java.io.EOFException;
import java.util.Arrays;
import java.util.Properties;
import java.util.Random;
@@ -24,11 +25,15 @@ import java.util.Random;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.ByteStream.Output;
+import org.apache.hadoop.hive.serde2.SerDe;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.SerDeUtils;
import org.apache.hadoop.hive.serde2.VerifyFast;
+import org.apache.hadoop.hive.serde2.binarysortable.MyTestClass;
import org.apache.hadoop.hive.serde2.binarysortable.MyTestPrimitiveClass;
import org.apache.hadoop.hive.serde2.binarysortable.MyTestPrimitiveClass.ExtraTypeInfo;
+import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead;
+import org.apache.hadoop.hive.serde2.fast.RandomRowObjectSource;
import org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleDeserializeRead;
import org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleSerializeWrite;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
@@ -37,31 +42,55 @@ import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
import junit.framework.TestCase;
public class TestLazySimpleFast extends TestCase {
- private void testLazySimpleFast(MyTestPrimitiveClass[] myTestPrimitiveClasses, LazySimpleSerDe[] serdes,
- StructObjectInspector[] rowOIs, byte separator, LazySerDeParameters[] serdeParams,
- PrimitiveTypeInfo[][] primitiveTypeInfosArray) throws Throwable {
+ private void testLazySimpleFast(
+ RandomRowObjectSource source, Object[][] rows,
+ LazySimpleSerDe serde, StructObjectInspector rowOI,
+ LazySimpleSerDe serde_fewer, StructObjectInspector writeRowOI,
+ byte separator, LazySerDeParameters serdeParams, LazySerDeParameters serdeParams_fewer,
+ PrimitiveTypeInfo[] primitiveTypeInfos,
+ boolean useIncludeColumns, boolean doWriteFewerColumns, Random r) throws Throwable {
+
+ int rowCount = rows.length;
+ int columnCount = primitiveTypeInfos.length;
+
+ boolean[] columnsToInclude = null;
+ if (useIncludeColumns) {
+ columnsToInclude = new boolean[columnCount];
+ for (int i = 0; i < columnCount; i++) {
+ columnsToInclude[i] = r.nextBoolean();
+ }
+ }
+ int writeColumnCount = columnCount;
+ PrimitiveTypeInfo[] writePrimitiveTypeInfos = primitiveTypeInfos;
+ if (doWriteFewerColumns) {
+ writeColumnCount = writeRowOI.getAllStructFieldRefs().size();
+ writePrimitiveTypeInfos = Arrays.copyOf(primitiveTypeInfos, writeColumnCount);
+ }
// Try to serialize
- BytesWritable serializeWriteBytes[] = new BytesWritable[myTestPrimitiveClasses.length];
- for (int i = 0; i < myTestPrimitiveClasses.length; i++) {
- MyTestPrimitiveClass t = myTestPrimitiveClasses[i];
+ BytesWritable serializeWriteBytes[] = new BytesWritable[rowCount];
+ for (int i = 0; i < rowCount; i++) {
+ Object[] row = rows[i];
Output output = new Output();
- LazySimpleSerializeWrite lazySimpleSerializeWrite =
- new LazySimpleSerializeWrite(MyTestPrimitiveClass.primitiveCount,
- separator, serdeParams[i]);
+ LazySimpleSerializeWrite lazySimpleSerializeWrite =
+ new LazySimpleSerializeWrite(columnCount,
+ separator, serdeParams);
lazySimpleSerializeWrite.set(output);
- for (int index = 0; index < MyTestPrimitiveClass.primitiveCount; index++) {
- Object object = t.getPrimitiveObject(index);
- VerifyFast.serializeWrite(lazySimpleSerializeWrite, primitiveTypeInfosArray[i][index], object);
+ for (int index = 0; index < columnCount; index++) {
+
+ Writable writable = (Writable) row[index];
+
+ VerifyFast.serializeWrite(lazySimpleSerializeWrite, primitiveTypeInfos[index], writable);
}
BytesWritable bytesWritable = new BytesWritable();
@@ -70,12 +99,15 @@ public class TestLazySimpleFast extends TestCase {
}
// Try to deserialize
- for (int i = 0; i < myTestPrimitiveClasses.length; i++) {
- MyTestPrimitiveClass t = myTestPrimitiveClasses[i];
- PrimitiveTypeInfo[] primitiveTypeInfos = primitiveTypeInfosArray[i];
- LazySimpleDeserializeRead lazySimpleDeserializeRead =
- new LazySimpleDeserializeRead(primitiveTypeInfos,
- separator, serdeParams[i]);
+ for (int i = 0; i < rowCount; i++) {
+ Object[] row = rows[i];
+ LazySimpleDeserializeRead lazySimpleDeserializeRead =
+ new LazySimpleDeserializeRead(writePrimitiveTypeInfos,
+ separator, serdeParams);
+
+ if (useIncludeColumns) {
+ lazySimpleDeserializeRead.setColumnsToInclude(columnsToInclude);
+ }
BytesWritable bytesWritable = serializeWriteBytes[i];
byte[] bytes = bytesWritable.getBytes();
@@ -87,28 +119,37 @@ public class TestLazySimpleFast extends TestCase {
chars[c] = (char) (bytes[c] & 0xFF);
}
- for (int index = 0; index < MyTestPrimitiveClass.primitiveCount; index++) {
- Object object = t.getPrimitiveObject(index);
- PrimitiveCategory primitiveCategory = t.getPrimitiveCategory(index);
- VerifyFast.verifyDeserializeRead(lazySimpleDeserializeRead, primitiveTypeInfos[index], object);
+ for (int index = 0; index < columnCount; index++) {
+ if (index >= writeColumnCount ||
+ (useIncludeColumns && !columnsToInclude[index])) {
+ // Should come back a null.
+ VerifyFast.verifyDeserializeRead(lazySimpleDeserializeRead, primitiveTypeInfos[index], null);
+ } else {
+ Writable writable = (Writable) row[index];
+ VerifyFast.verifyDeserializeRead(lazySimpleDeserializeRead, primitiveTypeInfos[index], writable);
+ }
}
lazySimpleDeserializeRead.extraFieldsCheck();
TestCase.assertTrue(!lazySimpleDeserializeRead.readBeyondConfiguredFieldsWarned());
- TestCase.assertTrue(!lazySimpleDeserializeRead.readBeyondBufferRangeWarned());
+ if (doWriteFewerColumns) {
+ TestCase.assertTrue(lazySimpleDeserializeRead.readBeyondBufferRangeWarned());
+ } else {
+ TestCase.assertTrue(!lazySimpleDeserializeRead.readBeyondBufferRangeWarned());
+ }
TestCase.assertTrue(!lazySimpleDeserializeRead.bufferRangeHasExtraDataWarned());
+
}
// Try to deserialize using SerDe class our Writable row objects created by SerializeWrite.
- for (int i = 0; i < myTestPrimitiveClasses.length; i++) {
+ for (int i = 0; i < rowCount; i++) {
BytesWritable bytesWritable = serializeWriteBytes[i];
- LazyStruct lazySimpleStruct = (LazyStruct) serdes[i].deserialize(bytesWritable);
+ LazyStruct lazySimpleStruct = (LazyStruct) serde.deserialize(bytesWritable);
- MyTestPrimitiveClass t = myTestPrimitiveClasses[i];
- PrimitiveTypeInfo[] primitiveTypeInfos = primitiveTypeInfosArray[i];
+ Object[] row = rows[i];
- for (int index = 0; index < MyTestPrimitiveClass.primitiveCount; index++) {
+ for (int index = 0; index < columnCount; index++) {
PrimitiveTypeInfo primitiveTypeInfo = primitiveTypeInfos[index];
- Object expected = t.getPrimitiveWritableObject(index, primitiveTypeInfo);
+ Writable writable = (Writable) row[index];
LazyPrimitive lazyPrimitive = (LazyPrimitive) lazySimpleStruct.getField(index);
Object object;
if (lazyPrimitive != null) {
@@ -116,12 +157,12 @@ public class TestLazySimpleFast extends TestCase {
} else {
object = null;
}
- if (expected == null || object == null) {
- if (expected != null || object != null) {
+ if (writable == null || object == null) {
+ if (writable != null || object != null) {
fail("SerDe deserialized NULL column mismatch");
}
} else {
- if (!object.equals(expected)) {
+ if (!object.equals(writable)) {
fail("SerDe deserialized value does not match");
}
}
@@ -129,21 +170,19 @@ public class TestLazySimpleFast extends TestCase {
}
// One Writable per row.
- byte[][] serdeBytes = new byte[myTestPrimitiveClasses.length][];
-
+ byte[][] serdeBytes = new byte[rowCount][];
+
// Serialize using the SerDe, then below deserialize using DeserializeRead.
- Object[] row = new Object[MyTestPrimitiveClass.primitiveCount];
- for (int i = 0; i < myTestPrimitiveClasses.length; i++) {
- MyTestPrimitiveClass t = myTestPrimitiveClasses[i];
- PrimitiveTypeInfo[] primitiveTypeInfos = primitiveTypeInfosArray[i];
+ Object[] serdeRow = new Object[columnCount];
+ for (int i = 0; i < rowCount; i++) {
+ Object[] row = rows[i];
// LazySimple seems to work better with a row object array instead of a Java object...
- for (int index = 0; index < MyTestPrimitiveClass.primitiveCount; index++) {
- Object object = t.getPrimitiveWritableObject(index, primitiveTypeInfos[index]);
- row[index] = object;
+ for (int index = 0; index < columnCount; index++) {
+ serdeRow[index] = row[index];
}
- Text serialized = (Text) serdes[i].serialize(row, rowOIs[i]);
+ Text serialized = (Text) serde.serialize(serdeRow, rowOI);
byte[] bytes1 = Arrays.copyOfRange(serialized.getBytes(), 0, serialized.getLength());
byte[] bytes2 = Arrays.copyOfRange(serializeWriteBytes[i].getBytes(), 0, serializeWriteBytes[i].getLength());
@@ -154,23 +193,37 @@ public class TestLazySimpleFast extends TestCase {
}
// Try to deserialize using DeserializeRead our Writable row objects created by SerDe.
- for (int i = 0; i < myTestPrimitiveClasses.length; i++) {
- MyTestPrimitiveClass t = myTestPrimitiveClasses[i];
- PrimitiveTypeInfo[] primitiveTypeInfos = primitiveTypeInfosArray[i];
- LazySimpleDeserializeRead lazySimpleDeserializeRead =
- new LazySimpleDeserializeRead(primitiveTypeInfos,
- separator, serdeParams[i]);
+ for (int i = 0; i < rowCount; i++) {
+ Object[] row = rows[i];
+
+ LazySimpleDeserializeRead lazySimpleDeserializeRead =
+ new LazySimpleDeserializeRead(writePrimitiveTypeInfos,
+ separator, serdeParams);
+
+ if (useIncludeColumns) {
+ lazySimpleDeserializeRead.setColumnsToInclude(columnsToInclude);
+ }
byte[] bytes = serdeBytes[i];
lazySimpleDeserializeRead.set(bytes, 0, bytes.length);
- for (int index = 0; index < MyTestPrimitiveClass.primitiveCount; index++) {
- Object object = t.getPrimitiveObject(index);
- VerifyFast.verifyDeserializeRead(lazySimpleDeserializeRead, primitiveTypeInfos[index], object);
+ for (int index = 0; index < columnCount; index++) {
+ if (index >= writeColumnCount ||
+ (useIncludeColumns && !columnsToInclude[index])) {
+ // Should come back a null.
+ VerifyFast.verifyDeserializeRead(lazySimpleDeserializeRead, primitiveTypeInfos[index], null);
+ } else {
+ Writable writable = (Writable) row[index];
+ VerifyFast.verifyDeserializeRead(lazySimpleDeserializeRead, primitiveTypeInfos[index], writable);
+ }
}
lazySimpleDeserializeRead.extraFieldsCheck();
TestCase.assertTrue(!lazySimpleDeserializeRead.readBeyondConfiguredFieldsWarned());
- TestCase.assertTrue(!lazySimpleDeserializeRead.readBeyondBufferRangeWarned());
+ if (doWriteFewerColumns) {
+ TestCase.assertTrue(lazySimpleDeserializeRead.readBeyondBufferRangeWarned());
+ } else {
+ TestCase.assertTrue(!lazySimpleDeserializeRead.readBeyondBufferRangeWarned());
+ }
TestCase.assertTrue(!lazySimpleDeserializeRead.bufferRangeHasExtraDataWarned());
}
}
@@ -186,7 +239,7 @@ public class TestLazySimpleFast extends TestCase {
// Set the configuration parameters
tbl.setProperty(serdeConstants.SERIALIZATION_FORMAT, "9");
-
+
tbl.setProperty("columns", fieldNames);
tbl.setProperty("columns.types", fieldTypes);
@@ -210,44 +263,95 @@ public class TestLazySimpleFast extends TestCase {
return new LazySerDeParameters(conf, tbl, LazySimpleSerDe.class.getName());
}
- public void testLazySimpleFast() throws Throwable {
- try {
+ public void testLazySimpleFastCase(int caseNum, boolean doNonRandomFill, Random r)
+ throws Throwable {
+
+ RandomRowObjectSource source = new RandomRowObjectSource();
+ source.init(r);
- int num = 1000;
- Random r = new Random(1234);
- MyTestPrimitiveClass[] rows = new MyTestPrimitiveClass[num];
- PrimitiveTypeInfo[][] primitiveTypeInfosArray = new PrimitiveTypeInfo[num][];
- for (int i = 0; i < num; i++) {
- int randField = r.nextInt(MyTestPrimitiveClass.primitiveCount);
- MyTestPrimitiveClass t = new MyTestPrimitiveClass();
- int field = 0;
- ExtraTypeInfo extraTypeInfo = new ExtraTypeInfo();
- t.randomFill(r, randField, field, extraTypeInfo);
- PrimitiveTypeInfo[] primitiveTypeInfos = MyTestPrimitiveClass.getPrimitiveTypeInfos(extraTypeInfo);
- rows[i] = t;
- primitiveTypeInfosArray[i] = primitiveTypeInfos;
+ int rowCount = 1000;
+ Object[][] rows = source.randomRows(rowCount);
+
+ if (doNonRandomFill) {
+ MyTestClass.nonRandomRowFill(rows, source.primitiveCategories());
+ }
+
+ StructObjectInspector rowStructObjectInspector = source.rowStructObjectInspector();
+
+ PrimitiveTypeInfo[] primitiveTypeInfos = source.primitiveTypeInfos();
+ int columnCount = primitiveTypeInfos.length;
+
+ int writeColumnCount = columnCount;
+ StructObjectInspector writeRowStructObjectInspector = rowStructObjectInspector;
+ boolean doWriteFewerColumns = r.nextBoolean();
+ if (doWriteFewerColumns) {
+ writeColumnCount = 1 + r.nextInt(columnCount);
+ if (writeColumnCount == columnCount) {
+ doWriteFewerColumns = false;
+ } else {
+ writeRowStructObjectInspector = source.partialRowStructObjectInspector(writeColumnCount);
}
+ }
- // To get the specific type information for CHAR and VARCHAR, seems like we need an
- // inspector and SerDe per row...
- StructObjectInspector[] rowOIs = new StructObjectInspector[num];
- LazySimpleSerDe[] serdes = new LazySimpleSerDe[num];
- LazySerDeParameters[] serdeParams = new LazySerDeParameters[num];
- for (int i = 0; i < num; i++) {
- MyTestPrimitiveClass t = rows[i];
+ String fieldNames = ObjectInspectorUtils.getFieldNames(rowStructObjectInspector);
+ String fieldTypes = ObjectInspectorUtils.getFieldTypes(rowStructObjectInspector);
- StructObjectInspector rowOI = t.getRowInspector(primitiveTypeInfosArray[i]);
+ LazySimpleSerDe serde = getSerDe(fieldNames, fieldTypes);
+ LazySerDeParameters serdeParams = getSerDeParams(fieldNames, fieldTypes);
- String fieldNames = ObjectInspectorUtils.getFieldNames(rowOI);
- String fieldTypes = ObjectInspectorUtils.getFieldTypes(rowOI);
+ LazySimpleSerDe serde_fewer = null;
+ LazySerDeParameters serdeParams_fewer = null;
+ if (doWriteFewerColumns) {
+ String partialFieldNames = ObjectInspectorUtils.getFieldNames(writeRowStructObjectInspector);
+ String partialFieldTypes = ObjectInspectorUtils.getFieldTypes(writeRowStructObjectInspector);
+
+ serde_fewer = getSerDe(partialFieldNames, partialFieldTypes);
+ serdeParams_fewer = getSerDeParams(partialFieldNames, partialFieldTypes);
+ }
+
+ byte separator = (byte) '\t';
+ testLazySimpleFast(
+ source, rows,
+ serde, rowStructObjectInspector,
+ serde_fewer, writeRowStructObjectInspector,
+ separator, serdeParams, serdeParams_fewer, primitiveTypeInfos,
+ /* useIncludeColumns */ false, /* doWriteFewerColumns */ false, r);
+
+ testLazySimpleFast(
+ source, rows,
+ serde, rowStructObjectInspector,
+ serde_fewer, writeRowStructObjectInspector,
+ separator, serdeParams, serdeParams_fewer, primitiveTypeInfos,
+ /* useIncludeColumns */ true, /* doWriteFewerColumns */ false, r);
+
+ if (doWriteFewerColumns) {
+ testLazySimpleFast(
+ source, rows,
+ serde, rowStructObjectInspector,
+ serde_fewer, writeRowStructObjectInspector,
+ separator, serdeParams, serdeParams_fewer, primitiveTypeInfos,
+ /* useIncludeColumns */ false, /* doWriteFewerColumns */ true, r);
+
+ testLazySimpleFast(
+ source, rows,
+ serde, rowStructObjectInspector,
+ serde_fewer, writeRowStructObjectInspector,
+ separator, serdeParams, serdeParams_fewer, primitiveTypeInfos,
+ /* useIncludeColumns */ true, /* doWriteFewerColumns */ true, r);
+ }
+ }
+
+ public void testLazySimpleFast() throws Throwable {
+
+ try {
+ Random r = new Random(35790);
- rowOIs[i] = rowOI;
- serdes[i] = getSerDe(fieldNames, fieldTypes);
- serdeParams[i] = getSerDeParams(fieldNames, fieldTypes);
+ int caseNum = 0;
+ for (int i = 0; i < 10; i++) {
+ testLazySimpleFastCase(caseNum, (i % 2 == 0), r);
+ caseNum++;
}
- byte separator = (byte) '\t';
- testLazySimpleFast(rows, serdes, rowOIs, separator, serdeParams, primitiveTypeInfosArray);
} catch (Throwable e) {
e.printStackTrace();
throw e;
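
The same round-trip idea applies to the LazySimple pair. A single-column sketch, illustrative only, reusing the test's own getSerDeParams helper (the "col0"/"int" schema, the seedless setup, and the sample value are assumptions):

    LazySerDeParameters params = getSerDeParams("col0", "int");
    PrimitiveTypeInfo[] typeInfos = { TypeInfoFactory.intTypeInfo };
    byte separator = (byte) '\t';

    // Write one int field with the fast writer...
    Output output = new Output();
    LazySimpleSerializeWrite writer = new LazySimpleSerializeWrite(1, separator, params);
    writer.set(output);
    VerifyFast.serializeWrite(writer, typeInfos[0], new IntWritable(42));

    // ...and verify the fast reader sees the same Writable.
    LazySimpleDeserializeRead reader = new LazySimpleDeserializeRead(typeInfos, separator, params);
    reader.set(output.getData(), 0, output.getLength());
    VerifyFast.verifyDeserializeRead(reader, typeInfos[0], new IntWritable(42));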
http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/TestLazyBinaryFast.java
----------------------------------------------------------------------
diff --git a/serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/TestLazyBinaryFast.java b/serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/TestLazyBinaryFast.java
index 4032743..d7c4999 100644
--- a/serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/TestLazyBinaryFast.java
+++ b/serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/TestLazyBinaryFast.java
@@ -17,6 +17,7 @@
*/
package org.apache.hadoop.hive.serde2.lazybinary;
+import java.io.EOFException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
@@ -27,9 +28,13 @@ import junit.framework.TestCase;
import org.apache.hadoop.hive.serde2.ByteStream.Output;
import org.apache.hadoop.hive.serde2.SerDe;
import org.apache.hadoop.hive.serde2.VerifyFast;
+import org.apache.hadoop.hive.serde2.binarysortable.MyTestClass;
import org.apache.hadoop.hive.serde2.binarysortable.MyTestPrimitiveClass;
import org.apache.hadoop.hive.serde2.binarysortable.MyTestPrimitiveClass.ExtraTypeInfo;
import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead;
+import org.apache.hadoop.hive.serde2.fast.RandomRowObjectSource;
+import org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters;
+import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead;
import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinarySerializeWrite;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
@@ -43,24 +48,50 @@ import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.Writable;
public class TestLazyBinaryFast extends TestCase {
- private void testLazyBinaryFast(MyTestPrimitiveClass[] myTestPrimitiveClasses, SerDe[] serdes, StructObjectInspector[] rowOIs,
- PrimitiveTypeInfo[][] primitiveTypeInfosArray) throws Throwable {
+ private void testLazyBinaryFast(
+ RandomRowObjectSource source, Object[][] rows,
+ SerDe serde, StructObjectInspector rowOI,
+ SerDe serde_fewer, StructObjectInspector writeRowOI,
+ PrimitiveTypeInfo[] primitiveTypeInfos,
+ boolean useIncludeColumns, boolean doWriteFewerColumns, Random r) throws Throwable {
- LazyBinarySerializeWrite lazyBinarySerializeWrite = new LazyBinarySerializeWrite(MyTestPrimitiveClass.primitiveCount);
+ int rowCount = rows.length;
+ int columnCount = primitiveTypeInfos.length;
+
+ boolean[] columnsToInclude = null;
+ if (useIncludeColumns) {
+ columnsToInclude = new boolean[columnCount];
+ for (int i = 0; i < columnCount; i++) {
+ columnsToInclude[i] = r.nextBoolean();
+ }
+ }
+
+ int writeColumnCount = columnCount;
+ PrimitiveTypeInfo[] writePrimitiveTypeInfos = primitiveTypeInfos;
+ if (doWriteFewerColumns) {
+ writeColumnCount = writeRowOI.getAllStructFieldRefs().size();
+ writePrimitiveTypeInfos = Arrays.copyOf(primitiveTypeInfos, writeColumnCount);
+ }
+
+ LazyBinarySerializeWrite lazyBinarySerializeWrite =
+ new LazyBinarySerializeWrite(writeColumnCount);
// Try to serialize
- BytesWritable serializeWriteBytes[] = new BytesWritable[myTestPrimitiveClasses.length];
- for (int i = 0; i < myTestPrimitiveClasses.length; i++) {
- MyTestPrimitiveClass t = myTestPrimitiveClasses[i];
+ BytesWritable serializeWriteBytes[] = new BytesWritable[rowCount];
+ for (int i = 0; i < rowCount; i++) {
+ Object[] row = rows[i];
Output output = new Output();
lazyBinarySerializeWrite.set(output);
- for (int index = 0; index < MyTestPrimitiveClass.primitiveCount; index++) {
- Object object = t.getPrimitiveObject(index);
- VerifyFast.serializeWrite(lazyBinarySerializeWrite, primitiveTypeInfosArray[i][index], object);
+ for (int index = 0; index < writeColumnCount; index++) {
+
+ Writable writable = (Writable) row[index];
+
+ VerifyFast.serializeWrite(lazyBinarySerializeWrite, primitiveTypeInfos[index], writable);
}
BytesWritable bytesWritable = new BytesWritable();
@@ -69,44 +100,63 @@ public class TestLazyBinaryFast extends TestCase {
}
// Try to deserialize
- for (int i = 0; i < myTestPrimitiveClasses.length; i++) {
- MyTestPrimitiveClass t = myTestPrimitiveClasses[i];
- PrimitiveTypeInfo[] primitiveTypeInfos = primitiveTypeInfosArray[i];
- LazyBinaryDeserializeRead lazyBinaryDeserializeRead =
- new LazyBinaryDeserializeRead(primitiveTypeInfos);
+ for (int i = 0; i < rowCount; i++) {
+ Object[] row = rows[i];
+
+ // Specifying the right type info length tells LazyBinaryDeserializeRead which is the last
+ // column.
+ LazyBinaryDeserializeRead lazyBinaryDeserializeRead =
+ new LazyBinaryDeserializeRead(writePrimitiveTypeInfos);
+
+ if (useIncludeColumns) {
+ lazyBinaryDeserializeRead.setColumnsToInclude(columnsToInclude);
+ }
BytesWritable bytesWritable = serializeWriteBytes[i];
lazyBinaryDeserializeRead.set(bytesWritable.getBytes(), 0, bytesWritable.getLength());
- for (int index = 0; index < MyTestPrimitiveClass.primitiveCount; index++) {
- Object object = t.getPrimitiveObject(index);
- PrimitiveCategory primitiveCategory = t.getPrimitiveCategory(index);
- VerifyFast.verifyDeserializeRead(lazyBinaryDeserializeRead, primitiveTypeInfos[index], object);
+ for (int index = 0; index < columnCount; index++) {
+ if (index >= writeColumnCount ||
+ (useIncludeColumns && !columnsToInclude[index])) {
+ // Should come back as null.
+ VerifyFast.verifyDeserializeRead(lazyBinaryDeserializeRead, primitiveTypeInfos[index], null);
+ } else {
+ Writable writable = (Writable) row[index];
+ VerifyFast.verifyDeserializeRead(lazyBinaryDeserializeRead, primitiveTypeInfos[index], writable);
+ }
}
lazyBinaryDeserializeRead.extraFieldsCheck();
- TestCase.assertTrue(!lazyBinaryDeserializeRead.readBeyondConfiguredFieldsWarned());
+ if (doWriteFewerColumns) {
+ TestCase.assertTrue(lazyBinaryDeserializeRead.readBeyondConfiguredFieldsWarned());
+ } else {
+ TestCase.assertTrue(!lazyBinaryDeserializeRead.readBeyondConfiguredFieldsWarned());
+ }
TestCase.assertTrue(!lazyBinaryDeserializeRead.readBeyondBufferRangeWarned());
TestCase.assertTrue(!lazyBinaryDeserializeRead.bufferRangeHasExtraDataWarned());
}
// Try to deserialize, using the SerDe class, our Writable row objects created by SerializeWrite.
- for (int i = 0; i < myTestPrimitiveClasses.length; i++) {
+ for (int i = 0; i < rowCount; i++) {
BytesWritable bytesWritable = serializeWriteBytes[i];
- LazyBinaryStruct lazyBinaryStruct = (LazyBinaryStruct) serdes[i].deserialize(bytesWritable);
+ LazyBinaryStruct lazyBinaryStruct;
+ if (doWriteFewerColumns) {
+ lazyBinaryStruct = (LazyBinaryStruct) serde_fewer.deserialize(bytesWritable);
+ } else {
+ lazyBinaryStruct = (LazyBinaryStruct) serde.deserialize(bytesWritable);
+ }
- MyTestPrimitiveClass t = myTestPrimitiveClasses[i];
- PrimitiveTypeInfo[] primitiveTypeInfos = primitiveTypeInfosArray[i];
+ Object[] row = rows[i];
- for (int index = 0; index < MyTestPrimitiveClass.primitiveCount; index++) {
+ for (int index = 0; index < writeColumnCount; index++) {
PrimitiveTypeInfo primitiveTypeInfo = primitiveTypeInfos[index];
- Object expected = t.getPrimitiveWritableObject(index, primitiveTypeInfo);
+ Writable writable = (Writable) row[index];
Object object = lazyBinaryStruct.getField(index);
- if (expected == null || object == null) {
- if (expected != null || object != null) {
+ if (writable == null || object == null) {
+ if (writable != null || object != null) {
fail("SerDe deserialized NULL column mismatch");
}
} else {
- if (!object.equals(expected)) {
+ if (!object.equals(writable)) {
fail("SerDe deserialized value does not match");
}
}
@@ -114,88 +164,167 @@ public class TestLazyBinaryFast extends TestCase {
}
// One Writable per row.
- BytesWritable serdeBytes[] = new BytesWritable[myTestPrimitiveClasses.length];
-
+ BytesWritable serdeBytes[] = new BytesWritable[rowCount];
+
// Serialize using the SerDe, then below deserialize using DeserializeRead.
- Object[] row = new Object[MyTestPrimitiveClass.primitiveCount];
- for (int i = 0; i < myTestPrimitiveClasses.length; i++) {
- MyTestPrimitiveClass t = myTestPrimitiveClasses[i];
- PrimitiveTypeInfo[] primitiveTypeInfos = primitiveTypeInfosArray[i];
+ Object[] serdeRow = new Object[writeColumnCount];
+ for (int i = 0; i < rowCount; i++) {
+ Object[] row = rows[i];
// LazyBinary seems to work better with a row object array instead of a Java object...
- for (int index = 0; index < MyTestPrimitiveClass.primitiveCount; index++) {
- Object object = t.getPrimitiveWritableObject(index, primitiveTypeInfos[index]);
- row[index] = object;
+ for (int index = 0; index < writeColumnCount; index++) {
+ serdeRow[index] = row[index];
}
- BytesWritable serialized = (BytesWritable) serdes[i].serialize(row, rowOIs[i]);
- BytesWritable bytesWritable = new BytesWritable();
- bytesWritable.set(serialized);
- byte[] bytes1 = Arrays.copyOfRange(bytesWritable.getBytes(), 0, bytesWritable.getLength());
+ BytesWritable serialized;
+ if (doWriteFewerColumns) {
+ serialized = (BytesWritable) serde_fewer.serialize(serdeRow, writeRowOI);
+ } else {
+ serialized = (BytesWritable) serde.serialize(serdeRow, rowOI);
+ }
+
+ BytesWritable bytesWritable =
+ new BytesWritable(
+ Arrays.copyOfRange(serialized.getBytes(), 0, serialized.getLength()));
+ byte[] bytes1 = bytesWritable.getBytes();
- byte[] bytes2 = Arrays.copyOfRange(serializeWriteBytes[i].getBytes(), 0, serializeWriteBytes[i].getLength());
+ BytesWritable lazySerializedWriteBytes = serializeWriteBytes[i];
+ byte[] bytes2 = Arrays.copyOfRange(lazySerializedWriteBytes.getBytes(), 0, lazySerializedWriteBytes.getLength());
+ if (bytes1.length != bytes2.length) {
+ fail("SerializeWrite length " + bytes2.length + " and " +
+ "SerDe serialization length " + bytes1.length +
+ " do not match (" + Arrays.toString(primitiveTypeInfos) + ")");
+ }
if (!Arrays.equals(bytes1, bytes2)) {
- fail("SerializeWrite and SerDe serialization does not match");
+ fail("SerializeWrite and SerDe serialization does not match (" + Arrays.toString(primitiveTypeInfos) + ")");
}
serdeBytes[i] = bytesWritable;
}
// Try to deserialize, using DeserializeRead, our Writable row objects created by the SerDe.
- for (int i = 0; i < myTestPrimitiveClasses.length; i++) {
- MyTestPrimitiveClass t = myTestPrimitiveClasses[i];
- PrimitiveTypeInfo[] primitiveTypeInfos = primitiveTypeInfosArray[i];
- LazyBinaryDeserializeRead lazyBinaryDeserializeRead =
+ for (int i = 0; i < rowCount; i++) {
+ Object[] row = rows[i];
+
+ // When doWriteFewerColumns, try to read more fields than exist in the buffer.
+ LazyBinaryDeserializeRead lazyBinaryDeserializeRead =
new LazyBinaryDeserializeRead(primitiveTypeInfos);
+ if (useIncludeColumns) {
+ lazyBinaryDeserializeRead.setColumnsToInclude(columnsToInclude);
+ }
+
BytesWritable bytesWritable = serdeBytes[i];
lazyBinaryDeserializeRead.set(bytesWritable.getBytes(), 0, bytesWritable.getLength());
- for (int index = 0; index < MyTestPrimitiveClass.primitiveCount; index++) {
- Object object = t.getPrimitiveObject(index);
- VerifyFast.verifyDeserializeRead(lazyBinaryDeserializeRead, primitiveTypeInfos[index], object);
+ for (int index = 0; index < columnCount; index++) {
+ if (index >= writeColumnCount ||
+ (useIncludeColumns && !columnsToInclude[index])) {
+ // Should come back as null.
+ VerifyFast.verifyDeserializeRead(lazyBinaryDeserializeRead, primitiveTypeInfos[index], null);
+ } else {
+ Writable writable = (Writable) row[index];
+ VerifyFast.verifyDeserializeRead(lazyBinaryDeserializeRead, primitiveTypeInfos[index], writable);
+ }
}
lazyBinaryDeserializeRead.extraFieldsCheck();
TestCase.assertTrue(!lazyBinaryDeserializeRead.readBeyondConfiguredFieldsWarned());
- TestCase.assertTrue(!lazyBinaryDeserializeRead.readBeyondBufferRangeWarned());
+ if (doWriteFewerColumns) {
+ // The nullByte may cause this to not be true...
+ // TestCase.assertTrue(lazyBinaryDeserializeRead.readBeyondBufferRangeWarned());
+ } else {
+ TestCase.assertTrue(!lazyBinaryDeserializeRead.readBeyondBufferRangeWarned());
+ }
TestCase.assertTrue(!lazyBinaryDeserializeRead.bufferRangeHasExtraDataWarned());
}
}
- public void testLazyBinaryFast() throws Throwable {
- try {
+ public void testLazyBinaryFastCase(int caseNum, boolean doNonRandomFill, Random r) throws Throwable {
+
+ RandomRowObjectSource source = new RandomRowObjectSource();
+ source.init(r);
+
+ int rowCount = 1000;
+ Object[][] rows = source.randomRows(rowCount);
+
+ if (doNonRandomFill) {
+ MyTestClass.nonRandomRowFill(rows, source.primitiveCategories());
+ }
+
+ StructObjectInspector rowStructObjectInspector = source.rowStructObjectInspector();
+
+ PrimitiveTypeInfo[] primitiveTypeInfos = source.primitiveTypeInfos();
+ int columnCount = primitiveTypeInfos.length;
- int num = 1000;
- Random r = new Random(1234);
- MyTestPrimitiveClass[] rows = new MyTestPrimitiveClass[num];
- PrimitiveTypeInfo[][] primitiveTypeInfosArray = new PrimitiveTypeInfo[num][];
- for (int i = 0; i < num; i++) {
- int randField = r.nextInt(MyTestPrimitiveClass.primitiveCount);
- MyTestPrimitiveClass t = new MyTestPrimitiveClass();
- int field = 0;
- ExtraTypeInfo extraTypeInfo = new ExtraTypeInfo();
- t.randomFill(r, randField, field, extraTypeInfo);
- PrimitiveTypeInfo[] primitiveTypeInfos = MyTestPrimitiveClass.getPrimitiveTypeInfos(extraTypeInfo);
- rows[i] = t;
- primitiveTypeInfosArray[i] = primitiveTypeInfos;
+ int writeColumnCount = columnCount;
+ StructObjectInspector writeRowStructObjectInspector = rowStructObjectInspector;
+ boolean doWriteFewerColumns = r.nextBoolean();
+ if (doWriteFewerColumns) {
+ writeColumnCount = 1 + r.nextInt(columnCount);
+ if (writeColumnCount == columnCount) {
+ doWriteFewerColumns = false;
+ } else {
+ writeRowStructObjectInspector = source.partialRowStructObjectInspector(writeColumnCount);
}
+ }
+
+ String fieldNames = ObjectInspectorUtils.getFieldNames(rowStructObjectInspector);
+ String fieldTypes = ObjectInspectorUtils.getFieldTypes(rowStructObjectInspector);
+
+ SerDe serde = TestLazyBinarySerDe.getSerDe(fieldNames, fieldTypes);
+
+ SerDe serde_fewer = null;
+ if (doWriteFewerColumns) {
+ String partialFieldNames = ObjectInspectorUtils.getFieldNames(writeRowStructObjectInspector);
+ String partialFieldTypes = ObjectInspectorUtils.getFieldTypes(writeRowStructObjectInspector);
+
+ serde_fewer = TestLazyBinarySerDe.getSerDe(partialFieldNames, partialFieldTypes);
+ }
- // To get the specific type information for CHAR and VARCHAR, seems like we need an
- // inspector and SerDe per row...
- StructObjectInspector[] rowOIs = new StructObjectInspector[num];
- SerDe[] serdes = new SerDe[num];
- for (int i = 0; i < num; i++) {
- MyTestPrimitiveClass t = rows[i];
+ testLazyBinaryFast(
+ source, rows,
+ serde, rowStructObjectInspector,
+ serde_fewer, writeRowStructObjectInspector,
+ primitiveTypeInfos,
+ /* useIncludeColumns */ false, /* doWriteFewerColumns */ false, r);
- StructObjectInspector rowOI = t.getRowInspector(primitiveTypeInfosArray[i]);
+ testLazyBinaryFast(
+ source, rows,
+ serde, rowStructObjectInspector,
+ serde_fewer, writeRowStructObjectInspector,
+ primitiveTypeInfos,
+ /* useIncludeColumns */ true, /* doWriteFewerColumns */ false, r);
- String fieldNames = ObjectInspectorUtils.getFieldNames(rowOI);
- String fieldTypes = ObjectInspectorUtils.getFieldTypes(rowOI);
+ /*
+ * Can the LazyBinary format really tolerate writing fewer columns?
+ */
+ // if (doWriteFewerColumns) {
+ // testLazyBinaryFast(
+ // source, rows,
+ // serde, rowStructObjectInspector,
+ // serde_fewer, writeRowStructObjectInspector,
+ // primitiveTypeInfos,
+ // /* useIncludeColumns */ false, /* doWriteFewerColumns */ true, r);
+
+ // testLazyBinaryFast(
+ // source, rows,
+ // serde, rowStructObjectInspector,
+ // serde_fewer, writeRowStructObjectInspector,
+ // primitiveTypeInfos,
+ // /* useIncludeColumns */ true, /* doWriteFewerColumns */ true, r);
+ // }
+ }
+
+ public void testLazyBinaryFast() throws Throwable {
+
+ try {
+ Random r = new Random(35790);
- rowOIs[i] = rowOI;
- serdes[i] = TestLazyBinarySerDe.getSerDe(fieldNames, fieldTypes);
+ int caseNum = 0;
+ for (int i = 0; i < 10; i++) {
+ testLazyBinaryFastCase(caseNum, (i % 2 == 0), r);
+ caseNum++;
}
- testLazyBinaryFast(rows, serdes, rowOIs, primitiveTypeInfosArray);
} catch (Throwable e) {
e.printStackTrace();
throw e;
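The doWriteFewerColumns path above hinges on one contract: when the writer emits fewer fields than the reader's schema, the trailing fields must come back null and readBeyondConfiguredFieldsWarned() must flip to true. A toy model of that contract (plain Java, not the Hive classes):

public class FewerColumnsSketch {

  private final Object[] writtenFields;
  private boolean readBeyondConfiguredFieldsWarned = false;

  public FewerColumnsSketch(Object[] writtenFields) {
    this.writtenFields = writtenFields;
  }

  // Reading an index past what the writer emitted yields null and raises the
  // warning flag, mirroring the assertion pattern in the tests above.
  public Object readField(int index) {
    if (index >= writtenFields.length) {
      readBeyondConfiguredFieldsWarned = true;
      return null;
    }
    return writtenFields[index];
  }

  public boolean readBeyondConfiguredFieldsWarned() {
    return readBeyondConfiguredFieldsWarned;
  }

  public static void main(String[] args) {
    FewerColumnsSketch reader = new FewerColumnsSketch(new Object[] { 1, 2 });
    System.out.println(reader.readField(2));                       // null
    System.out.println(reader.readBeyondConfiguredFieldsWarned()); // true
  }
}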
http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/storage-api/src/java/org/apache/hadoop/hive/common/type/RandomTypeUtil.java
----------------------------------------------------------------------
diff --git a/storage-api/src/java/org/apache/hadoop/hive/common/type/RandomTypeUtil.java b/storage-api/src/java/org/apache/hadoop/hive/common/type/RandomTypeUtil.java
index 3fb0cfd..53a7823 100644
--- a/storage-api/src/java/org/apache/hadoop/hive/common/type/RandomTypeUtil.java
+++ b/storage-api/src/java/org/apache/hadoop/hive/common/type/RandomTypeUtil.java
@@ -17,6 +17,7 @@
*/
package org.apache.hadoop.hive.common.type;
+import java.sql.Date;
import java.sql.Timestamp;
import java.text.DateFormat;
import java.text.ParseException;
@@ -26,6 +27,100 @@ import java.util.concurrent.TimeUnit;
public class RandomTypeUtil {
+ public static String getRandString(Random r) {
+ return getRandString(r, null, r.nextInt(10));
+ }
+
+ public static String getRandString(Random r, String characters, int length) {
+ if (characters == null) {
+ characters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
+ }
+ StringBuilder sb = new StringBuilder();
+ for (int i = 0; i < length; i++) {
+ // characters is never null here, so index into it directly.
+ sb.append(characters.charAt(r.nextInt(characters.length())));
+ }
+ return sb.toString();
+ }
+
+ public static byte[] getRandBinary(Random r, int len){
+ byte[] bytes = new byte[len];
+ for (int j = 0; j < len; j++){
+ bytes[j] = (byte) r.nextInt();
+ }
+ return bytes;
+ }
+
+ private static final String DECIMAL_CHARS = "0123456789";
+
+ public static class HiveDecimalAndPrecisionScale {
+ public HiveDecimal hiveDecimal;
+ public int precision;
+ public int scale;
+
+ HiveDecimalAndPrecisionScale(HiveDecimal hiveDecimal, int precision, int scale) {
+ this.hiveDecimal = hiveDecimal;
+ this.precision = precision;
+ this.scale = scale;
+ }
+ }
+
+ public static HiveDecimalAndPrecisionScale getRandHiveDecimal(Random r) {
+ int precision;
+ int scale;
+ while (true) {
+ StringBuilder sb = new StringBuilder();
+ precision = 1 + r.nextInt(18);
+ scale = r.nextInt(precision + 1);
+
+ int integerDigits = precision - scale;
+
+ if (r.nextBoolean()) {
+ sb.append("-");
+ }
+
+ if (integerDigits == 0) {
+ sb.append("0");
+ } else {
+ sb.append(getRandString(r, DECIMAL_CHARS, integerDigits));
+ }
+ if (scale != 0) {
+ sb.append(".");
+ sb.append(getRandString(r, DECIMAL_CHARS, scale));
+ }
+
+ HiveDecimal bd = HiveDecimal.create(sb.toString());
+ precision = bd.precision();
+ scale = bd.scale();
+ if (scale > precision) {
+ // Sometimes weird decimals are produced?
+ continue;
+ }
+
+ // For now, punt.
+ precision = HiveDecimal.SYSTEM_DEFAULT_PRECISION;
+ scale = HiveDecimal.SYSTEM_DEFAULT_SCALE;
+ return new HiveDecimalAndPrecisionScale(bd, precision, scale);
+ }
+ }
+
+ public static Date getRandDate(Random r) {
+ String dateStr = String.format("%d-%02d-%02d",
+ Integer.valueOf(1800 + r.nextInt(500)), // year
+ Integer.valueOf(1 + r.nextInt(12)), // month
+ Integer.valueOf(1 + r.nextInt(28))); // day
+ Date dateVal = Date.valueOf(dateStr);
+ return dateVal;
+ }
+
+ /**
+ * TIMESTAMP.
+ */
+
public static final long NANOSECONDS_PER_SECOND = TimeUnit.SECONDS.toNanos(1);
public static final long MILLISECONDS_PER_SECOND = TimeUnit.SECONDS.toMillis(1);
public static final long NANOSECONDS_PER_MILLISSECOND = TimeUnit.MILLISECONDS.toNanos(1);
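A quick usage sketch of the helpers added above (requires the storage-api jar on the classpath; every call is to a method defined in this diff, and the fixed seed is only illustrative):

import java.util.Random;

import org.apache.hadoop.hive.common.type.RandomTypeUtil;
import org.apache.hadoop.hive.common.type.RandomTypeUtil.HiveDecimalAndPrecisionScale;

public class RandomTypeUtilDemo {
  public static void main(String[] args) {
    Random r = new Random(35790); // fixed seed, same style as the tests above

    // Uppercase A-Z string of random length 0..9.
    System.out.println(RandomTypeUtil.getRandString(r));

    // Four random bytes.
    System.out.println(RandomTypeUtil.getRandBinary(r, 4).length); // 4

    // A date between 1800-01-01 and 2299-12-28.
    System.out.println(RandomTypeUtil.getRandDate(r));

    // A random decimal plus the (currently defaulted) precision/scale.
    HiveDecimalAndPrecisionScale d = RandomTypeUtil.getRandHiveDecimal(r);
    System.out.println(d.hiveDecimal + " precision=" + d.precision + " scale=" + d.scale);
  }
}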
[3/3] hive git commit: HIVE-13682: EOFException with fast hashtable
(Matt McCline, reviewed by Sergey Shelukhin)
Posted by mm...@apache.org.
HIVE-13682: EOFException with fast hashtable (Matt McCline, reviewed by Sergey Shelukhin)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/4533d21b
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/4533d21b
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/4533d21b
Branch: refs/heads/master
Commit: 4533d21b0be487e1f11fcc95578a2ba103e72a64
Parents: fbeee62
Author: Matt McCline <mm...@hortonworks.com>
Authored: Sat May 14 20:44:27 2016 -0700
Committer: Matt McCline <mm...@hortonworks.com>
Committed: Sat May 14 20:44:27 2016 -0700
----------------------------------------------------------------------
.../fast/VectorMapJoinFastBytesHashMap.java | 12 +-
.../VectorMapJoinFastBytesHashMultiSet.java | 10 +-
.../fast/VectorMapJoinFastBytesHashSet.java | 5 +-
.../fast/VectorMapJoinFastBytesHashTable.java | 14 +-
.../fast/VectorMapJoinFastLongHashMap.java | 22 +-
.../fast/VectorMapJoinFastLongHashMultiSet.java | 13 +-
.../fast/VectorMapJoinFastLongHashSet.java | 16 +-
.../fast/VectorMapJoinFastLongHashTable.java | 13 -
.../fast/VectorMapJoinFastMultiKeyHashMap.java | 21 +-
.../VectorMapJoinFastMultiKeyHashMultiSet.java | 25 +-
.../fast/VectorMapJoinFastMultiKeyHashSet.java | 26 +-
.../fast/VectorMapJoinFastStringHashMap.java | 4 +-
.../VectorMapJoinFastStringHashMultiSet.java | 4 +-
.../fast/VectorMapJoinFastStringHashSet.java | 4 +-
.../fast/VectorMapJoinFastValueStore.java | 187 ++++-
.../ql/exec/vector/RandomRowObjectSource.java | 388 ----------
.../ql/exec/vector/TestVectorRowObject.java | 34 +-
.../hive/ql/exec/vector/TestVectorSerDeRow.java | 8 +-
.../vector/mapjoin/fast/CheckFastHashTable.java | 721 +++++++++++++++++++
.../mapjoin/fast/CommonFastHashTable.java | 62 +-
.../fast/TestVectorMapJoinFastBytesHashMap.java | 272 +++++++
.../TestVectorMapJoinFastBytesHashMultiSet.java | 253 +++++++
.../fast/TestVectorMapJoinFastBytesHashSet.java | 252 +++++++
.../fast/TestVectorMapJoinFastLongHashMap.java | 303 ++++----
.../TestVectorMapJoinFastLongHashMultiSet.java | 252 +++++++
.../fast/TestVectorMapJoinFastLongHashSet.java | 250 +++++++
.../TestVectorMapJoinFastMultiKeyHashMap.java | 231 ------
.../hive/serde2/fast/RandomRowObjectSource.java | 423 +++++++++++
.../fast/LazyBinaryDeserializeRead.java | 2 +-
.../apache/hadoop/hive/serde2/VerifyFast.java | 123 ++--
.../hive/serde2/binarysortable/MyTestClass.java | 86 +++
.../binarysortable/TestBinarySortableFast.java | 384 +++++++---
.../hive/serde2/lazy/TestLazySimpleFast.java | 270 ++++---
.../serde2/lazybinary/TestLazyBinaryFast.java | 285 ++++++--
.../hadoop/hive/common/type/RandomTypeUtil.java | 95 +++
35 files changed, 3852 insertions(+), 1218 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java
index 0ff98bd..a4bc188 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java
@@ -18,16 +18,23 @@
package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast;
+import java.io.IOException;
+
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.hive.ql.exec.JoinUtil;
import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashMap;
import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hive.common.util.HashCodeUtil;
+import com.google.common.annotations.VisibleForTesting;
+
/*
- * An single byte array value hash map optimized for vector map join.
+ * A bytes key hash map optimized for vector map join.
+ *
+ * This is the abstract base for the multi-key and string bytes key hash map implementations.
*/
public abstract class VectorMapJoinFastBytesHashMap
extends VectorMapJoinFastBytesHashTable
@@ -37,6 +44,8 @@ public abstract class VectorMapJoinFastBytesHashMap
private VectorMapJoinFastValueStore valueStore;
+ protected BytesWritable testValueBytesWritable;
+
@Override
public VectorMapJoinHashMapResult createHashMapResult() {
return new VectorMapJoinFastValueStore.HashMapResult();
@@ -56,7 +65,6 @@ public abstract class VectorMapJoinFastBytesHashMap
slotTriples[tripleIndex + 1] = hashCode;
slotTriples[tripleIndex + 2] = valueStore.addFirst(valueBytes, 0, valueLength);
// LOG.debug("VectorMapJoinFastBytesHashMap add first keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2]));
- keysAssigned++;
} else {
// Add another value.
// LOG.debug("VectorMapJoinFastBytesHashMap add more keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2]));
http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java
index 5d8ed2d..aaf3497 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java
@@ -18,16 +18,23 @@
package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast;
+import java.io.IOException;
+
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.hive.ql.exec.JoinUtil;
import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashMultiSet;
import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMultiSetResult;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hive.common.util.HashCodeUtil;
+import com.google.common.annotations.VisibleForTesting;
+
/*
- * An single byte array value hash multi-set optimized for vector map join.
+ * A bytes key hash multi-set optimized for vector map join.
+ *
+ * This is the abstract base for the multi-key and string bytes key hash multi-set implementations.
*/
public abstract class VectorMapJoinFastBytesHashMultiSet
extends VectorMapJoinFastBytesHashTable
@@ -51,7 +58,6 @@ public abstract class VectorMapJoinFastBytesHashMultiSet
slotTriples[tripleIndex + 1] = hashCode;
slotTriples[tripleIndex + 2] = 1; // Count.
// LOG.debug("VectorMapJoinFastBytesHashMap add first keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2]));
- keysAssigned++;
} else {
// Add another value.
// LOG.debug("VectorMapJoinFastBytesHashMap add more keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2]));
http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSet.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSet.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSet.java
index 990a2e5..841183e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSet.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSet.java
@@ -27,7 +27,9 @@ import org.apache.hadoop.io.BytesWritable;
import org.apache.hive.common.util.HashCodeUtil;
/*
- * An single byte array value hash multi-set optimized for vector map join.
+ * A bytes key hash set optimized for vector map join.
+ *
+ * This is the abstract base for the multi-key and string bytes key hash set implementations.
*/
public abstract class VectorMapJoinFastBytesHashSet
extends VectorMapJoinFastBytesHashTable
@@ -50,7 +52,6 @@ public abstract class VectorMapJoinFastBytesHashSet
slotTriples[tripleIndex] = keyStore.add(keyBytes, keyStart, keyLength);
slotTriples[tripleIndex + 1] = hashCode;
slotTriples[tripleIndex + 2] = 1; // Existence
- keysAssigned++;
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java
index 6b536f0..d6e107b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java
@@ -42,8 +42,7 @@ public abstract class VectorMapJoinFastBytesHashTable
protected VectorMapJoinFastKeyStore keyStore;
- private BytesWritable testKeyBytesWritable;
- private BytesWritable testValueBytesWritable;
+ protected BytesWritable testKeyBytesWritable;
@Override
public void putRow(BytesWritable currentKey, BytesWritable currentValue) throws HiveException, IOException {
@@ -53,17 +52,6 @@ public abstract class VectorMapJoinFastBytesHashTable
add(keyBytes, 0, keyLength, currentValue);
}
- @VisibleForTesting
- public void putRow(byte[] currentKey, byte[] currentValue) throws HiveException, IOException {
- if (testKeyBytesWritable == null) {
- testKeyBytesWritable = new BytesWritable();
- testValueBytesWritable = new BytesWritable();
- }
- testKeyBytesWritable.set(currentKey, 0, currentKey.length);
- testValueBytesWritable.set(currentValue, 0, currentValue.length);
- putRow(testKeyBytesWritable, testValueBytesWritable);
- }
-
protected abstract void assignSlot(int slot, byte[] keyBytes, int keyStart, int keyLength,
long hashCode, boolean isNewKey, BytesWritable currentValue);
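The removal above relocates the test-only putRow overload into the concrete subclasses (as the testPutRow methods later in this diff). Those helpers share one idiom worth calling out: a lazily created scratch BytesWritable that is reused across calls instead of allocated per row. A generic sketch of the idiom (class and method names hypothetical):

import org.apache.hadoop.io.BytesWritable;

public class ScratchWritableSketch {

  private BytesWritable scratch; // lazily created, then reused across calls

  public BytesWritable wrap(byte[] bytes) {
    if (scratch == null) {
      scratch = new BytesWritable();
    }
    // set() copies into the Writable's backing buffer, so repeated calls
    // reuse that buffer once it is large enough; no per-row allocation.
    scratch.set(bytes, 0, bytes.length);
    return scratch;
  }
}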
http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMap.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMap.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMap.java
index 1384fc9..cd51d0d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMap.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMap.java
@@ -18,17 +18,22 @@
package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast;
+import java.io.IOException;
+
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.hive.ql.exec.JoinUtil;
import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult;
import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinLongHashMap;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hive.common.util.HashCodeUtil;
+import com.google.common.annotations.VisibleForTesting;
+
/*
- * An single long value map optimized for vector map join.
+ * A single LONG key hash map optimized for vector map join.
*/
public class VectorMapJoinFastLongHashMap
extends VectorMapJoinFastLongHashTable
@@ -38,11 +43,26 @@ public class VectorMapJoinFastLongHashMap
protected VectorMapJoinFastValueStore valueStore;
+ private BytesWritable testValueBytesWritable;
+
@Override
public VectorMapJoinHashMapResult createHashMapResult() {
return new VectorMapJoinFastValueStore.HashMapResult();
}
+ /*
+ * A Unit Test convenience method for putting key and value into the hash table using the
+ * actual types.
+ */
+ @VisibleForTesting
+ public void testPutRow(long currentKey, byte[] currentValue) throws HiveException, IOException {
+ if (testValueBytesWritable == null) {
+ testValueBytesWritable = new BytesWritable();
+ }
+ testValueBytesWritable.set(currentValue, 0, currentValue.length);
+ add(currentKey, testValueBytesWritable);
+ }
+
@Override
public void assignSlot(int slot, long key, boolean isNewKey, BytesWritable currentValue) {
http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMultiSet.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMultiSet.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMultiSet.java
index 94bf706..032233a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMultiSet.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMultiSet.java
@@ -31,8 +31,10 @@ import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hive.common.util.HashCodeUtil;
+import com.google.common.annotations.VisibleForTesting;
+
/*
- * An single long value multi-set optimized for vector map join.
+ * A single LONG key hash multi-set optimized for vector map join.
*/
public class VectorMapJoinFastLongHashMultiSet
extends VectorMapJoinFastLongHashTable
@@ -45,6 +47,15 @@ public class VectorMapJoinFastLongHashMultiSet
return new VectorMapJoinFastHashMultiSet.HashMultiSetResult();
}
+ /*
+ * A Unit Test convenience method for putting the key into the hash table using the
+ * actual type.
+ */
+ @VisibleForTesting
+ public void testPutRow(long currentKey) throws HiveException, IOException {
+ add(currentKey, null);
+ }
+
@Override
public void assignSlot(int slot, long key, boolean isNewKey, BytesWritable currentValue) {
http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashSet.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashSet.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashSet.java
index 2cbc548..21701d4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashSet.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashSet.java
@@ -18,18 +18,23 @@
package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast;
+import java.io.IOException;
+
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.hive.ql.exec.JoinUtil;
import org.apache.hadoop.hive.ql.exec.JoinUtil.JoinResult;
import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashSetResult;
import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinLongHashSet;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hive.common.util.HashCodeUtil;
+import com.google.common.annotations.VisibleForTesting;
+
/*
- * An single long value multi-set optimized for vector map join.
+ * A single LONG key hash set optimized for vector map join.
*/
public class VectorMapJoinFastLongHashSet
extends VectorMapJoinFastLongHashTable
@@ -42,6 +47,15 @@ public class VectorMapJoinFastLongHashSet
return new VectorMapJoinFastHashSet.HashSetResult();
}
+ /*
+ * A Unit Test convenience method for putting the key into the hash table using the
+ * actual type.
+ */
+ @VisibleForTesting
+ public void testPutRow(long currentKey) throws HiveException, IOException {
+ add(currentKey, null);
+ }
+
@Override
public void assignSlot(int slot, long key, boolean isNewKey, BytesWritable currentValue) {
http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java
index f37f056..0a502e0 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java
@@ -58,8 +58,6 @@ public abstract class VectorMapJoinFastLongHashTable
private long min;
private long max;
- private BytesWritable testValueBytesWritable;
-
@Override
public boolean useMinMax() {
return useMinMax;
@@ -90,17 +88,6 @@ public abstract class VectorMapJoinFastLongHashTable
add(key, currentValue);
}
-
- @VisibleForTesting
- public void putRow(long currentKey, byte[] currentValue) throws HiveException, IOException {
- if (testValueBytesWritable == null) {
- testValueBytesWritable = new BytesWritable();
- }
- testValueBytesWritable.set(currentValue, 0, currentValue.length);
- add(currentKey, testValueBytesWritable);
- }
-
-
protected abstract void assignSlot(int slot, long key, boolean isNewKey, BytesWritable currentValue);
public void add(long key, BytesWritable currentValue) {
http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMap.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMap.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMap.java
index 9a9fb8d..cee3b3b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMap.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMap.java
@@ -18,17 +18,34 @@
package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast;
+import java.io.IOException;
+
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.io.BytesWritable;
+
import com.google.common.annotations.VisibleForTesting;
/*
* A multi-key value hash map optimized for vector map join.
+ *
+ * The key is stored as the provided bytes (uninterpreted).
*/
public class VectorMapJoinFastMultiKeyHashMap
extends VectorMapJoinFastBytesHashMap {
+ /*
+ * A Unit Test convenience method for putting key and value into the hash table using the
+ * actual types.
+ */
@VisibleForTesting
- public VectorMapJoinFastMultiKeyHashMap(int initialCapacity, float loadFactor, int wbSize) {
- this(false, initialCapacity, loadFactor, wbSize);
+ public void testPutRow(byte[] currentKey, byte[] currentValue) throws HiveException, IOException {
+ if (testKeyBytesWritable == null) {
+ testKeyBytesWritable = new BytesWritable();
+ testValueBytesWritable = new BytesWritable();
+ }
+ testKeyBytesWritable.set(currentKey, 0, currentKey.length);
+ testValueBytesWritable.set(currentValue, 0, currentValue.length);
+ putRow(testKeyBytesWritable, testValueBytesWritable);
}
public VectorMapJoinFastMultiKeyHashMap(
http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMultiSet.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMultiSet.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMultiSet.java
index a8744a5..ff82ac4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMultiSet.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMultiSet.java
@@ -18,15 +18,38 @@
package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast;
+import java.io.IOException;
+
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.io.BytesWritable;
+
+import com.google.common.annotations.VisibleForTesting;
+
/*
- * An multi-key value hash multi-set optimized for vector map join.
+ * A multi-key hash multi-set optimized for vector map join.
+ *
+ * The key is stored as the provided bytes (uninterpreted).
*/
public class VectorMapJoinFastMultiKeyHashMultiSet
extends VectorMapJoinFastBytesHashMultiSet {
+ /*
+ * A Unit Test convenience method for putting the key into the hash table using the
+ * actual type.
+ */
+ @VisibleForTesting
+ public void testPutRow(byte[] currentKey) throws HiveException, IOException {
+ if (testKeyBytesWritable == null) {
+ testKeyBytesWritable = new BytesWritable();
+ }
+ testKeyBytesWritable.set(currentKey, 0, currentKey.length);
+ putRow(testKeyBytesWritable, null);
+ }
+
public VectorMapJoinFastMultiKeyHashMultiSet(
boolean isOuterJoin,
int initialCapacity, float loadFactor, int writeBuffersSize) {
super(initialCapacity, loadFactor, writeBuffersSize);
}
+
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashSet.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashSet.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashSet.java
index a8048e5..de0666d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashSet.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashSet.java
@@ -18,15 +18,39 @@
package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast;
+import java.io.IOException;
+
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.io.BytesWritable;
+
+import com.google.common.annotations.VisibleForTesting;
+
/*
- * An multi-key value hash set optimized for vector map join.
+ * A multi-key hash set optimized for vector map join.
+ *
+ * The key is stored as the provided bytes (uninterpreted).
*/
public class VectorMapJoinFastMultiKeyHashSet
extends VectorMapJoinFastBytesHashSet {
+ /*
+ * A Unit Test convenience method for putting the key into the hash table using the
+ * actual type.
+ */
+ @VisibleForTesting
+ public void testPutRow(byte[] currentKey) throws HiveException, IOException {
+ if (testKeyBytesWritable == null) {
+ testKeyBytesWritable = new BytesWritable();
+ }
+ testKeyBytesWritable.set(currentKey, 0, currentKey.length);
+ putRow(testKeyBytesWritable, null);
+ }
+
public VectorMapJoinFastMultiKeyHashSet(
boolean isOuterJoin,
int initialCapacity, float loadFactor, int writeBuffersSize) {
super(initialCapacity, loadFactor, writeBuffersSize);
}
+
+
}
\ No newline at end of file
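How a unit test would drive the @VisibleForTesting hook above, using the constructor shown in this same file (the capacity, load factor, and buffer-size values are illustrative, as is the demo class name):

import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VectorMapJoinFastMultiKeyHashSet;

public class TestPutRowDemo {
  public static void main(String[] args) throws Exception {
    VectorMapJoinFastMultiKeyHashSet hashSet =
        new VectorMapJoinFastMultiKeyHashSet(
            /* isOuterJoin */ false,
            /* initialCapacity */ 1024,
            /* loadFactor */ 0.75f,
            /* writeBuffersSize */ 1 << 20);

    byte[] key = new byte[] { 1, 2, 3 };
    hashSet.testPutRow(key); // key bytes are stored uninterpreted
    hashSet.testPutRow(key); // duplicate key: still a single set entry
  }
}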
http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMap.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMap.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMap.java
index 6f181b2..35af1d1 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMap.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMap.java
@@ -24,7 +24,9 @@ import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.io.BytesWritable;
/*
- * An single byte array value hash map optimized for vector map join.
+ * A single STRING key hash map optimized for vector map join.
+ *
+ * The key will be deserialized and just the bytes will be stored.
*/
public class VectorMapJoinFastStringHashMap extends VectorMapJoinFastBytesHashMap {
http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMultiSet.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMultiSet.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMultiSet.java
index 9653b71..36120b7 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMultiSet.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMultiSet.java
@@ -24,7 +24,9 @@ import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.io.BytesWritable;
/*
- * An single byte array value hash map optimized for vector map join.
+ * A single STRING key hash multi-set optimized for vector map join.
+ *
+ * The key will be deserialized and just the bytes will be stored.
*/
public class VectorMapJoinFastStringHashMultiSet extends VectorMapJoinFastBytesHashMultiSet {
http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashSet.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashSet.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashSet.java
index 6419a0b..2ed6ab3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashSet.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashSet.java
@@ -24,7 +24,9 @@ import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.io.BytesWritable;
/*
- * An single byte array value hash map optimized for vector map join.
+ * A single STRING key hash set optimized for vector map join.
+ *
+ * The key will be deserialized and just the bytes will be stored.
*/
public class VectorMapJoinFastStringHashSet extends VectorMapJoinFastBytesHashSet {
http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastValueStore.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastValueStore.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastValueStore.java
index 570a747..f96e32b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastValueStore.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastValueStore.java
@@ -23,7 +23,9 @@ import org.slf4j.LoggerFactory;
import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult;
import org.apache.hadoop.hive.serde2.WriteBuffers;
import org.apache.hadoop.hive.serde2.WriteBuffers.ByteSegmentRef;
-import org.apache.hadoop.hive.serde2.WriteBuffers.Position;;
+import org.apache.hadoop.hive.serde2.WriteBuffers.Position;
+
+import com.google.common.base.Preconditions;
// Supports random access.
@@ -142,7 +144,6 @@ public class VectorMapJoinFastValueStore {
}
public void set(VectorMapJoinFastValueStore valueStore, long valueRefWord) {
- // LOG.debug("VectorMapJoinFastValueStore set valueRefWord " + Long.toHexString(valueRefWord));
this.valueStore = valueStore;
this.valueRefWord = valueRefWord;
@@ -217,6 +218,10 @@ public class VectorMapJoinFastValueStore {
if (readIndex == 0) {
/*
+ * Positioned at the first value.
+ */
+
+ /*
* Extract information from reference word from slot table.
*/
absoluteValueOffset =
@@ -226,19 +231,32 @@ public class VectorMapJoinFastValueStore {
valueStore.writeBuffers.setReadPoint(absoluteValueOffset, readPos);
if (isSingleRow) {
+ /*
+ * One element.
+ */
isNextEof = true;
valueLength =
(int) ((valueRefWord & SmallValueLength.bitMask) >> SmallValueLength.bitShift);
boolean isValueLengthSmall = (valueLength != SmallValueLength.allBitsOn);
if (!isValueLengthSmall) {
- // And, if current value is big we must read it.
+
+ // {Big Value Len} {Big Value Bytes}
valueLength = valueStore.writeBuffers.readVInt(readPos);
+ } else {
+
+ // {Small Value Bytes}
+ // (use small length from valueRefWord)
}
} else {
+ /*
+ * First of multiple elements.
+ */
isNextEof = false;
- // 2nd and beyond records have a relative offset word at the beginning.
+ /*
+ * Read the relative offset word at the beginning of the 2nd and beyond records.
+ */
long relativeOffsetWord = valueStore.writeBuffers.readVLong(readPos);
long relativeOffset =
@@ -246,25 +264,31 @@ public class VectorMapJoinFastValueStore {
nextAbsoluteValueOffset = absoluteValueOffset - relativeOffset;
+ valueLength =
+ (int) ((valueRefWord & SmallValueLength.bitMask) >> SmallValueLength.bitShift);
+ boolean isValueLengthSmall = (valueLength != SmallValueLength.allBitsOn);
+
+ /*
+ * Optionally, read current value's big length. {Big Value Len} {Big Value Bytes}
+ * Since this is the first record, the valueRefWord directs us.
+ */
+ if (!isValueLengthSmall) {
+ valueLength = valueStore.writeBuffers.readVInt(readPos);
+ }
+
isNextLast = ((relativeOffsetWord & IsNextValueLastFlag.flagOnMask) != 0);
isNextValueLengthSmall =
((relativeOffsetWord & IsNextValueLengthSmallFlag.flagOnMask) != 0);
- }
- valueLength =
- (int) ((valueRefWord & SmallValueLength.bitMask) >> SmallValueLength.bitShift);
- boolean isValueLengthSmall = (valueLength != SmallValueLength.allBitsOn);
- if (!isValueLengthSmall) {
- // And, if current value is big we must read it.
- valueLength = valueStore.writeBuffers.readVInt(readPos);
- }
-
- // 2nd and beyond have the next value's small length in the current record.
- if (isNextValueLengthSmall) {
- nextSmallValueLength = valueStore.writeBuffers.readVInt(readPos);
- } else {
- nextSmallValueLength = -1;
- }
+ /*
+ * Optionally, the next value's small length could be a 2nd integer...
+ */
+ if (isNextValueLengthSmall) {
+ nextSmallValueLength = valueStore.writeBuffers.readVInt(readPos);
+ } else {
+ nextSmallValueLength = -1;
+ }
+ }
} else {
if (isNextEof) {
@@ -277,24 +301,37 @@ public class VectorMapJoinFastValueStore {
valueStore.writeBuffers.setReadPoint(absoluteValueOffset, readPos);
if (isNextLast) {
+ /*
+ * No relativeOffsetWord in the last value. (This was the first value written.)
+ */
isNextEof = true;
if (isNextValueLengthSmall) {
+
+ // {Small Value Bytes}
valueLength = nextSmallValueLength;
} else {
- valueLength = (int) valueStore.writeBuffers.readVLong(readPos);
+
+ // {Big Value Len} {Big Value Bytes}
+ valueLength = valueStore.writeBuffers.readVInt(readPos);
}
} else {
+ /*
+ * {Rel Offset Word} [Big Value Len] [Next Value Small Len] {Value Bytes}
+ *
+ * 2nd and beyond records have a relative offset word at the beginning.
+ */
isNextEof = false;
- // 2nd and beyond records have a relative offset word at the beginning.
long relativeOffsetWord = valueStore.writeBuffers.readVLong(readPos);
- // Read current value's big length now, if necessary.
+ /*
+ * Optionally, read current value's big length. {Big Value Len} {Big Value Bytes}
+ */
if (isNextValueLengthSmall) {
valueLength = nextSmallValueLength;
} else {
- valueLength = (int) valueStore.writeBuffers.readVLong(readPos);
+ valueLength = valueStore.writeBuffers.readVInt(readPos);
}
long relativeOffset =
@@ -305,9 +342,13 @@ public class VectorMapJoinFastValueStore {
isNextLast = ((relativeOffsetWord & IsNextValueLastFlag.flagOnMask) != 0);
isNextValueLengthSmall =
((relativeOffsetWord & IsNextValueLengthSmallFlag.flagOnMask) != 0);
+
+ /*
+ * Optionally, the next value's small length could be a 2nd integer in the value's
+ * information.
+ */
if (isNextValueLengthSmall) {
- // TODO: Write readVInt
- nextSmallValueLength = (int) valueStore.writeBuffers.readVLong(readPos);
+ nextSmallValueLength = valueStore.writeBuffers.readVInt(readPos);
} else {
nextSmallValueLength = -1;
}
@@ -396,6 +437,51 @@ public class VectorMapJoinFastValueStore {
private static final long flagOnMask = 1L << bitShift;
}
+ private static String valueRefWordToString(long valueRef) {
+ StringBuilder sb = new StringBuilder();
+
+ sb.append(Long.toHexString(valueRef));
+ sb.append(", ");
+ if ((valueRef & IsInvalidFlag.flagOnMask) != 0) {
+ sb.append("(Invalid optimized hash table reference), ");
+ }
+ /*
+ * Extract information.
+ */
+ long absoluteValueOffset =
+ (valueRef & AbsoluteValueOffset.bitMask);
+ int smallValueLength =
+ (int) ((valueRef & SmallValueLength.bitMask) >> SmallValueLength.bitShift);
+ boolean isValueLengthSmall = (smallValueLength != SmallValueLength.allBitsOn);
+ int cappedCount =
+ (int) ((valueRef & CappedCount.bitMask) >> CappedCount.bitShift);
+ boolean isValueLast =
+ ((valueRef & IsLastFlag.flagOnMask) != 0);
+
+ sb.append("absoluteValueOffset ");
+ sb.append(absoluteValueOffset);
+ sb.append(" (");
+ sb.append(Long.toHexString(absoluteValueOffset));
+ sb.append("), ");
+
+ if (isValueLengthSmall) {
+ sb.append("smallValueLength ");
+ sb.append(smallValueLength);
+ sb.append(", ");
+ } else {
+ sb.append("isValueLengthSmall = false, ");
+ }
+
+ sb.append("cappedCount ");
+ sb.append(cappedCount);
+ sb.append(", ");
+
+ sb.append("isValueLast ");
+ sb.append(isValueLast);
+
+ return sb.toString();
+ }
+
/**
* Relative Offset Word stored at the beginning of all but the last value that has a
* relative offset and 2 flags.
@@ -431,6 +517,33 @@ public class VectorMapJoinFastValueStore {
private static final long bitMask = allBitsOn << bitShift;
}
+ private static String relativeOffsetWordToString(long relativeOffsetWord) {
+ StringBuilder sb = new StringBuilder();
+
+ sb.append(Long.toHexString(relativeOffsetWord));
+ sb.append(", ");
+
+ long nextRelativeOffset =
+ (relativeOffsetWord & NextRelativeValueOffset.bitMask) >> NextRelativeValueOffset.bitShift;
+ sb.append("nextRelativeOffset ");
+ sb.append(nextRelativeOffset);
+ sb.append(" (");
+ sb.append(Long.toHexString(nextRelativeOffset));
+ sb.append("), ");
+
+ boolean isNextLast = ((relativeOffsetWord & IsNextValueLastFlag.flagOnMask) != 0);
+ sb.append("isNextLast ");
+ sb.append(isNextLast);
+ sb.append(", ");
+
+ boolean isNextValueLengthSmall =
+ ((relativeOffsetWord & IsNextValueLengthSmallFlag.flagOnMask) != 0);
+ sb.append("isNextValueLengthSmall ");
+ sb.append(isNextValueLengthSmall);
+
+ return sb.toString();
+ }
+
public long addFirst(byte[] valueBytes, int valueStart, int valueLength) {
// First value is written without: next relative offset, next value length, is next value last
@@ -473,8 +586,6 @@ public class VectorMapJoinFastValueStore {
valueRefWord |= SmallValueLength.allBitsOnBitShifted;
}
- // LOG.debug("VectorMapJoinFastValueStore addFirst valueLength " + valueLength + " newAbsoluteOffset " + newAbsoluteOffset + " valueRefWord " + Long.toHexString(valueRefWord));
-
// The lower bits are the absolute value offset.
valueRefWord |= newAbsoluteOffset;
@@ -499,8 +610,6 @@ public class VectorMapJoinFastValueStore {
boolean isOldValueLast =
((oldValueRef & IsLastFlag.flagOnMask) != 0);
- // LOG.debug("VectorMapJoinFastValueStore addMore isOldValueLast " + isOldValueLast + " oldSmallValueLength " + oldSmallValueLength + " oldAbsoluteValueOffset " + oldAbsoluteValueOffset + " oldValueRef " + Long.toHexString(oldValueRef));
-
/*
* Write information about the old value (which becomes our next) at the beginning
* of our new value.
@@ -523,12 +632,6 @@ public class VectorMapJoinFastValueStore {
writeBuffers.writeVLong(relativeOffsetWord);
- // When the next value is small it was not recorded with the old (i.e. next) value and we
- // have to remember it.
- if (isOldValueLengthSmall) {
- writeBuffers.writeVInt(oldSmallValueLength);
- }
-
// Now, we have written all information about the next value, work on the *new* value.
long newValueRef = ((long) newCappedCount) << CappedCount.bitShift;
@@ -536,18 +639,28 @@ public class VectorMapJoinFastValueStore {
if (!isNewValueSmall) {
// Use magic value to indicate we are writing the big value length.
newValueRef |= ((long) SmallValueLength.allBitsOn << SmallValueLength.bitShift);
+ Preconditions.checkState(
+ (int) ((newValueRef & SmallValueLength.bitMask) >> SmallValueLength.bitShift) ==
+ SmallValueLength.allBitsOn);
writeBuffers.writeVInt(valueLength);
+
} else {
// Caller must remember small value length.
newValueRef |= ((long) valueLength) << SmallValueLength.bitShift;
}
+
+ // When the next value is small it was not recorded with the old (i.e. next) value and we
+ // have to remember it.
+ if (isOldValueLengthSmall) {
+
+ writeBuffers.writeVInt(oldSmallValueLength);
+ }
+
writeBuffers.write(valueBytes, valueStart, valueLength);
// The lower bits are the absolute value offset.
newValueRef |= newAbsoluteOffset;
- // LOG.debug("VectorMapJoinFastValueStore addMore valueLength " + valueLength + " newAbsoluteOffset " + newAbsoluteOffset + " newValueRef " + Long.toHexString(newValueRef));
-
return newValueRef;
}
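Note on the encoding above: the value store packs several fields into a single long word and recovers them with the masked-shift pattern ((word & bitMask) >> bitShift, plus flagOnMask tests for one-bit flags). Below is a minimal, self-contained sketch of that pattern; the field names and widths are illustrative only and do not match the real CappedCount/SmallValueLength/IsLastFlag layout in VectorMapJoinFastValueStore.

public class ValueRefWordSketch {
  // Illustrative layout: [ isLast:1 | count:15 | offset:48 ].
  private static final int COUNT_BIT_SHIFT = 48;
  private static final long COUNT_BIT_MASK = 0x7FFFL << COUNT_BIT_SHIFT;
  private static final long IS_LAST_FLAG_ON_MASK = 1L << 63;
  private static final long OFFSET_BIT_MASK = (1L << 48) - 1;

  public static long pack(boolean isLast, int count, long absoluteOffset) {
    long word = ((long) count) << COUNT_BIT_SHIFT;
    if (isLast) {
      word |= IS_LAST_FLAG_ON_MASK;
    }
    // The lower bits are the absolute value offset.
    return word | absoluteOffset;
  }

  public static void main(String[] args) {
    long word = pack(true, 5, 123456L);
    int count = (int) ((word & COUNT_BIT_MASK) >> COUNT_BIT_SHIFT);
    boolean isLast = (word & IS_LAST_FLAG_ON_MASK) != 0;
    long offset = word & OFFSET_BIT_MASK;
    System.out.println("count " + count + " isLast " + isLast + " offset " + offset);
  }
}

The same round trip is what the new Preconditions.checkState in addMore asserts for the SmallValueLength magic value: after masking and shifting, the stored field must read back as allBitsOn.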
http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/RandomRowObjectSource.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/RandomRowObjectSource.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/RandomRowObjectSource.java
deleted file mode 100644
index 2d4baa0..0000000
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/RandomRowObjectSource.java
+++ /dev/null
@@ -1,388 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql.exec.vector;
-
-import java.sql.Date;
-import java.sql.Timestamp;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Random;
-
-import junit.framework.TestCase;
-
-import org.apache.commons.lang.ArrayUtils;
-import org.apache.commons.lang.StringUtils;
-import org.apache.hadoop.hive.common.type.HiveChar;
-import org.apache.hadoop.hive.common.type.HiveDecimal;
-import org.apache.hadoop.hive.common.type.HiveIntervalDayTime;
-import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth;
-import org.apache.hadoop.hive.common.type.HiveVarchar;
-import org.apache.hadoop.hive.common.type.RandomTypeUtil;
-import org.apache.hadoop.hive.serde.serdeConstants;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
-import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBooleanObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableByteObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDateObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDoubleObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableFloatObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveCharObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveDecimalObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveIntervalDayTimeObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveIntervalYearMonthObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveVarcharObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableIntObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableLongObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableShortObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableTimestampObjectInspector;
-import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
-import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
-import org.apache.hadoop.io.BooleanWritable;
-import org.apache.hive.common.util.DateUtils;
-
-/**
- * Generate object inspector and random row object[].
- */
-public class RandomRowObjectSource {
-
- private Random r;
-
- private int columnCount;
-
- private List<String> typeNames;
-
- private PrimitiveCategory[] primitiveCategories;
-
- private PrimitiveTypeInfo[] primitiveTypeInfos;
-
- private List<ObjectInspector> primitiveObjectInspectorList;
-
- private StructObjectInspector rowStructObjectInspector;
-
- public List<String> typeNames() {
- return typeNames;
- }
-
- public PrimitiveCategory[] primitiveCategories() {
- return primitiveCategories;
- }
-
- public PrimitiveTypeInfo[] primitiveTypeInfos() {
- return primitiveTypeInfos;
- }
-
- public StructObjectInspector rowStructObjectInspector() {
- return rowStructObjectInspector;
- }
-
- public void init(Random r) {
- this.r = r;
- chooseSchema();
- }
-
- private static String[] possibleHiveTypeNames = {
- "boolean",
- "tinyint",
- "smallint",
- "int",
- "bigint",
- "date",
- "float",
- "double",
- "string",
- "char",
- "varchar",
- "binary",
- "date",
- "timestamp",
- serdeConstants.INTERVAL_YEAR_MONTH_TYPE_NAME,
- serdeConstants.INTERVAL_DAY_TIME_TYPE_NAME,
- "decimal"
- };
-
- private void chooseSchema() {
- columnCount = 1 + r.nextInt(20);
- typeNames = new ArrayList<String>(columnCount);
- primitiveCategories = new PrimitiveCategory[columnCount];
- primitiveTypeInfos = new PrimitiveTypeInfo[columnCount];
- primitiveObjectInspectorList = new ArrayList<ObjectInspector>(columnCount);
- List<String> columnNames = new ArrayList<String>(columnCount);
- for (int c = 0; c < columnCount; c++) {
- columnNames.add(String.format("col%d", c));
- int typeNum = r.nextInt(possibleHiveTypeNames.length);
- String typeName = possibleHiveTypeNames[typeNum];
- if (typeName.equals("char")) {
- int maxLength = 1 + r.nextInt(100);
- typeName = String.format("char(%d)", maxLength);
- } else if (typeName.equals("varchar")) {
- int maxLength = 1 + r.nextInt(100);
- typeName = String.format("varchar(%d)", maxLength);
- } else if (typeName.equals("decimal")) {
- typeName = String.format("decimal(%d,%d)", HiveDecimal.SYSTEM_DEFAULT_PRECISION, HiveDecimal.SYSTEM_DEFAULT_SCALE);
- }
- PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString(typeName);
- primitiveTypeInfos[c] = primitiveTypeInfo;
- PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory();
- primitiveCategories[c] = primitiveCategory;
- primitiveObjectInspectorList.add(PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(primitiveTypeInfo));
- typeNames.add(typeName);
- }
- rowStructObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, primitiveObjectInspectorList);
- }
-
- public Object[][] randomRows(int n) {
- Object[][] result = new Object[n][];
- for (int i = 0; i < n; i++) {
- result[i] = randomRow();
- }
- return result;
- }
-
- public Object[] randomRow() {
- Object row[] = new Object[columnCount];
- for (int c = 0; c < columnCount; c++) {
- Object object = randomObject(c);
- if (object == null) {
- throw new Error("Unexpected null for column " + c);
- }
- row[c] = getWritableObject(c, object);
- if (row[c] == null) {
- throw new Error("Unexpected null for writable for column " + c);
- }
- }
- return row;
- }
-
- public Object getWritableObject(int column, Object object) {
- ObjectInspector objectInspector = primitiveObjectInspectorList.get(column);
- PrimitiveCategory primitiveCategory = primitiveCategories[column];
- PrimitiveTypeInfo primitiveTypeInfo = primitiveTypeInfos[column];
- switch (primitiveCategory) {
- case BOOLEAN:
- return ((WritableBooleanObjectInspector) objectInspector).create((boolean) object);
- case BYTE:
- return ((WritableByteObjectInspector) objectInspector).create((byte) object);
- case SHORT:
- return ((WritableShortObjectInspector) objectInspector).create((short) object);
- case INT:
- return ((WritableIntObjectInspector) objectInspector).create((int) object);
- case LONG:
- return ((WritableLongObjectInspector) objectInspector).create((long) object);
- case DATE:
- return ((WritableDateObjectInspector) objectInspector).create((Date) object);
- case FLOAT:
- return ((WritableFloatObjectInspector) objectInspector).create((float) object);
- case DOUBLE:
- return ((WritableDoubleObjectInspector) objectInspector).create((double) object);
- case STRING:
- return ((WritableStringObjectInspector) objectInspector).create((String) object);
- case CHAR:
- {
- WritableHiveCharObjectInspector writableCharObjectInspector =
- new WritableHiveCharObjectInspector( (CharTypeInfo) primitiveTypeInfo);
- return writableCharObjectInspector.create(new HiveChar(StringUtils.EMPTY, -1));
- }
- case VARCHAR:
- {
- WritableHiveVarcharObjectInspector writableVarcharObjectInspector =
- new WritableHiveVarcharObjectInspector( (VarcharTypeInfo) primitiveTypeInfo);
- return writableVarcharObjectInspector.create(new HiveVarchar(StringUtils.EMPTY, -1));
- }
- case BINARY:
- return PrimitiveObjectInspectorFactory.writableBinaryObjectInspector.create(ArrayUtils.EMPTY_BYTE_ARRAY);
- case TIMESTAMP:
- return ((WritableTimestampObjectInspector) objectInspector).create(new Timestamp(0));
- case INTERVAL_YEAR_MONTH:
- return ((WritableHiveIntervalYearMonthObjectInspector) objectInspector).create(new HiveIntervalYearMonth(0));
- case INTERVAL_DAY_TIME:
- return ((WritableHiveIntervalDayTimeObjectInspector) objectInspector).create(new HiveIntervalDayTime(0, 0));
- case DECIMAL:
- {
- WritableHiveDecimalObjectInspector writableDecimalObjectInspector =
- new WritableHiveDecimalObjectInspector((DecimalTypeInfo) primitiveTypeInfo);
- return writableDecimalObjectInspector.create(HiveDecimal.ZERO);
- }
- default:
- throw new Error("Unknown primitive category " + primitiveCategory);
- }
- }
-
- public Object randomObject(int column) {
- PrimitiveCategory primitiveCategory = primitiveCategories[column];
- PrimitiveTypeInfo primitiveTypeInfo = primitiveTypeInfos[column];
- switch (primitiveCategory) {
- case BOOLEAN:
- return Boolean.valueOf(r.nextInt(1) == 1);
- case BYTE:
- return Byte.valueOf((byte) r.nextInt());
- case SHORT:
- return Short.valueOf((short) r.nextInt());
- case INT:
- return Integer.valueOf(r.nextInt());
- case LONG:
- return Long.valueOf(r.nextLong());
- case DATE:
- return getRandDate(r);
- case FLOAT:
- return Float.valueOf(r.nextFloat() * 10 - 5);
- case DOUBLE:
- return Double.valueOf(r.nextDouble() * 10 - 5);
- case STRING:
- return getRandString(r);
- case CHAR:
- return getRandHiveChar(r, (CharTypeInfo) primitiveTypeInfo);
- case VARCHAR:
- return getRandHiveVarchar(r, (VarcharTypeInfo) primitiveTypeInfo);
- case BINARY:
- return getRandBinary(r, 1 + r.nextInt(100));
- case TIMESTAMP:
- return RandomTypeUtil.getRandTimestamp(r);
- case INTERVAL_YEAR_MONTH:
- return getRandIntervalYearMonth(r);
- case INTERVAL_DAY_TIME:
- return getRandIntervalDayTime(r);
- case DECIMAL:
- return getRandHiveDecimal(r, (DecimalTypeInfo) primitiveTypeInfo);
- default:
- throw new Error("Unknown primitive category " + primitiveCategory);
- }
- }
-
- public static String getRandString(Random r) {
- return getRandString(r, null, r.nextInt(10));
- }
-
- public static String getRandString(Random r, String characters, int length) {
- if (characters == null) {
- characters = "ABCDEFGHIJKLMabcdefghijklm";
- }
- StringBuilder sb = new StringBuilder();
- sb.append("");
- for (int i = 0; i < length; i++) {
- if (characters == null) {
- sb.append((char) (r.nextInt(128)));
- } else {
- sb.append(characters.charAt(r.nextInt(characters.length())));
- }
- }
- return sb.toString();
- }
-
- public static HiveChar getRandHiveChar(Random r, CharTypeInfo charTypeInfo) {
- int maxLength = 1 + r.nextInt(charTypeInfo.getLength());
- String randomString = getRandString(r, "abcdefghijklmnopqrstuvwxyz", 100);
- HiveChar hiveChar = new HiveChar(randomString, maxLength);
- return hiveChar;
- }
-
- public static HiveVarchar getRandHiveVarchar(Random r, VarcharTypeInfo varcharTypeInfo) {
- int maxLength = 1 + r.nextInt(varcharTypeInfo.getLength());
- String randomString = getRandString(r, "abcdefghijklmnopqrstuvwxyz", 100);
- HiveVarchar hiveVarchar = new HiveVarchar(randomString, maxLength);
- return hiveVarchar;
- }
-
- public static byte[] getRandBinary(Random r, int len){
- byte[] bytes = new byte[len];
- for (int j = 0; j < len; j++){
- bytes[j] = Byte.valueOf((byte) r.nextInt());
- }
- return bytes;
- }
-
- private static final String DECIMAL_CHARS = "0123456789";
-
- public static HiveDecimal getRandHiveDecimal(Random r, DecimalTypeInfo decimalTypeInfo) {
- while (true) {
- StringBuilder sb = new StringBuilder();
- int precision = 1 + r.nextInt(18);
- int scale = 0 + r.nextInt(precision + 1);
-
- int integerDigits = precision - scale;
-
- if (r.nextBoolean()) {
- sb.append("-");
- }
-
- if (integerDigits == 0) {
- sb.append("0");
- } else {
- sb.append(getRandString(r, DECIMAL_CHARS, integerDigits));
- }
- if (scale != 0) {
- sb.append(".");
- sb.append(getRandString(r, DECIMAL_CHARS, scale));
- }
-
- HiveDecimal bd = HiveDecimal.create(sb.toString());
- if (bd.scale() > bd.precision()) {
- // Sometimes weird decimals are produced?
- continue;
- }
-
- return bd;
- }
- }
-
- public static Date getRandDate(Random r) {
- String dateStr = String.format("%d-%02d-%02d",
- Integer.valueOf(1800 + r.nextInt(500)), // year
- Integer.valueOf(1 + r.nextInt(12)), // month
- Integer.valueOf(1 + r.nextInt(28))); // day
- Date dateVal = Date.valueOf(dateStr);
- return dateVal;
- }
-
- public static HiveIntervalYearMonth getRandIntervalYearMonth(Random r) {
- String yearMonthSignStr = r.nextInt(2) == 0 ? "" : "-";
- String intervalYearMonthStr = String.format("%s%d-%d",
- yearMonthSignStr,
- Integer.valueOf(1800 + r.nextInt(500)), // year
- Integer.valueOf(0 + r.nextInt(12))); // month
- HiveIntervalYearMonth intervalYearMonthVal = HiveIntervalYearMonth.valueOf(intervalYearMonthStr);
- TestCase.assertTrue(intervalYearMonthVal != null);
- return intervalYearMonthVal;
- }
-
- public static HiveIntervalDayTime getRandIntervalDayTime(Random r) {
- String optionalNanos = "";
- if (r.nextInt(2) == 1) {
- optionalNanos = String.format(".%09d",
- Integer.valueOf(0 + r.nextInt(DateUtils.NANOS_PER_SEC)));
- }
- String yearMonthSignStr = r.nextInt(2) == 0 ? "" : "-";
- String dayTimeStr = String.format("%s%d %02d:%02d:%02d%s",
- yearMonthSignStr,
- Integer.valueOf(1 + r.nextInt(28)), // day
- Integer.valueOf(0 + r.nextInt(24)), // hour
- Integer.valueOf(0 + r.nextInt(60)), // minute
- Integer.valueOf(0 + r.nextInt(60)), // second
- optionalNanos);
- HiveIntervalDayTime intervalDayTimeVal = HiveIntervalDayTime.valueOf(dayTimeStr);
- TestCase.assertTrue(intervalDayTimeVal != null);
- return intervalDayTimeVal;
- }
-}
http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorRowObject.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorRowObject.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorRowObject.java
index 959a2af..c55d951 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorRowObject.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorRowObject.java
@@ -19,13 +19,10 @@
package org.apache.hadoop.hive.ql.exec.vector;
import java.util.Arrays;
-import java.util.HashMap;
-import java.util.Map;
import java.util.Random;
import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.fast.RandomRowObjectSource;
import junit.framework.TestCase;
@@ -50,7 +47,7 @@ public class TestVectorRowObject extends TestCase {
}
}
- void testVectorRowObject(int caseNum, Random r) throws HiveException {
+ void testVectorRowObject(int caseNum, boolean sort, Random r) throws HiveException {
String[] emptyScratchTypeNames = new String[0];
@@ -74,6 +71,9 @@ public class TestVectorRowObject extends TestCase {
vectorExtractRow.init(source.typeNames());
Object[][] randomRows = source.randomRows(100000);
+ if (sort) {
+ source.sort(randomRows);
+ }
int firstRandomRowIndex = 0;
for (int i = 0; i < randomRows.length; i++) {
Object[] row = randomRows[i];
@@ -93,14 +93,22 @@ public class TestVectorRowObject extends TestCase {
public void testVectorRowObject() throws Throwable {
- try {
- Random r = new Random(5678);
- for (int c = 0; c < 10; c++) {
- testVectorRowObject(c, r);
+ try {
+ Random r = new Random(5678);
+
+ int caseNum = 0;
+ for (int i = 0; i < 10; i++) {
+ testVectorRowObject(caseNum, false, r);
+ caseNum++;
+ }
+
+ // Try one sorted.
+ testVectorRowObject(caseNum, true, r);
+ caseNum++;
+
+ } catch (Throwable e) {
+ e.printStackTrace();
+ throw e;
}
- } catch (Throwable e) {
- e.printStackTrace();
- throw e;
- }
}
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSerDeRow.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSerDeRow.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSerDeRow.java
index e37d2bf..da69ee3 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSerDeRow.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSerDeRow.java
@@ -22,8 +22,6 @@ import java.io.IOException;
import java.sql.Date;
import java.sql.Timestamp;
import java.util.Arrays;
-import java.util.HashMap;
-import java.util.Map;
import java.util.Properties;
import java.util.Random;
@@ -50,6 +48,7 @@ import org.apache.hadoop.hive.serde2.ByteStream.Output;
import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead;
import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite;
import org.apache.hadoop.hive.serde2.fast.DeserializeRead;
+import org.apache.hadoop.hive.serde2.fast.RandomRowObjectSource;
import org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters;
import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
import org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleDeserializeRead;
@@ -62,7 +61,6 @@ import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
import org.apache.hadoop.hive.serde2.fast.SerializeWrite;
import org.apache.hadoop.io.BooleanWritable;
@@ -86,7 +84,7 @@ public class TestVectorSerDeRow extends TestCase {
LAZY_SIMPLE
}
- void deserializeAndVerify(Output output, DeserializeRead deserializeRead,
+ void deserializeAndVerify(Output output, DeserializeRead deserializeRead,
RandomRowObjectSource source, Object[] expectedRow)
throws HiveException, IOException {
deserializeRead.set(output.getData(), 0, output.getLength());
@@ -523,7 +521,7 @@ public class TestVectorSerDeRow extends TestCase {
// Set the configuration parameters
tbl.setProperty(serdeConstants.SERIALIZATION_FORMAT, "9");
-
+
tbl.setProperty("columns", fieldNames);
tbl.setProperty("columns.types", fieldTypes);
http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CheckFastHashTable.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CheckFastHashTable.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CheckFastHashTable.java
new file mode 100644
index 0000000..3a23584
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CheckFastHashTable.java
@@ -0,0 +1,721 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Random;
+import java.util.TreeMap;
+
+import junit.framework.TestCase;
+
+import org.apache.hadoop.hive.ql.exec.JoinUtil;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr;
+import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult;
+import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMultiSetResult;
+import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashSetResult;
+import org.apache.hadoop.hive.serde2.WriteBuffers;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.WritableComparator;
+
+import com.google.common.base.Preconditions;
+
+import static org.junit.Assert.*;
+
+public class CheckFastHashTable {
+
+ public static boolean findMatch(byte[] valueBytes, List<byte[]> actualValues, int actualCount, boolean[] taken) {
+ for (int i = 0; i < actualCount; i++) {
+ if (!taken[i]) {
+ byte[] actualBytes = actualValues.get(i);
+ if (StringExpr.compare(valueBytes, 0, valueBytes.length, actualBytes, 0, actualBytes.length) == 0) {
+ taken[i] = true;
+ return true;
+ }
+ }
+ }
+ return false;
+ }
+
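+ /*
+ * Check that the values the hash map returns for one key are, as a
+ * multiset, exactly the expected values. The fast hash map makes no
+ * ordering guarantee, so each actual value is matched greedily and
+ * marked taken (see findMatch above).
+ */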
+ public static void verifyHashMapValues(VectorMapJoinHashMapResult hashMapResult,
+ List<byte[]> values) {
+
+ int valueCount = values.size();
+
+ WriteBuffers.ByteSegmentRef ref = hashMapResult.first();
+
+ // Read through all values.
+ List<byte[]> actualValues = new ArrayList<byte[]>();
+ while (true) {
+ byte[] bytes = ref.getBytes();
+ int offset = (int) ref.getOffset();
+ int length = ref.getLength();
+
+ if (length == 0) {
+ actualValues.add(new byte[0]);
+ } else {
+ actualValues.add(Arrays.copyOfRange(bytes, offset, offset + length));
+ }
+ ref = hashMapResult.next();
+ if (ref == null) {
+ break;
+ }
+ }
+
+ int actualCount = actualValues.size();
+
+ if (valueCount != actualCount) {
+ TestCase.fail("values.size() " + valueCount + " does not match actualCount " + actualCount);
+ }
+
+ boolean[] taken = new boolean[actualCount];
+
+ for (int i = 0; i < actualCount; i++) {
+ byte[] valueBytes = values.get(i);
+
+ if (!findMatch(valueBytes, actualValues, actualCount, taken)) {
+ List<Integer> availableLengths = new ArrayList<Integer>();
+ for (int a = 0; a < actualCount; a++) {
+ if (!taken[a]) {
+ availableLengths.add(actualValues.get(a).length);
+ }
+ }
+ TestCase.fail("No match for actual value (valueBytes length " + valueBytes.length +
+ ", availableLengths " + availableLengths.toString() + " of " + actualCount + " total)");
+ }
+ }
+ }
+
+ /*
+ * Element for Key: Long x Hash Table: HashMap
+ */
+ public static class FastLongHashMapElement {
+ private long key;
+ private List<byte[]> values;
+
+ public FastLongHashMapElement(long key, byte[] firstValue) {
+ this.key = key;
+ values = new ArrayList<byte[]>();
+ values.add(firstValue);
+ }
+
+ public long getKey() {
+ return key;
+ }
+
+ public int getValueCount() {
+ return values.size();
+ }
+
+ public List<byte[]> getValues() {
+ return values;
+ }
+
+ public void addValue(byte[] value) {
+ values.add(value);
+ }
+ }
+
+ /*
+ * Verify table for Key: Long x Hash Table: HashMap
+ */
+ public static class VerifyFastLongHashMap {
+
+ private int count;
+
+ private FastLongHashMapElement[] array;
+
+ private HashMap<Long, Integer> keyValueMap;
+
+ public VerifyFastLongHashMap() {
+ count = 0;
+ array = new FastLongHashMapElement[50];
+ keyValueMap = new HashMap<Long, Integer>();
+ }
+
+ public int getCount() {
+ return count;
+ }
+
+ public boolean contains(long key) {
+ return keyValueMap.containsKey(key);
+ }
+
+ public void add(long key, byte[] value) {
+ if (keyValueMap.containsKey(key)) {
+ int index = keyValueMap.get(key);
+ array[index].addValue(value);
+ } else {
+ if (count >= array.length) {
+ // Grow.
+ FastLongHashMapElement[] newArray = new FastLongHashMapElement[array.length * 2];
+ System.arraycopy(array, 0, newArray, 0, count);
+ array = newArray;
+ }
+ array[count] = new FastLongHashMapElement(key, value);
+ keyValueMap.put(key, count);
+ count++;
+ }
+ }
+
+ public long addRandomExisting(byte[] value, Random r) {
+ Preconditions.checkState(count > 0);
+ int index = r.nextInt(count);
+ array[index].addValue(value);
+ return array[index].getKey();
+ }
+
+ public long getKey(int index) {
+ return array[index].getKey();
+ }
+
+ public List<byte[]> getValues(int index) {
+ return array[index].getValues();
+ }
+
+ public void verify(VectorMapJoinFastLongHashMap map) {
+ int mapSize = map.size();
+ if (mapSize != count) {
+ TestCase.fail("map.size() does not match expected count");
+ }
+
+ for (int index = 0; index < count; index++) {
+ FastLongHashMapElement element = array[index];
+ long key = element.getKey();
+ List<byte[]> values = element.getValues();
+
+ VectorMapJoinHashMapResult hashMapResult = map.createHashMapResult();
+ JoinUtil.JoinResult joinResult = map.lookup(key, hashMapResult);
+ if (joinResult != JoinUtil.JoinResult.MATCH) {
+ fail("expected JoinResult.MATCH");
+ }
+
+ verifyHashMapValues(hashMapResult, values);
+ }
+ }
+ }
+
+ /*
+ * Element for Key: byte[] x Hash Table: HashMap
+ */
+ public static class FastBytesHashMapElement {
+ private byte[] key;
+ private List<byte[]> values;
+
+ public FastBytesHashMapElement(byte[] key, byte[] firstValue) {
+ this.key = key;
+ values = new ArrayList<byte[]>();
+ values.add(firstValue);
+ }
+
+ public byte[] getKey() {
+ return key;
+ }
+
+ public int getValueCount() {
+ return values.size();
+ }
+
+ public List<byte[]> getValues() {
+ return values;
+ }
+
+ public void addValue(byte[] value) {
+ values.add(value);
+ }
+ }
+
+ /*
+ * Verify table for Key: byte[] x Hash Table: HashMap
+ */
+ public static class VerifyFastBytesHashMap {
+
+ private int count;
+
+ private FastBytesHashMapElement[] array;
+
+ private TreeMap<BytesWritable, Integer> keyValueMap;
+
+ public VerifyFastBytesHashMap() {
+ count = 0;
+ array = new FastBytesHashMapElement[50];
+
+ // We use BytesWritable because it supports Comparable for our TreeMap.
+ keyValueMap = new TreeMap<BytesWritable, Integer>();
+ }
+
+ public int getCount() {
+ return count;
+ }
+
+ public boolean contains(byte[] key) {
+ BytesWritable keyBytesWritable = new BytesWritable(key, key.length);
+ return keyValueMap.containsKey(keyBytesWritable);
+ }
+
+ public void add(byte[] key, byte[] value) {
+ BytesWritable keyBytesWritable = new BytesWritable(key, key.length);
+ if (keyValueMap.containsKey(keyBytesWritable)) {
+ int index = keyValueMap.get(keyBytesWritable);
+ array[index].addValue(value);
+ } else {
+ if (count >= array.length) {
+ // Grow.
+ FastBytesHashMapElement[] newArray = new FastBytesHashMapElement[array.length * 2];
+ System.arraycopy(array, 0, newArray, 0, count);
+ array = newArray;
+ }
+ array[count] = new FastBytesHashMapElement(key, value);
+ keyValueMap.put(keyBytesWritable, count);
+ count++;
+ }
+ }
+
+ public byte[] addRandomExisting(byte[] value, Random r) {
+ Preconditions.checkState(count > 0);
+ int index = r.nextInt(count);
+ array[index].addValue(value);
+ return array[index].getKey();
+ }
+
+ public byte[] getKey(int index) {
+ return array[index].getKey();
+ }
+
+ public List<byte[]> getValues(int index) {
+ return array[index].getValues();
+ }
+
+ public void verify(VectorMapJoinFastBytesHashMap map) {
+ int mapSize = map.size();
+ if (mapSize != count) {
+ TestCase.fail("map.size() does not match expected count");
+ }
+
+ for (int index = 0; index < count; index++) {
+ FastBytesHashMapElement element = array[index];
+ byte[] key = element.getKey();
+ List<byte[]> values = element.getValues();
+
+ VectorMapJoinHashMapResult hashMapResult = map.createHashMapResult();
+ JoinUtil.JoinResult joinResult = map.lookup(key, 0, key.length, hashMapResult);
+ if (joinResult != JoinUtil.JoinResult.MATCH) {
+ fail("expected JoinResult.MATCH");
+ }
+
+ verifyHashMapValues(hashMapResult, values);
+ }
+ }
+ }
+
+ /*
+ * Element for Key: Long x Hash Table: HashMultiSet
+ */
+ public static class FastLongHashMultiSetElement {
+ private long key;
+ private int multiSetCount;
+
+ public FastLongHashMultiSetElement(long key) {
+ this.key = key;
+ multiSetCount = 1;
+ }
+
+ public long getKey() {
+ return key;
+ }
+
+ public int getMultiSetCount() {
+ return multiSetCount;
+ }
+
+ public void incrementMultiSetCount() {
+ multiSetCount++;
+ }
+ }
+
+ /*
+ * Verify table for Key: Long x Hash Table: HashMultiSet
+ */
+ public static class VerifyFastLongHashMultiSet {
+
+ private int count;
+
+ private FastLongHashMultiSetElement[] array;
+
+ private HashMap<Long, Integer> keyValueMap;
+
+ public VerifyFastLongHashMultiSet() {
+ count = 0;
+ array = new FastLongHashMultiSetElement[50];
+ keyValueMap = new HashMap<Long, Integer>();
+ }
+
+ public int getCount() {
+ return count;
+ }
+
+ public boolean contains(long key) {
+ return keyValueMap.containsKey(key);
+ }
+
+ public void add(long key) {
+ if (keyValueMap.containsKey(key)) {
+ int index = keyValueMap.get(key);
+ array[index].incrementMultiSetCount();
+ } else {
+ if (count >= array.length) {
+ // Grow.
+ FastLongHashMultiSetElement[] newArray = new FastLongHashMultiSetElement[array.length * 2];
+ System.arraycopy(array, 0, newArray, 0, count);
+ array = newArray;
+ }
+ array[count] = new FastLongHashMultiSetElement(key);
+ keyValueMap.put(key, count);
+ count++;
+ }
+ }
+
+ public long addRandomExisting(byte[] value, Random r) {
+ Preconditions.checkState(count > 0);
+ int index = r.nextInt(count);
+ array[index].incrementMultiSetCount();
+ return array[index].getKey();
+ }
+
+ public long getKey(int index) {
+ return array[index].getKey();
+ }
+
+ public int getMultiSetCount(int index) {
+ return array[index].getMultiSetCount();
+ }
+
+ public void verify(VectorMapJoinFastLongHashMultiSet map) {
+ int mapSize = map.size();
+ if (mapSize != count) {
+ TestCase.fail("map.size() does not match expected count");
+ }
+
+ for (int index = 0; index < count; index++) {
+ FastLongHashMultiSetElement element = array[index];
+ long key = element.getKey();
+ int multiSetCount = element.getMultiSetCount();
+
+ VectorMapJoinHashMultiSetResult hashMultiSetResult = map.createHashMultiSetResult();
+ JoinUtil.JoinResult joinResult = map.contains(key, hashMultiSetResult);
+ if (joinResult != JoinUtil.JoinResult.MATCH) {
+ fail("expected JoinResult.MATCH");
+ }
+
+ assertEquals(multiSetCount, hashMultiSetResult.count());
+ }
+ }
+ }
+
+ /*
+ * Element for Key: byte[] x Hash Table: HashMultiSet
+ */
+ public static class FastBytesHashMultiSetElement {
+ private byte[] key;
+ private int multiSetCount;
+
+ public FastBytesHashMultiSetElement(byte[] key) {
+ this.key = key;
+ multiSetCount = 1;
+ }
+
+ public byte[] getKey() {
+ return key;
+ }
+
+ public int getMultiSetCount() {
+ return multiSetCount;
+ }
+
+ public void incrementMultiSetCount() {
+ multiSetCount++;
+ }
+ }
+
+ /*
+ * Verify table for Key: byte[] x Hash Table: HashMultiSet
+ */
+ public static class VerifyFastBytesHashMultiSet {
+
+ private int count;
+
+ private FastBytesHashMultiSetElement[] array;
+
+ private TreeMap<BytesWritable, Integer> keyValueMap;
+
+ public VerifyFastBytesHashMultiSet() {
+ count = 0;
+ array = new FastBytesHashMultiSetElement[50];
+
+ // We use BytesWritable because it supports Comparable for our TreeMap.
+ keyValueMap = new TreeMap<BytesWritable, Integer>();
+ }
+
+ public int getCount() {
+ return count;
+ }
+
+ public boolean contains(byte[] key) {
+ BytesWritable keyBytesWritable = new BytesWritable(key, key.length);
+ return keyValueMap.containsKey(keyBytesWritable);
+ }
+
+ public void add(byte[] key) {
+ BytesWritable keyBytesWritable = new BytesWritable(key, key.length);
+ if (keyValueMap.containsKey(keyBytesWritable)) {
+ int index = keyValueMap.get(keyBytesWritable);
+ array[index].incrementMultiSetCount();
+ } else {
+ if (count >= array.length) {
+ // Grow.
+ FastBytesHashMultiSetElement[] newArray = new FastBytesHashMultiSetElement[array.length * 2];
+ System.arraycopy(array, 0, newArray, 0, count);
+ array = newArray;
+ }
+ array[count] = new FastBytesHashMultiSetElement(key);
+ keyValueMap.put(keyBytesWritable, count);
+ count++;
+ }
+ }
+
+ public byte[] addRandomExisting(byte[] value, Random r) {
+ Preconditions.checkState(count > 0);
+ int index = r.nextInt(count);
+ array[index].incrementMultiSetCount();
+ return array[index].getKey();
+ }
+
+ public byte[] getKey(int index) {
+ return array[index].getKey();
+ }
+
+ public int getMultiSetCount(int index) {
+ return array[index].getMultiSetCount();
+ }
+
+ public void verify(VectorMapJoinFastBytesHashMultiSet map) {
+ int mapSize = map.size();
+ if (mapSize != count) {
+ TestCase.fail("map.size() does not match expected count");
+ }
+
+ for (int index = 0; index < count; index++) {
+ FastBytesHashMultiSetElement element = array[index];
+ byte[] key = element.getKey();
+ int multiSetCount = element.getMultiSetCount();
+
+ VectorMapJoinHashMultiSetResult hashMultiSetResult = map.createHashMultiSetResult();
+ JoinUtil.JoinResult joinResult = map.contains(key, 0, key.length, hashMultiSetResult);
+ if (joinResult != JoinUtil.JoinResult.MATCH) {
+ fail("expected JoinResult.MATCH");
+ }
+
+ assertEquals(multiSetCount, hashMultiSetResult.count());
+ }
+ }
+ }
+
+ /*
+ * Element for Key: Long x Hash Table: HashSet
+ */
+ public static class FastLongHashSetElement {
+ private long key;
+
+ public FastLongHashSetElement(long key) {
+ this.key = key;
+ }
+
+ public long getKey() {
+ return key;
+ }
+ }
+
+ /*
+ * Verify table for Key: Long x Hash Table: HashSet
+ */
+ public static class VerifyFastLongHashSet {
+
+ private int count;
+
+ private FastLongHashSetElement[] array;
+
+ private HashMap<Long, Integer> keyValueMap;
+
+ public VerifyFastLongHashSet() {
+ count = 0;
+ array = new FastLongHashSetElement[50];
+ keyValueMap = new HashMap<Long, Integer>();
+ }
+
+ public int getCount() {
+ return count;
+ }
+
+ public boolean contains(long key) {
+ return keyValueMap.containsKey(key);
+ }
+
+ public void add(long key) {
+ if (keyValueMap.containsKey(key)) {
+ // Already exists.
+ } else {
+ if (count >= array.length) {
+ // Grow.
+ FastLongHashSetElement[] newArray = new FastLongHashSetElement[array.length * 2];
+ System.arraycopy(array, 0, newArray, 0, count);
+ array = newArray;
+ }
+ array[count] = new FastLongHashSetElement(key);
+ keyValueMap.put(key, count);
+ count++;
+ }
+ }
+
+ public long addRandomExisting(byte[] value, Random r) {
+ Preconditions.checkState(count > 0);
+ int index = r.nextInt(count);
+
+ // Exists already.
+
+ return array[index].getKey();
+ }
+
+ public long getKey(int index) {
+ return array[index].getKey();
+ }
+
+ public void verify(VectorMapJoinFastLongHashSet map) {
+ int mapSize = map.size();
+ if (mapSize != count) {
+ TestCase.fail("map.size() does not match expected count");
+ }
+
+ for (int index = 0; index < count; index++) {
+ FastLongHashSetElement element = array[index];
+ long key = element.getKey();
+
+ VectorMapJoinHashSetResult hashSetResult = map.createHashSetResult();
+ JoinUtil.JoinResult joinResult = map.contains(key, hashSetResult);
+ if (joinResult != JoinUtil.JoinResult.MATCH) {
+ fail("expected JoinResult.MATCH");
+ }
+ }
+ }
+ }
+
+ /*
+ * Element for Key: byte[] x Hash Table: HashSet
+ */
+ public static class FastBytesHashSetElement {
+ private byte[] key;
+
+ public FastBytesHashSetElement(byte[] key) {
+ this.key = key;
+ }
+
+ public byte[] getKey() {
+ return key;
+ }
+ }
+
+ /*
+ * Verify table for Key: byte[] x Hash Table: HashSet
+ */
+ public static class VerifyFastBytesHashSet {
+
+ private int count;
+
+ private FastBytesHashSetElement[] array;
+
+ private TreeMap<BytesWritable, Integer> keyValueMap;
+
+ public VerifyFastBytesHashSet() {
+ count = 0;
+ array = new FastBytesHashSetElement[50];
+
+ // We use BytesWritable because it supports Comparable for our TreeMap.
+ keyValueMap = new TreeMap<BytesWritable, Integer>();
+ }
+
+ public int getCount() {
+ return count;
+ }
+
+ public boolean contains(byte[] key) {
+ BytesWritable keyBytesWritable = new BytesWritable(key, key.length);
+ return keyValueMap.containsKey(keyBytesWritable);
+ }
+
+ public void add(byte[] key) {
+ BytesWritable keyBytesWritable = new BytesWritable(key, key.length);
+ if (keyValueMap.containsKey(keyBytesWritable)) {
+ // Already exists.
+ } else {
+ if (count >= array.length) {
+ // Grow.
+ FastBytesHashSetElement[] newArray = new FastBytesHashSetElement[array.length * 2];
+ System.arraycopy(array, 0, newArray, 0, count);
+ array = newArray;
+ }
+ array[count] = new FastBytesHashSetElement(key);
+ keyValueMap.put(keyBytesWritable, count);
+ count++;
+ }
+ }
+
+ public byte[] addRandomExisting(byte[] value, Random r) {
+ Preconditions.checkState(count > 0);
+ int index = r.nextInt(count);
+
+ // Already exists.
+
+ return array[index].getKey();
+ }
+
+ public byte[] getKey(int index) {
+ return array[index].getKey();
+ }
+
+ public void verify(VectorMapJoinFastBytesHashSet map) {
+ int mapSize = map.size();
+ if (mapSize != count) {
+ TestCase.fail("map.size() does not match expected count");
+ }
+
+ for (int index = 0; index < count; index++) {
+ FastBytesHashSetElement element = array[index];
+ byte[] key = element.getKey();
+
+ VectorMapJoinHashSetResult hashSetResult = map.createHashSetResult();
+ JoinUtil.JoinResult joinResult = map.contains(key, 0, key.length, hashSetResult);
+ if (joinResult != JoinUtil.JoinResult.MATCH) {
+ fail("expected JoinResult.MATCH");
+ }
+ }
+ }
+ }
+}
\ No newline at end of file
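All of the Verify* classes above follow one oracle pattern: mirror every mutation into a trivially correct model, then re-check the fast table against it. A minimal sketch of that pattern for the multiset case follows; the class and method names are illustrative and not part of the patch.

import java.util.HashMap;
import java.util.Map;

public class MultiSetOracleSketch {
  // Trivially correct model: key -> number of times it was added.
  private final Map<Long, Integer> expectedCounts = new HashMap<Long, Integer>();

  public void add(long key) {
    Integer old = expectedCounts.get(key);
    expectedCounts.put(key, old == null ? 1 : old + 1);
  }

  public int expectedCount(long key) {
    Integer c = expectedCounts.get(key);
    return c == null ? 0 : c;
  }
}

A test performs each put against both the fast multiset and the oracle, then asserts that contains() reports MATCH and that count() equals expectedCount(key), which is exactly what VerifyFastLongHashMultiSet.verify does.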
http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CommonFastHashTable.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CommonFastHashTable.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CommonFastHashTable.java
index c2375e0..90e8f33 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CommonFastHashTable.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CommonFastHashTable.java
@@ -18,16 +18,8 @@
package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
import java.util.Random;
-import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult;
-import org.apache.hadoop.hive.serde2.WriteBuffers;
-
-import static org.junit.Assert.*;
-
public class CommonFastHashTable {
protected static final float LOAD_FACTOR = 0.75f;
@@ -39,6 +31,10 @@ public class CommonFastHashTable {
protected static final int LARGE_CAPACITY = 8388608;
protected static Random random;
+ protected static final int MAX_KEY_LENGTH = 100;
+
+ protected static final int MAX_VALUE_LENGTH = 1000;
+
public static int generateLargeCount() {
int count = 0;
if (random.nextInt(100) != 0) {
@@ -75,54 +71,4 @@ public class CommonFastHashTable {
}
return count;
}
- public static void verifyHashMapResult(VectorMapJoinHashMapResult hashMapResult,
- RandomByteArrayStream randomByteArrayStream ) {
-
- List<byte[]> resultBytes = new ArrayList<byte[]>();
- int count = 0;
- if (hashMapResult.hasRows()) {
- WriteBuffers.ByteSegmentRef ref = hashMapResult.first();
- while (ref != null) {
- count++;
- byte[] bytes = ref.getBytes();
- int offset = (int) ref.getOffset();
- int length = ref.getLength();
- resultBytes.add(Arrays.copyOfRange(bytes, offset, offset + length));
- ref = hashMapResult.next();
- }
- } else {
- assertTrue(hashMapResult.isEof());
- }
- if (randomByteArrayStream.size() != count) {
- assertTrue(false);
- }
-
- for (int i = 0; i < count; ++i) {
- byte[] bytes = resultBytes.get(i);
- if (!randomByteArrayStream.contains(bytes)) {
- assertTrue(false);
- }
- }
- }
-
- public static void verifyHashMapResult(VectorMapJoinHashMapResult hashMapResult,
- byte[] valueBytes ) {
-
- assertTrue(hashMapResult.hasRows());
- WriteBuffers.ByteSegmentRef ref = hashMapResult.first();
- byte[] bytes = ref.getBytes();
- int offset = (int) ref.getOffset();
- int length = ref.getLength();
- assertTrue(valueBytes.length == length);
- boolean match = true; // Assume
- for (int j = 0; j < length; j++) {
- if (valueBytes[j] != bytes[offset + j]) {
- match = false;
- break;
- }
- }
- if (!match) {
- assertTrue(false);
- }
- }
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMap.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMap.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMap.java
new file mode 100644
index 0000000..bbfa65f
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMap.java
@@ -0,0 +1,272 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast;
+
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.util.Random;
+
+import org.apache.hadoop.hive.ql.exec.JoinUtil;
+import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.CheckFastHashTable.VerifyFastBytesHashMap;
+import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.junit.Test;
+
+/*
+ * Tests for the multi-key value hash map optimized for vector map join.
+ *
+ * The key is uninterpreted bytes.
+ */
+public class TestVectorMapJoinFastBytesHashMap extends CommonFastHashTable {
+
+ @Test
+ public void testOneKey() throws Exception {
+ random = new Random(82733);
+
+ VectorMapJoinFastMultiKeyHashMap map =
+ new VectorMapJoinFastMultiKeyHashMap(
+ false, CAPACITY, LOAD_FACTOR, WB_SIZE);
+
+ VerifyFastBytesHashMap verifyTable = new VerifyFastBytesHashMap();
+
+ byte[] key = new byte[random.nextInt(MAX_KEY_LENGTH)];
+ random.nextBytes(key);
+ byte[] value = new byte[random.nextInt(MAX_VALUE_LENGTH)];
+ random.nextBytes(value);
+
+ map.testPutRow(key, value);
+ verifyTable.add(key, value);
+ verifyTable.verify(map);
+
+ // Second value.
+ value = new byte[random.nextInt(MAX_VALUE_LENGTH)];
+ random.nextBytes(value);
+ map.testPutRow(key, value);
+ verifyTable.add(key, value);
+ verifyTable.verify(map);
+
+ // Third value.
+ value = new byte[random.nextInt(MAX_VALUE_LENGTH)];
+ random.nextBytes(value);
+ map.testPutRow(key, value);
+ verifyTable.add(key, value);
+ verifyTable.verify(map);
+ }
+
+ @Test
+ public void testMultipleKeysSingleValue() throws Exception {
+ random = new Random(29383);
+
+ VectorMapJoinFastMultiKeyHashMap map =
+ new VectorMapJoinFastMultiKeyHashMap(
+ false, CAPACITY, LOAD_FACTOR, WB_SIZE);
+
+ VerifyFastBytesHashMap verifyTable = new VerifyFastBytesHashMap();
+
+ int keyCount = 100 + random.nextInt(1000);
+ for (int i = 0; i < keyCount; i++) {
+ byte[] key = new byte[random.nextInt(MAX_KEY_LENGTH)];
+ random.nextBytes(key);
+ if (verifyTable.contains(key)) {
+ // This test wants unique keys; skip duplicates.
+ continue;
+ }
+ byte[] value = new byte[random.nextInt(MAX_VALUE_LENGTH)];
+ random.nextBytes(value);
+
+ map.testPutRow(key, value);
+ verifyTable.add(key, value);
+ verifyTable.verify(map);
+ }
+ }
+
+ @Test
+ public void testGetNonExistent() throws Exception {
+ random = new Random(1002);
+
+ VectorMapJoinFastMultiKeyHashMap map =
+ new VectorMapJoinFastMultiKeyHashMap(
+ false, CAPACITY, LOAD_FACTOR, WB_SIZE);
+
+ VerifyFastBytesHashMap verifyTable = new VerifyFastBytesHashMap();
+
+ byte[] key1 = new byte[random.nextInt(MAX_KEY_LENGTH)];
+ random.nextBytes(key1);
+ byte[] value = new byte[random.nextInt(MAX_VALUE_LENGTH)];
+ random.nextBytes(value);
+
+ map.testPutRow(key1, value);
+ verifyTable.add(key1, value);
+ verifyTable.verify(map);
+
+ byte[] key2 = new byte[random.nextInt(MAX_KEY_LENGTH)];
+ random.nextBytes(key2);
+ VectorMapJoinHashMapResult hashMapResult = map.createHashMapResult();
+ JoinUtil.JoinResult joinResult = map.lookup(key2, 0, key2.length, hashMapResult);
+ assertTrue(joinResult == JoinUtil.JoinResult.NOMATCH);
+ assertTrue(!hashMapResult.hasRows());
+
+ map.testPutRow(key2, value);
+ verifyTable.add(key2, value);
+ verifyTable.verify(map);
+
+ byte[] key3 = new byte[random.nextInt(MAX_KEY_LENGTH)];
+ random.nextBytes(key3);
+ hashMapResult = map.createHashMapResult();
+ joinResult = map.lookup(key3, 0, key3.length, hashMapResult);
+ assertTrue(joinResult == JoinUtil.JoinResult.NOMATCH);
+ assertTrue(!hashMapResult.hasRows());
+ }
+
+ @Test
+ public void testFullMap() throws Exception {
+ random = new Random(200001);
+
+ // Make sure the map does not expand; should be able to find space.
+ VectorMapJoinFastMultiKeyHashMap map =
+ new VectorMapJoinFastMultiKeyHashMap(false, CAPACITY, 1f, WB_SIZE);
+
+ VerifyFastBytesHashMap verifyTable = new VerifyFastBytesHashMap();
+
+ for (int i = 0; i < CAPACITY; i++) {
+ byte[] key;
+ while (true) {
+ key = new byte[random.nextInt(MAX_KEY_LENGTH)];
+ random.nextBytes(key);
+ if (!verifyTable.contains(key)) {
+ // Unique keys for this test.
+ break;
+ }
+ }
+ byte[] value = new byte[random.nextInt(MAX_VALUE_LENGTH)];
+ random.nextBytes(value);
+
+ map.testPutRow(key, value);
+ verifyTable.add(key, value);
+ // verifyTable.verify(map);
+ }
+ verifyTable.verify(map);
+
+ byte[] anotherKey;
+ while (true) {
+ anotherKey = new byte[random.nextInt(MAX_KEY_LENGTH)];
+ random.nextBytes(anotherKey);
+ if (!verifyTable.contains(anotherKey)) {
+ // Unique keys for this test.
+ break;
+ }
+ }
+
+ VectorMapJoinHashMapResult hashMapResult = map.createHashMapResult();
+ JoinUtil.JoinResult joinResult = map.lookup(anotherKey, 0, anotherKey.length, hashMapResult);
+ assertTrue(joinResult == JoinUtil.JoinResult.NOMATCH);
+ }
+
+ @Test
+ public void testExpand() throws Exception {
+ random = new Random(99221);
+
+ // Start with capacity 1; make sure we expand on every put.
+ VectorMapJoinFastMultiKeyHashMap map =
+ new VectorMapJoinFastMultiKeyHashMap(false, 1, 0.0000001f, WB_SIZE);
+
+ VerifyFastBytesHashMap verifyTable = new VerifyFastBytesHashMap();
+
+ for (int i = 0; i < 18; ++i) {
+ byte[] key;
+ while (true) {
+ key = new byte[random.nextInt(MAX_KEY_LENGTH)];
+ random.nextBytes(key);
+ if (!verifyTable.contains(key)) {
+ // Unique keys for this test.
+ break;
+ }
+ }
+ byte[] value = new byte[random.nextInt(MAX_VALUE_LENGTH)];
+ random.nextBytes(value);
+
+ map.testPutRow(key, value);
+ verifyTable.add(key, value);
+ // verifyTable.verify(map);
+ }
+ verifyTable.verify(map);
+ // assertEquals(1 << 18, map.getCapacity());
+ }
+
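+ /*
+ * Randomly interleave puts of brand new keys with extra values for
+ * already-seen keys, mirroring every put into the verify table, then
+ * verify the whole map once at the end.
+ */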
+ public void addAndVerifyMultipleKeyMultipleValue(int keyCount,
+ VectorMapJoinFastMultiKeyHashMap map, VerifyFastBytesHashMap verifyTable)
+ throws HiveException, IOException {
+ for (int i = 0; i < keyCount; i++) {
+ byte[] value = new byte[generateLargeCount() - 1];
+ random.nextBytes(value);
+
+ // Add a new key or add a value to an existing key?
+ if (random.nextBoolean() || verifyTable.getCount() == 0) {
+ byte[] key;
+ while (true) {
+ key = new byte[random.nextInt(MAX_KEY_LENGTH)];
+ random.nextBytes(key);
+ if (!verifyTable.contains(key)) {
+ // Unique keys for this test.
+ break;
+ }
+ }
+
+ map.testPutRow(key, value);
+ verifyTable.add(key, value);
+ // verifyTable.verify(map);
+ } else {
+ byte[] randomExistingKey = verifyTable.addRandomExisting(value, random);
+ map.testPutRow(randomExistingKey, value);
+ // verifyTable.verify(map);
+ }
+ }
+ verifyTable.verify(map);
+ }
+
+ @Test
+ public void testMultipleKeysMultipleValue() throws Exception {
+ random = new Random(9332);
+
+ // Use a large capacity that doesn't require expansion, yet.
+ VectorMapJoinFastMultiKeyHashMap map =
+ new VectorMapJoinFastMultiKeyHashMap(
+ false, LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+ VerifyFastBytesHashMap verifyTable = new VerifyFastBytesHashMap();
+
+ int keyCount = 1000;
+ addAndVerifyMultipleKeyMultipleValue(keyCount, map, verifyTable);
+ }
+
+ @Test
+ public void testLargeAndExpand() throws Exception {
+ random = new Random(21111);
+
+ // Use a large capacity that doesn't require expansion, yet.
+ VectorMapJoinFastMultiKeyHashMap map =
+ new VectorMapJoinFastMultiKeyHashMap(
+ false, MODERATE_CAPACITY, LOAD_FACTOR, MODERATE_WB_SIZE);
+
+ VerifyFastBytesHashMap verifyTable = new VerifyFastBytesHashMap();
+
+ int keyCount = 1000;
+ addAndVerifyMultipleKeyMultipleValue(keyCount, map, verifyTable);
+ }
+}
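Several tests above inline the same retry loop to draw a random key that the verify table has not seen yet. Below is a sketch of that loop factored into a helper; the helper class and its names are hypothetical, and the patch itself keeps the loop inline with verifyTable.contains(key) as the "seen" check.

import java.util.Arrays;
import java.util.HashSet;
import java.util.Random;
import java.util.Set;

public class UniqueKeySketch {
  // Rejection-sample random byte[] keys until one has not been seen before.
  public static byte[] nextUniqueKey(Random random, int maxKeyLength, Set<String> seen) {
    while (true) {
      byte[] key = new byte[random.nextInt(maxKeyLength)];
      random.nextBytes(key);
      if (seen.add(Arrays.toString(key))) {
        // add() returns true only for a key we have not produced before.
        return key;
      }
    }
  }

  public static void main(String[] args) {
    Set<String> seen = new HashSet<String>();
    Random random = new Random(12345);
    byte[] key1 = nextUniqueKey(random, 100, seen);
    byte[] key2 = nextUniqueKey(random, 100, seen);
    System.out.println(key1.length + " " + key2.length);
  }
}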
http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMultiSet.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMultiSet.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMultiSet.java
new file mode 100644
index 0000000..449a8b2
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMultiSet.java
@@ -0,0 +1,253 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast;
+
+import static org.junit.Assert.*;
+
+import java.io.IOException;
+import java.util.Random;
+
+import org.apache.hadoop.hive.ql.exec.JoinUtil;
+import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.CheckFastHashTable.VerifyFastBytesHashMultiSet;
+import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMultiSetResult;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.junit.Test;
+
+public class TestVectorMapJoinFastBytesHashMultiSet extends CommonFastHashTable {
+
+ @Test
+ public void testOneKey() throws Exception {
+ random = new Random(5255);
+
+ VectorMapJoinFastMultiKeyHashMultiSet map =
+ new VectorMapJoinFastMultiKeyHashMultiSet(
+ false, CAPACITY, LOAD_FACTOR, WB_SIZE);
+
+ VerifyFastBytesHashMultiSet verifyTable = new VerifyFastBytesHashMultiSet();
+
+ byte[] key = new byte[random.nextInt(MAX_KEY_LENGTH)];
+ random.nextBytes(key);
+
+ map.testPutRow(key);
+ verifyTable.add(key);
+ verifyTable.verify(map);
+
+ // Second time.
+ map.testPutRow(key);
+ verifyTable.add(key);
+ verifyTable.verify(map);
+
+ // Third time.
+ map.testPutRow(key);
+ verifyTable.add(key);
+ verifyTable.verify(map);
+ }
+
+ @Test
+ public void testMultipleKeysSingleValue() throws Exception {
+ random = new Random(2374);
+
+ VectorMapJoinFastMultiKeyHashMultiSet map =
+ new VectorMapJoinFastMultiKeyHashMultiSet(
+ false, CAPACITY, LOAD_FACTOR, WB_SIZE);
+
+ VerifyFastBytesHashMultiSet verifyTable = new VerifyFastBytesHashMultiSet();
+
+ int keyCount = 100 + random.nextInt(1000);
+ for (int i = 0; i < keyCount; i++) {
+ byte[] key = new byte[random.nextInt(MAX_KEY_LENGTH)];
+ random.nextBytes(key);
+ if (verifyTable.contains(key)) {
+ // This test wants unique keys; skip duplicates.
+ continue;
+ }
+
+ map.testPutRow(key);
+ verifyTable.add(key);
+ // verifyTable.verify(map);
+ }
+ verifyTable.verify(map);
+ }
+
+ @Test
+ public void testGetNonExistent() throws Exception {
+ random = new Random(98222);
+
+ VectorMapJoinFastMultiKeyHashMultiSet map =
+ new VectorMapJoinFastMultiKeyHashMultiSet(
+ false, CAPACITY, LOAD_FACTOR, WB_SIZE);
+
+ VerifyFastBytesHashMultiSet verifyTable = new VerifyFastBytesHashMultiSet();
+
+ byte[] key1 = new byte[random.nextInt(MAX_KEY_LENGTH)];
+ random.nextBytes(key1);
+
+ map.testPutRow(key1);
+ verifyTable.add(key1);
+ verifyTable.verify(map);
+
+ byte[] key2 = new byte[random.nextInt(MAX_KEY_LENGTH)];
+ random.nextBytes(key2);
+ VectorMapJoinHashMultiSetResult hashMultiSetResult = map.createHashMultiSetResult();
+ JoinUtil.JoinResult joinResult = map.contains(key2, 0, key2.length, hashMultiSetResult);
+ assertTrue(joinResult == JoinUtil.JoinResult.NOMATCH);
+
+ map.testPutRow(key2);
+ verifyTable.add(key2);
+ verifyTable.verify(map);
+
+ byte[] key3 = new byte[random.nextInt(MAX_KEY_LENGTH)];
+ random.nextBytes(key3);
+ hashMultiSetResult = map.createHashMultiSetResult();
+ joinResult = map.contains(key3, 0, key3.length, hashMultiSetResult);
+ assertTrue(joinResult == JoinUtil.JoinResult.NOMATCH);
+ assertEquals(0, hashMultiSetResult.count());
+ }
+
+ @Test
+ public void testFullMap() throws Exception {
+ random = new Random(9024);
+
+ // Make sure the map does not expand; should be able to find space.
+ VectorMapJoinFastMultiKeyHashMultiSet map =
+ new VectorMapJoinFastMultiKeyHashMultiSet(false, CAPACITY, 1f, WB_SIZE);
+
+ VerifyFastBytesHashMultiSet verifyTable = new VerifyFastBytesHashMultiSet();
+
+ for (int i = 0; i < CAPACITY; i++) {
+ byte[] key;
+ while (true) {
+ key = new byte[random.nextInt(MAX_KEY_LENGTH)];
+ random.nextBytes(key);
+ if (!verifyTable.contains(key)) {
+ // Unique keys for this test.
+ break;
+ }
+ }
+
+ map.testPutRow(key);
+ verifyTable.add(key);
+ // verifyTable.verify(map);
+ }
+ verifyTable.verify(map);
+
+ byte[] anotherKey;
+ while (true) {
+ anotherKey = new byte[random.nextInt(MAX_KEY_LENGTH)];
+ random.nextBytes(anotherKey);
+ if (!verifyTable.contains(anotherKey)) {
+ // Unique keys for this test.
+ break;
+ }
+ }
+
+ VectorMapJoinHashMultiSetResult hashMultiSetResult = map.createHashMultiSetResult();
+ JoinUtil.JoinResult joinResult = map.contains(anotherKey, 0, anotherKey.length, hashMultiSetResult);
+ assertTrue(joinResult == JoinUtil.JoinResult.NOMATCH);
+ }
+
+ @Test
+ public void testExpand() throws Exception {
+ random = new Random(2933);
+
+ // Start with capacity 1; make sure we expand on every put.
+ VectorMapJoinFastMultiKeyHashMultiSet map =
+ new VectorMapJoinFastMultiKeyHashMultiSet(false, 1, 0.0000001f, WB_SIZE);
+
+ VerifyFastBytesHashMultiSet verifyTable = new VerifyFastBytesHashMultiSet();
+
+ for (int i = 0; i < 18; ++i) {
+ byte[] key;
+ while (true) {
+ key = new byte[random.nextInt(MAX_KEY_LENGTH)];
+ random.nextBytes(key);
+ if (!verifyTable.contains(key)) {
+ // Unique keys for this test.
+ break;
+ }
+ }
+
+ map.testPutRow(key);
+ verifyTable.add(key);
+ // verifyTable.verify(map);
+ }
+ verifyTable.verify(map);
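+ // With a near-zero load factor every put should trigger a doubling,
+ // so 18 puts starting from capacity 1 would end at 1 << 18 = 262144: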
+ // assertEquals(1 << 18, map.getCapacity());
+ }
+
+ public void addAndVerifyMultipleKeyMultipleValue(int keyCount,
+ VectorMapJoinFastMultiKeyHashMultiSet map, VerifyFastBytesHashMultiSet verifyTable)
+ throws HiveException, IOException {
+ for (int i = 0; i < keyCount; i++) {
+ byte[] value = new byte[generateLargeCount() - 1];
+ random.nextBytes(value);
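+ // The multiset never sees this value (testPutRow takes only the key);
+ // it is used solely by the verify table's addRandomExisting bookkeeping.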
+
+ // Add a new key or add a value to an existing key?
+ if (random.nextBoolean() || verifyTable.getCount() == 0) {
+ byte[] key;
+ while (true) {
+ key = new byte[random.nextInt(MAX_KEY_LENGTH)];
+ random.nextBytes(key);
+ if (!verifyTable.contains(key)) {
+ // Unique keys for this test.
+ break;
+ }
+ }
+
+ map.testPutRow(key);
+ verifyTable.add(key);
+ // verifyTable.verify(map);
+ } else {
+ byte[] randomExistingKey = verifyTable.addRandomExisting(value, random);
+ map.testPutRow(randomExistingKey);
+ // verifyTable.verify(map);
+ }
+ }
+ verifyTable.verify(map);
+ }
+
+ @Test
+ public void testMultipleKeysMultipleValue() throws Exception {
+ random = new Random(5445);
+
+ // Use a large capacity that doesn't require expansion, yet.
+ VectorMapJoinFastMultiKeyHashMultiSet map =
+ new VectorMapJoinFastMultiKeyHashMultiSet(
+ false, LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+ VerifyFastBytesHashMultiSet verifyTable = new VerifyFastBytesHashMultiSet();
+
+ int keyCount = 1000;
+ addAndVerifyMultipleKeyMultipleValue(keyCount, map, verifyTable);
+ }
+
+ @Test
+ public void testLargeAndExpand() throws Exception {
+ random = new Random(5637);
+
+ // Use a large capacity that doesn't require expansion, yet.
+ VectorMapJoinFastMultiKeyHashMultiSet map =
+ new VectorMapJoinFastMultiKeyHashMultiSet(
+ false, MODERATE_CAPACITY, LOAD_FACTOR, MODERATE_WB_SIZE);
+
+ VerifyFastBytesHashMultiSet verifyTable = new VerifyFastBytesHashMultiSet();
+
+ int keyCount = 1000;
+ addAndVerifyMultipleKeyMultipleValue(keyCount, map, verifyTable);
+ }
+}
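A note for readers comparing the old and new test styles: the rewritten tests keep a trivial reference model (the VerifyFast* helpers) beside the fast hash structure and cross-check the two after mutations, instead of replaying recorded random streams. A minimal, self-contained sketch of that oracle idea for a multiset of long keys (the class and method names here are illustrative only, not the Hive test helpers):

    import java.util.HashMap;
    import java.util.Map;

    final class LongMultiSetOracle {
      // Reference model: key -> expected multiplicity.
      private final Map<Long, Integer> counts = new HashMap<Long, Integer>();

      void add(long key) {
        Integer c = counts.get(key);
        counts.put(key, c == null ? 1 : c + 1);
      }

      // The count a correct fast multiset should report for this key.
      int expectedCount(long key) {
        Integer c = counts.get(key);
        return c == null ? 0 : c;
      }
    }

Each map.testPutRow(key) in the tests is mirrored by verifyTable.add(key), and verifyTable.verify(map) replays the model against the fast structure.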
http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashSet.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashSet.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashSet.java
new file mode 100644
index 0000000..ef7c91c
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashSet.java
@@ -0,0 +1,252 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast;
+
+import static org.junit.Assert.*;
+
+import java.io.IOException;
+import java.util.Random;
+
+import org.apache.hadoop.hive.ql.exec.JoinUtil;
+import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.CheckFastHashTable.VerifyFastBytesHashSet;
+import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashSetResult;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.junit.Test;
+
+public class TestVectorMapJoinFastBytesHashSet extends CommonFastHashTable {
+
+ @Test
+ public void testOneKey() throws Exception {
+ random = new Random(81104);
+
+ VectorMapJoinFastMultiKeyHashSet map =
+ new VectorMapJoinFastMultiKeyHashSet(
+ false, CAPACITY, LOAD_FACTOR, WB_SIZE);
+
+ VerifyFastBytesHashSet verifyTable = new VerifyFastBytesHashSet();
+
+ byte[] key = new byte[random.nextInt(MAX_KEY_LENGTH)];
+ random.nextBytes(key);
+
+ map.testPutRow(key);
+ verifyTable.add(key);
+ verifyTable.verify(map);
+
+ // Second time.
+ map.testPutRow(key);
+ verifyTable.add(key);
+ verifyTable.verify(map);
+
+ // Third time.
+ map.testPutRow(key);
+ verifyTable.add(key);
+ verifyTable.verify(map);
+ }
+
+ @Test
+ public void testMultipleKeysSingleValue() throws Exception {
+ random = new Random(1120);
+
+ VectorMapJoinFastMultiKeyHashSet map =
+ new VectorMapJoinFastMultiKeyHashSet(
+ false, CAPACITY, LOAD_FACTOR, WB_SIZE);
+
+ VerifyFastBytesHashSet verifyTable = new VerifyFastBytesHashSet();
+
+ int keyCount = 100 + random.nextInt(1000);
+ for (int i = 0; i < keyCount; i++) {
+ byte[] key;
+ while (true) {
+ key = new byte[random.nextInt(MAX_KEY_LENGTH)];
+ random.nextBytes(key);
+ if (!verifyTable.contains(key)) {
+ // Unique keys for this test.
+ break;
+ }
+ }
+
+ map.testPutRow(key);
+ verifyTable.add(key);
+ // verifyTable.verify(map);
+ }
+ verifyTable.verify(map);
+ }
+
+ @Test
+ public void testGetNonExistent() throws Exception {
+ random = new Random(2293);
+
+ VectorMapJoinFastMultiKeyHashSet map =
+ new VectorMapJoinFastMultiKeyHashSet(
+ false, CAPACITY, LOAD_FACTOR, WB_SIZE);
+
+ VerifyFastBytesHashSet verifyTable = new VerifyFastBytesHashSet();
+
+ byte[] key1 = new byte[random.nextInt(MAX_KEY_LENGTH)];
+ random.nextBytes(key1);
+
+ map.testPutRow(key1);
+ verifyTable.add(key1);
+ verifyTable.verify(map);
+
+ byte[] key2 = new byte[random.nextInt(MAX_KEY_LENGTH)];
+ random.nextBytes(key2);
+ VectorMapJoinHashSetResult hashSetResult = map.createHashSetResult();
+ JoinUtil.JoinResult joinResult = map.contains(key2, 0, key2.length, hashSetResult);
+ assertTrue(joinResult == JoinUtil.JoinResult.NOMATCH);
+
+ map.testPutRow(key2);
+ verifyTable.add(key2);
+ verifyTable.verify(map);
+
+ byte[] key3 = new byte[random.nextInt(MAX_KEY_LENGTH)];
+ random.nextBytes(key3);
+ hashSetResult = map.createHashSetResult();
+ joinResult = map.contains(key3, 0, key3.length, hashSetResult);
+ assertTrue(joinResult == JoinUtil.JoinResult.NOMATCH);
+ }
+
+ @Test
+ public void testFullMap() throws Exception {
+ random = new Random(219);
+
+ // Make sure the map does not expand; should be able to find space.
+ VectorMapJoinFastMultiKeyHashSet map =
+ new VectorMapJoinFastMultiKeyHashSet(false, CAPACITY, 1f, WB_SIZE);
+
+ VerifyFastBytesHashSet verifyTable = new VerifyFastBytesHashSet();
+
+ for (int i = 0; i < CAPACITY; i++) {
+ byte[] key;
+ while (true) {
+ key = new byte[random.nextInt(MAX_KEY_LENGTH)];
+ random.nextBytes(key);
+ if (!verifyTable.contains(key)) {
+ // Unique keys for this test.
+ break;
+ }
+ }
+
+ map.testPutRow(key);
+ verifyTable.add(key);
+ // verifyTable.verify(map);
+ }
+ verifyTable.verify(map);
+
+ byte[] anotherKey;
+ while (true) {
+ anotherKey = new byte[random.nextInt(MAX_KEY_LENGTH)];
+ random.nextBytes(anotherKey);
+ if (!verifyTable.contains(anotherKey)) {
+ // Unique keys for this test.
+ break;
+ }
+ }
+
+ VectorMapJoinHashSetResult hashSetResult = map.createHashSetResult();
+ JoinUtil.JoinResult joinResult = map.contains(anotherKey, 0, anotherKey.length, hashSetResult);
+ assertTrue(joinResult == JoinUtil.JoinResult.NOMATCH);
+ }
+
+ @Test
+ public void testExpand() throws Exception {
+ random = new Random(773);
+
+ // Start with capacity 1; make sure we expand on every put.
+ VectorMapJoinFastMultiKeyHashSet map =
+ new VectorMapJoinFastMultiKeyHashSet(false, 1, 0.0000001f, WB_SIZE);
+
+ VerifyFastBytesHashSet verifyTable = new VerifyFastBytesHashSet();
+
+ for (int i = 0; i < 18; ++i) {
+ byte[] key;
+ while (true) {
+ key = new byte[random.nextInt(MAX_KEY_LENGTH)];
+ random.nextBytes(key);
+ if (!verifyTable.contains(key)) {
+ // Unique keys for this test.
+ break;
+ }
+ }
+
+ map.testPutRow(key);
+ verifyTable.add(key);
+ // verifyTable.verify(map);
+ }
+ verifyTable.verify(map);
+ // assertEquals(1 << 18, map.getCapacity());
+ }
+
+ public void addAndVerifyMultipleKeyMultipleValue(int keyCount,
+ VectorMapJoinFastMultiKeyHashSet map, VerifyFastBytesHashSet verifyTable)
+ throws HiveException, IOException {
+ for (int i = 0; i < keyCount; i++) {
+ byte[] value = new byte[generateLargeCount() - 1];
+ random.nextBytes(value);
+
+ // Add a new key or add a value to an existing key?
+ if (random.nextBoolean() || verifyTable.getCount() == 0) {
+ byte[] key;
+ while (true) {
+ key = new byte[random.nextInt(MAX_KEY_LENGTH)];
+ random.nextBytes(key);
+ if (!verifyTable.contains(key)) {
+ // Unique keys for this test.
+ break;
+ }
+ }
+
+ map.testPutRow(key);
+ verifyTable.add(key);
+ // verifyTable.verify(map);
+ } else {
+ byte[] randomExistingKey = verifyTable.addRandomExisting(value, random);
+ map.testPutRow(randomExistingKey);
+ // verifyTable.verify(map);
+ }
+ }
+ verifyTable.verify(map);
+ }
+
+ @Test
+ public void testMultipleKeysMultipleValue() throws Exception {
+ random = new Random(9);
+
+ // Use a large capacity that doesn't require expansion, yet.
+ VectorMapJoinFastMultiKeyHashSet map =
+ new VectorMapJoinFastMultiKeyHashSet(
+ false, LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+ VerifyFastBytesHashSet verifyTable = new VerifyFastBytesHashSet();
+
+ int keyCount = 1000;
+ addAndVerifyMultipleKeyMultipleValue(keyCount, map, verifyTable);
+ }
+
+ @Test
+ public void testLargeAndExpand() throws Exception {
+ random = new Random(8462);
+
+ // Use a large capacity that doesn't require expansion, yet.
+ VectorMapJoinFastMultiKeyHashSet map =
+ new VectorMapJoinFastMultiKeyHashSet(
+ false, MODERATE_CAPACITY, LOAD_FACTOR, MODERATE_WB_SIZE);
+
+ VerifyFastBytesHashSet verifyTable = new VerifyFastBytesHashSet();
+
+ int keyCount = 1000;
+ addAndVerifyMultipleKeyMultipleValue(keyCount, map, verifyTable);
+ }
+}
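The unique-key loops that recur in these tests are plain rejection sampling against the verify table: draw a random key and retry until it has not been seen. A standalone sketch of that pattern, assuming a simple Set-based oracle (the helper name is hypothetical):

    import java.util.Random;
    import java.util.Set;

    final class UniqueKeys {
      // Draw a key not yet in 'seen'. Retries are rare at test sizes
      // because collisions in a 64-bit key space are unlikely.
      static long nextUniqueLong(Random random, Set<Long> seen) {
        while (true) {
          long key = random.nextLong();
          if (!seen.contains(key)) {
            seen.add(key);
            return key;
          }
        }
      }
    }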
http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMap.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMap.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMap.java
index a45275b..e8bbee3 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMap.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMap.java
@@ -18,11 +18,14 @@
package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast;
+import java.io.IOException;
import java.util.Random;
import org.apache.hadoop.hive.ql.exec.JoinUtil;
import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult;
+import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.CheckFastHashTable.VerifyFastLongHashMap;
import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VectorMapJoinFastLongHashMap;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType;
import org.junit.Test;
@@ -31,91 +34,141 @@ import static org.junit.Assert.*;
public class TestVectorMapJoinFastLongHashMap extends CommonFastHashTable {
@Test
- public void testPutGetOne() throws Exception {
- random = new Random(47496);
+ public void testOneKey() throws Exception {
+ random = new Random(33221);
VectorMapJoinFastLongHashMap map =
- new VectorMapJoinFastLongHashMap(false, false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE);
-
- RandomLongStream randomLongKeyStream = new RandomLongStream(random);
- RandomByteArrayStream randomByteArrayValueStream = new RandomByteArrayStream(random);
-
- long key = randomLongKeyStream.next();
- byte[] value = randomByteArrayValueStream.next();
- map.putRow(key, value);
- verifyHashMapResult(map, key, randomByteArrayValueStream.get(0));
-
- key = randomLongKeyStream.next();
- value = randomByteArrayValueStream.next();
- map.putRow(key, value);
- verifyHashMapResult(map, key, randomByteArrayValueStream.get(1));
+ new VectorMapJoinFastLongHashMap(
+ false, false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE);
+
+ VerifyFastLongHashMap verifyTable = new VerifyFastLongHashMap();
+
+ long key = random.nextLong();
+ byte[] value = new byte[random.nextInt(MAX_VALUE_LENGTH)];
+ random.nextBytes(value);
+
+ map.testPutRow(key, value);
+ verifyTable.add(key, value);
+ verifyTable.verify(map);
+
+ // Second value.
+ value = new byte[random.nextInt(MAX_VALUE_LENGTH)];
+ random.nextBytes(value);
+ map.testPutRow(key, value);
+ verifyTable.add(key, value);
+ verifyTable.verify(map);
+
+ // Third value.
+ value = new byte[random.nextInt(MAX_VALUE_LENGTH)];
+ random.nextBytes(value);
+ map.testPutRow(key, value);
+ verifyTable.add(key, value);
+ verifyTable.verify(map);
}
@Test
- public void testPutGetMultiple() throws Exception {
- random = new Random(2990);
+ public void testMultipleKeysSingleValue() throws Exception {
+ random = new Random(900);
- VectorMapJoinFastLongHashMap map = new VectorMapJoinFastLongHashMap(false, false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE);
-
- RandomLongStream randomLongKeyStream = new RandomLongStream(random);
- RandomByteArrayStream randomByteArrayValueStream = new RandomByteArrayStream(random);
-
- long key = randomLongKeyStream.next();
- byte[] value = randomByteArrayValueStream.next();
- map.putRow(key, value);
- verifyHashMapResult(map, key, value);
+ VectorMapJoinFastLongHashMap map =
+ new VectorMapJoinFastLongHashMap(
+ false, false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE);
+
+ VerifyFastLongHashMap verifyTable = new VerifyFastLongHashMap();
+
+ int keyCount = 100 + random.nextInt(1000);
+ for (int i = 0; i < keyCount; i++) {
+ long key;
+ while (true) {
+ key = random.nextLong();
+ if (!verifyTable.contains(key)) {
+ // Unique keys for this test.
+ break;
+ }
+ }
+ byte[] value = new byte[random.nextInt(MAX_VALUE_LENGTH)];
+ random.nextBytes(value);
- // Same key, multiple values.
- for (int i = 0; i < 3; ++i) {
- value = randomByteArrayValueStream.next();
- map.putRow(key, value);
- verifyHashMapResult(map, key, randomByteArrayValueStream);
+ map.testPutRow(key, value);
+ verifyTable.add(key, value);
+ // verifyTable.verify(map);
}
+ verifyTable.verify(map);
}
@Test
public void testGetNonExistent() throws Exception {
- random = new Random(16916);
+ random = new Random(450);
- VectorMapJoinFastLongHashMap map = new VectorMapJoinFastLongHashMap(false, false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE);
+ VectorMapJoinFastLongHashMap map =
+ new VectorMapJoinFastLongHashMap(
+ false, false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE);
- RandomLongStream randomLongKeyStream = new RandomLongStream(random);
- RandomByteArrayStream randomByteArrayValueStream = new RandomByteArrayStream(random);
+ VerifyFastLongHashMap verifyTable = new VerifyFastLongHashMap();
- long key = randomLongKeyStream.next();
- byte[] value = randomByteArrayValueStream.next();
- map.putRow(key, value);
+ long key1 = random.nextLong();
+ byte[] value = new byte[random.nextInt(MAX_VALUE_LENGTH)];
+ random.nextBytes(value);
- key += 1;
- map.putRow(key, value);
+ map.testPutRow(key1, value);
+ verifyTable.add(key1, value);
+ verifyTable.verify(map);
- key += 1;
+ long key2 = key1 + 1;
VectorMapJoinHashMapResult hashMapResult = map.createHashMapResult();
- JoinUtil.JoinResult joinResult = map.lookup(key, hashMapResult);
+ JoinUtil.JoinResult joinResult = map.lookup(key2, hashMapResult);
+ assertTrue(joinResult == JoinUtil.JoinResult.NOMATCH);
+ assertTrue(!hashMapResult.hasRows());
+
+ map.testPutRow(key2, value);
+ verifyTable.add(key2, value);
+ verifyTable.verify(map);
+
+ long key3 = key2 + 1;
+ hashMapResult = map.createHashMapResult();
+ joinResult = map.lookup(key3, hashMapResult);
assertTrue(joinResult == JoinUtil.JoinResult.NOMATCH);
assertTrue(!hashMapResult.hasRows());
}
@Test
- public void testPutWithFullMap() throws Exception {
- random = new Random(26078);
+ public void testFullMap() throws Exception {
+ random = new Random(93440);
// Make sure the map does not expand; should be able to find space.
- VectorMapJoinFastLongHashMap map = new VectorMapJoinFastLongHashMap(false, false, HashTableKeyType.LONG, CAPACITY, 1f, WB_SIZE);
-
- RandomLongStream randomLongKeyStream = new RandomLongStream(random);
- RandomByteArrayStream randomByteArrayValueStream = new RandomByteArrayStream(random);
- for (int i = 0; i < CAPACITY; ++i) {
- long key = randomLongKeyStream.next();
- byte[] value = randomByteArrayValueStream.next();
- map.putRow(key, value);
+ VectorMapJoinFastLongHashMap map =
+ new VectorMapJoinFastLongHashMap(
+ false, false, HashTableKeyType.LONG, CAPACITY, 1f, WB_SIZE);
+
+ VerifyFastLongHashMap verifyTable = new VerifyFastLongHashMap();
+
+ for (int i = 0; i < CAPACITY; i++) {
+ long key;
+ while (true) {
+ key = random.nextLong();
+ if (!verifyTable.contains(key)) {
+ // Unique keys for this test.
+ break;
+ }
+ }
+ byte[] value = new byte[random.nextInt(MAX_VALUE_LENGTH)];
+ random.nextBytes(value);
+
+ map.testPutRow(key, value);
+ verifyTable.add(key, value);
+ // verifyTable.verify(map);
}
- for (int i = 0; i < randomLongKeyStream.size(); ++i) {
- verifyHashMapResult(map, randomLongKeyStream.get(i), randomByteArrayValueStream.get(i));
+ verifyTable.verify(map);
+
+ long anotherKey;
+ while (true) {
+ anotherKey = random.nextLong();
+ if (!verifyTable.contains(anotherKey)) {
+ // Unique keys for this test.
+ break;
+ }
}
- // assertEquals(CAPACITY, map.getCapacity());
- // Get of non-existent key should terminate..
- long anotherKey = randomLongKeyStream.next();
+
VectorMapJoinHashMapResult hashMapResult = map.createHashMapResult();
JoinUtil.JoinResult joinResult = map.lookup(anotherKey, hashMapResult);
assertTrue(joinResult == JoinUtil.JoinResult.NOMATCH);
@@ -123,97 +176,91 @@ public class TestVectorMapJoinFastLongHashMap extends CommonFastHashTable {
@Test
public void testExpand() throws Exception {
- random = new Random(22470);
+ random = new Random(5227);
// Start with capacity 1; make sure we expand on every put.
- VectorMapJoinFastLongHashMap map = new VectorMapJoinFastLongHashMap(false, false, HashTableKeyType.LONG, 1, 0.0000001f, WB_SIZE);
+ VectorMapJoinFastLongHashMap map =
+ new VectorMapJoinFastLongHashMap(
+ false, false, HashTableKeyType.LONG, 1, 0.0000001f, WB_SIZE);
- RandomLongStream randomLongKeyStream = new RandomLongStream(random);
- RandomByteArrayStream randomByteArrayValueStream = new RandomByteArrayStream(random);
+ VerifyFastLongHashMap verifyTable = new VerifyFastLongHashMap();
for (int i = 0; i < 18; ++i) {
- long key = randomLongKeyStream.next();
- byte[] value = randomByteArrayValueStream.next();
- map.putRow(key, value);
- for (int j = 0; j <= i; ++j) {
- verifyHashMapResult(map, randomLongKeyStream.get(j), randomByteArrayValueStream.get(j));
+ long key;
+ while (true) {
+ key = random.nextLong();
+ if (!verifyTable.contains(key)) {
+ // Unique keys for this test.
+ break;
+ }
}
+ byte[] value = new byte[random.nextInt(MAX_VALUE_LENGTH)];
+ random.nextBytes(value);
+
+ map.testPutRow(key, value);
+ verifyTable.add(key, value);
+ // verifyTable.verify(map);
}
+ verifyTable.verify(map);
// assertEquals(1 << 18, map.getCapacity());
}
- @Test
- public void testLarge() throws Exception {
- random = new Random(40719);
-
- // Use a large capacity that doesn't require expansion, yet.
- VectorMapJoinFastLongHashMap map = new VectorMapJoinFastLongHashMap(false, false, HashTableKeyType.LONG, LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
-
- RandomLongStream randomLongKeyStream = new RandomLongStream(random);
-
- final int largeSize = 1000;
- RandomByteArrayStream[] randomByteArrayValueStreams = new RandomByteArrayStream[largeSize];
- for (int i = 0; i < largeSize; i++) {
- randomByteArrayValueStreams[i] = new RandomByteArrayStream(random);
- int count = generateLargeCount();
- long key = randomLongKeyStream.next();
- for (int v = 0; v < count; v++) {
- byte[] value = randomByteArrayValueStreams[i].next();
- map.putRow(key, value);
+ public void addAndVerifyMultipleKeyMultipleValue(int keyCount,
+ VectorMapJoinFastLongHashMap map, VerifyFastLongHashMap verifyTable)
+ throws HiveException, IOException {
+ for (int i = 0; i < keyCount; i++) {
+ byte[] value = new byte[generateLargeCount() - 1];
+ random.nextBytes(value);
+
+ // Add a new key or add a value to an existing key?
+ if (random.nextBoolean() || verifyTable.getCount() == 0) {
+ long key;
+ while (true) {
+ key = random.nextLong();
+ if (!verifyTable.contains(key)) {
+ // Unique keys for this test.
+ break;
+ }
+ }
+
+ map.testPutRow(key, value);
+ verifyTable.add(key, value);
+ verifyTable.verify(map);
+ } else {
+ long randomExistingKey = verifyTable.addRandomExisting(value, random);
+ map.testPutRow(randomExistingKey, value);
+ // verifyTable.verify(map);
}
- }
- for (int i = 0; i < largeSize; i++) {
- verifyHashMapResult(map, randomLongKeyStream.get(i), randomByteArrayValueStreams[i]);
+ verifyTable.verify(map);
}
}
-
@Test
- public void testLargeAndExpand() throws Exception {
- random = new Random(46809);
+ public void testMultipleKeysMultipleValue() throws Exception {
+ random = new Random(8);
// Use a large capacity that doesn't require expansion, yet.
- VectorMapJoinFastLongHashMap map = new VectorMapJoinFastLongHashMap(false, false, HashTableKeyType.LONG, MODERATE_CAPACITY, LOAD_FACTOR, MODERATE_WB_SIZE);
-
- RandomLongStream randomLongKeyStream = new RandomLongStream(random);
-
- final int largeSize = 1000;
- RandomByteArrayStream[] randomByteArrayValueStreams = new RandomByteArrayStream[largeSize];
- for (int i = 0; i < largeSize; i++) {
- randomByteArrayValueStreams[i] = new RandomByteArrayStream(random);
- int count = generateLargeCount();
- long key = randomLongKeyStream.next();
- for (int v = 0; v < count; v++) {
- byte[] value = randomByteArrayValueStreams[i].next();
- map.putRow(key, value);
- }
- }
- for (int i = 0; i < largeSize; i++) {
- verifyHashMapResult(map, randomLongKeyStream.get(i), randomByteArrayValueStreams[i]);
- }
- }
-
- private void verifyHashMapResult(VectorMapJoinFastLongHashMap map, long key,
- RandomByteArrayStream randomByteArrayValueStream) {
+ VectorMapJoinFastLongHashMap map =
+ new VectorMapJoinFastLongHashMap(
+ false, false, HashTableKeyType.LONG, LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
- VectorMapJoinHashMapResult hashMapResult = map.createHashMapResult();
- JoinUtil.JoinResult joinResult = map.lookup(key, hashMapResult);
- if (joinResult != JoinUtil.JoinResult.MATCH) {
- assertTrue(false);
- }
+ VerifyFastLongHashMap verifyTable = new VerifyFastLongHashMap();
- CommonFastHashTable.verifyHashMapResult(hashMapResult, randomByteArrayValueStream);
+ int keyCount = 1000;
+ addAndVerifyMultipleKeyMultipleValue(keyCount, map, verifyTable);
}
- private void verifyHashMapResult(VectorMapJoinFastLongHashMap map, long key,
- byte[] valueBytes) {
+ @Test
+ public void testLargeAndExpand() throws Exception {
+ random = new Random(20);
- VectorMapJoinHashMapResult hashMapResult = map.createHashMapResult();
- JoinUtil.JoinResult joinResult = map.lookup(key, hashMapResult);
- if (joinResult != JoinUtil.JoinResult.MATCH) {
- assertTrue(false);
- }
+ // Use a large capacity that doesn't require expansion, yet.
+ VectorMapJoinFastLongHashMap map =
+ new VectorMapJoinFastLongHashMap(
+ false, false, HashTableKeyType.LONG, MODERATE_CAPACITY, LOAD_FACTOR, MODERATE_WB_SIZE);
- CommonFastHashTable.verifyHashMapResult(hashMapResult, valueBytes);
- }
+ VerifyFastLongHashMap verifyTable = new VerifyFastLongHashMap();
+ int keyCount = 1000;
+ addAndVerifyMultipleKeyMultipleValue(keyCount, map, verifyTable);
+ }
}
http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMultiSet.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMultiSet.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMultiSet.java
new file mode 100644
index 0000000..9e94611
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMultiSet.java
@@ -0,0 +1,252 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast;
+
+import java.io.IOException;
+import java.util.Random;
+
+import org.apache.hadoop.hive.ql.exec.JoinUtil;
+import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMultiSetResult;
+import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.CheckFastHashTable.VerifyFastLongHashMultiSet;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType;
+import org.junit.Test;
+
+import static org.junit.Assert.*;
+
+public class TestVectorMapJoinFastLongHashMultiSet extends CommonFastHashTable {
+
+ @Test
+ public void testOneKey() throws Exception {
+ random = new Random(458);
+
+ VectorMapJoinFastLongHashMultiSet map =
+ new VectorMapJoinFastLongHashMultiSet(
+ false, false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE);
+
+ VerifyFastLongHashMultiSet verifyTable = new VerifyFastLongHashMultiSet();
+
+ long key = random.nextLong();
+
+ map.testPutRow(key);
+ verifyTable.add(key);
+ verifyTable.verify(map);
+
+ // Second time.
+ map.testPutRow(key);
+ verifyTable.add(key);
+ verifyTable.verify(map);
+
+ // Third time.
+ map.testPutRow(key);
+ verifyTable.add(key);
+ verifyTable.verify(map);
+ }
+
+ @Test
+ public void testMultipleKeysSingleValue() throws Exception {
+ random = new Random(8000);
+
+ VectorMapJoinFastLongHashMultiSet map =
+ new VectorMapJoinFastLongHashMultiSet(
+ false, false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE);
+
+ VerifyFastLongHashMultiSet verifyTable = new VerifyFastLongHashMultiSet();
+
+ int keyCount = 100 + random.nextInt(1000);
+ for (int i = 0; i < keyCount; i++) {
+ long key;
+ while (true) {
+ key = random.nextLong();
+ if (!verifyTable.contains(key)) {
+ // Unique keys for this test.
+ break;
+ }
+ }
+
+ map.testPutRow(key);
+ verifyTable.add(key);
+ // verifyTable.verify(map);
+ }
+ verifyTable.verify(map);
+ }
+
+ @Test
+ public void testGetNonExistent() throws Exception {
+ random = new Random(4000);
+
+ VectorMapJoinFastLongHashMultiSet map =
+ new VectorMapJoinFastLongHashMultiSet(
+ false, false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE);
+
+ VerifyFastLongHashMultiSet verifyTable = new VerifyFastLongHashMultiSet();
+
+ long key1 = random.nextLong();
+
+ map.testPutRow(key1);
+ verifyTable.add(key1);
+ verifyTable.verify(map);
+
+ long key2 = key1 + 1;
+ VectorMapJoinHashMultiSetResult hashMultiSetResult = map.createHashMultiSetResult();
+ JoinUtil.JoinResult joinResult = map.contains(key2, hashMultiSetResult);
+ assertTrue(joinResult == JoinUtil.JoinResult.NOMATCH);
+ assertEquals(0, hashMultiSetResult.count());
+
+ map.testPutRow(key2);
+ verifyTable.add(key2);
+ verifyTable.verify(map);
+
+ long key3 = key2 + 1;
+ hashMultiSetResult = map.createHashMultiSetResult();
+ joinResult = map.contains(key3, hashMultiSetResult);
+ assertTrue(joinResult == JoinUtil.JoinResult.NOMATCH);
+ assertEquals(0, hashMultiSetResult.count());
+ }
+
+ @Test
+ public void testFullMap() throws Exception {
+ random = new Random(25000);
+
+ // Make sure the map does not expand; should be able to find space.
+ VectorMapJoinFastLongHashMultiSet map =
+ new VectorMapJoinFastLongHashMultiSet(
+ false, false, HashTableKeyType.LONG, CAPACITY, 1f, WB_SIZE);
+
+ VerifyFastLongHashMultiSet verifyTable = new VerifyFastLongHashMultiSet();
+
+ for (int i = 0; i < CAPACITY; i++) {
+ long key;
+ while (true) {
+ key = random.nextLong();
+ if (!verifyTable.contains(key)) {
+ // Unique keys for this test.
+ break;
+ }
+ }
+
+ map.testPutRow(key);
+ verifyTable.add(key);
+ // verifyTable.verify(map);
+ }
+ verifyTable.verify(map);
+
+ long anotherKey;
+ while (true) {
+ anotherKey = random.nextLong();
+ if (!verifyTable.contains(anotherKey)) {
+ // Unique keys for this test.
+ break;
+ }
+ }
+
+ VectorMapJoinHashMultiSetResult hashMultiSetResult = map.createHashMultiSetResult();
+ JoinUtil.JoinResult joinResult = map.contains(anotherKey, hashMultiSetResult);
+ assertTrue(joinResult == JoinUtil.JoinResult.NOMATCH);
+ }
+
+ @Test
+ public void testExpand() throws Exception {
+ random = new Random(30000);
+
+ // Start with capacity 1; make sure we expand on every put.
+ VectorMapJoinFastLongHashMultiSet map =
+ new VectorMapJoinFastLongHashMultiSet(
+ false, false, HashTableKeyType.LONG, 1, 0.0000001f, WB_SIZE);
+
+ VerifyFastLongHashMultiSet verifyTable = new VerifyFastLongHashMultiSet();
+
+ for (int i = 0; i < 18; ++i) {
+ long key;
+ while (true) {
+ key = random.nextLong();
+ if (!verifyTable.contains(key)) {
+ // Unique keys for this test.
+ break;
+ }
+ }
+
+ map.testPutRow(key);
+ verifyTable.add(key);
+ // verifyTable.verify(map);
+ }
+ verifyTable.verify(map);
+ // assertEquals(1 << 18, map.getCapacity());
+ }
+
+ public void addAndVerifyMultipleKeyMultipleValue(int keyCount,
+ VectorMapJoinFastLongHashMultiSet map, VerifyFastLongHashMultiSet verifyTable)
+ throws HiveException, IOException {
+ for (int i = 0; i < keyCount; i++) {
+ byte[] value = new byte[generateLargeCount() - 1];
+ random.nextBytes(value);
+
+ // Add a new key or add a value to an existing key?
+ if (random.nextBoolean() || verifyTable.getCount() == 0) {
+ long key;
+ while (true) {
+ key = random.nextLong();
+ if (!verifyTable.contains(key)) {
+ // Unique keys for this test.
+ break;
+ }
+ }
+
+ map.testPutRow(key);
+ verifyTable.add(key);
+ verifyTable.verify(map);
+ } else {
+ long randomExistingKey = verifyTable.addRandomExisting(value, random);
+ map.testPutRow(randomExistingKey);
+ // verifyTable.verify(map);
+ }
+ verifyTable.verify(map);
+ }
+ }
+
+ @Test
+ public void testMultipleKeysMultipleValue() throws Exception {
+ random = new Random(333);
+
+ // Use a large capacity that doesn't require expansion, yet.
+ VectorMapJoinFastLongHashMultiSet map =
+ new VectorMapJoinFastLongHashMultiSet(
+ false, false, HashTableKeyType.LONG, LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+ VerifyFastLongHashMultiSet verifyTable = new VerifyFastLongHashMultiSet();
+
+ int keyCount = 1000;
+ addAndVerifyMultipleKeyMultipleValue(keyCount, map, verifyTable);
+ }
+
+ @Test
+ public void testLargeAndExpand() throws Exception {
+ random = new Random(790);
+
+ // Use a large capacity that doesn't require expansion, yet.
+ VectorMapJoinFastLongHashMultiSet map =
+ new VectorMapJoinFastLongHashMultiSet(
+ false, false, HashTableKeyType.LONG, MODERATE_CAPACITY, LOAD_FACTOR, MODERATE_WB_SIZE);
+
+ VerifyFastLongHashMultiSet verifyTable = new VerifyFastLongHashMultiSet();
+
+ int keyCount = 1000;
+ addAndVerifyMultipleKeyMultipleValue(keyCount, map, verifyTable);
+ }
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashSet.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashSet.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashSet.java
new file mode 100644
index 0000000..698bcdc
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashSet.java
@@ -0,0 +1,250 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast;
+
+import java.io.IOException;
+import java.util.Random;
+
+import org.apache.hadoop.hive.ql.exec.JoinUtil;
+import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashSetResult;
+import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.CheckFastHashTable.VerifyFastLongHashSet;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType;
+import org.junit.Test;
+
+import static org.junit.Assert.*;
+
+public class TestVectorMapJoinFastLongHashSet extends CommonFastHashTable {
+
+ @Test
+ public void testOneKey() throws Exception {
+ random = new Random(4186);
+
+ VectorMapJoinFastLongHashSet map =
+ new VectorMapJoinFastLongHashSet(
+ false, false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE);
+
+ VerifyFastLongHashSet verifyTable = new VerifyFastLongHashSet();
+
+ long key = random.nextLong();
+
+ map.testPutRow(key);
+ verifyTable.add(key);
+ verifyTable.verify(map);
+
+ // Second time.
+ map.testPutRow(key);
+ verifyTable.add(key);
+ verifyTable.verify(map);
+
+ // Third time.
+ map.testPutRow(key);
+ verifyTable.add(key);
+ verifyTable.verify(map);
+ }
+
+ @Test
+ public void testMultipleKeysSingleValue() throws Exception {
+ random = new Random(1412);
+
+ VectorMapJoinFastLongHashSet map =
+ new VectorMapJoinFastLongHashSet(
+ false, false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE);
+
+ VerifyFastLongHashSet verifyTable = new VerifyFastLongHashSet();
+
+ int keyCount = 100 + random.nextInt(1000);
+ for (int i = 0; i < keyCount; i++) {
+ long key;
+ while (true) {
+ key = random.nextLong();
+ if (!verifyTable.contains(key)) {
+ // Unique keys for this test.
+ break;
+ }
+ }
+
+ map.testPutRow(key);
+ verifyTable.add(key);
+ // verifyTable.verify(map);
+ }
+ verifyTable.verify(map);
+ }
+
+ @Test
+ public void testGetNonExistent() throws Exception {
+ random = new Random(100);
+
+ VectorMapJoinFastLongHashSet map =
+ new VectorMapJoinFastLongHashSet(
+ false, false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE);
+
+ VerifyFastLongHashSet verifyTable = new VerifyFastLongHashSet();
+
+ long key1 = random.nextLong();
+
+ map.testPutRow(key1);
+ verifyTable.add(key1);
+ verifyTable.verify(map);
+
+ long key2 = key1 + 1;
+ VectorMapJoinHashSetResult hashSetResult = map.createHashSetResult();
+ JoinUtil.JoinResult joinResult = map.contains(key2, hashSetResult);
+ assertTrue(joinResult == JoinUtil.JoinResult.NOMATCH);
+
+ map.testPutRow(key2);
+ verifyTable.add(key2);
+ verifyTable.verify(map);
+
+ long key3 = key2 + 1;
+ hashSetResult = map.createHashSetResult();
+ joinResult = map.contains(key3, hashSetResult);
+ assertTrue(joinResult == JoinUtil.JoinResult.NOMATCH);
+ }
+
+ @Test
+ public void testFullMap() throws Exception {
+ random = new Random(2520);
+
+ // Make sure the map does not expand; should be able to find space.
+ VectorMapJoinFastLongHashSet map =
+ new VectorMapJoinFastLongHashSet(
+ false, false, HashTableKeyType.LONG, CAPACITY, 1f, WB_SIZE);
+
+ VerifyFastLongHashSet verifyTable = new VerifyFastLongHashSet();
+
+ for (int i = 0; i < CAPACITY; i++) {
+ long key;
+ while (true) {
+ key = random.nextLong();
+ if (!verifyTable.contains(key)) {
+ // Unique keys for this test.
+ break;
+ }
+ }
+
+ map.testPutRow(key);
+ verifyTable.add(key);
+ // verifyTable.verify(map);
+ }
+ verifyTable.verify(map);
+
+ long anotherKey;
+ while (true) {
+ anotherKey = random.nextLong();
+ if (!verifyTable.contains(anotherKey)) {
+ // Unique keys for this test.
+ break;
+ }
+ }
+
+ VectorMapJoinHashSetResult hashSetResult = map.createHashSetResult();
+ JoinUtil.JoinResult joinResult = map.contains(anotherKey, hashSetResult);
+ assertTrue(joinResult == JoinUtil.JoinResult.NOMATCH);
+ }
+
+ @Test
+ public void testExpand() throws Exception {
+ random = new Random(348);
+
+ // Start with capacity 1; make sure we expand on every put.
+ VectorMapJoinFastLongHashSet map =
+ new VectorMapJoinFastLongHashSet(
+ false, false, HashTableKeyType.LONG, 1, 0.0000001f, WB_SIZE);
+
+ VerifyFastLongHashSet verifyTable = new VerifyFastLongHashSet();
+
+ for (int i = 0; i < 18; ++i) {
+ long key;
+ while (true) {
+ key = random.nextLong();
+ if (!verifyTable.contains(key)) {
+ // Unique keys for this test.
+ break;
+ }
+ }
+
+ map.testPutRow(key);
+ verifyTable.add(key);
+ // verifyTable.verify(map);
+ }
+ verifyTable.verify(map);
+ // assertEquals(1 << 18, map.getCapacity());
+ }
+
+ public void addAndVerifyMultipleKeyMultipleValue(int keyCount,
+ VectorMapJoinFastLongHashSet map, VerifyFastLongHashSet verifyTable)
+ throws HiveException, IOException {
+ for (int i = 0; i < keyCount; i++) {
+ byte[] value = new byte[generateLargeCount() - 1];
+ random.nextBytes(value);
+
+ // Add a new key or add a value to an existing key?
+ if (random.nextBoolean() || verifyTable.getCount() == 0) {
+ long key;
+ while (true) {
+ key = random.nextLong();
+ if (!verifyTable.contains(key)) {
+ // Unique keys for this test.
+ break;
+ }
+ }
+
+ map.testPutRow(key);
+ verifyTable.add(key);
+ verifyTable.verify(map);
+ } else {
+ long randomExistingKey = verifyTable.addRandomExisting(value, random);
+ map.testPutRow(randomExistingKey);
+ // verifyTable.verify(map);
+ }
+ verifyTable.verify(map);
+ }
+ }
+
+ @Test
+ public void testMultipleKeysMultipleValue() throws Exception {
+ random = new Random(7778);
+
+ // Use a large capacity that doesn't require expansion, yet.
+ VectorMapJoinFastLongHashSet map =
+ new VectorMapJoinFastLongHashSet(
+ false, false, HashTableKeyType.LONG, LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+ VerifyFastLongHashSet verifyTable = new VerifyFastLongHashSet();
+
+ int keyCount = 1000;
+ addAndVerifyMultipleKeyMultipleValue(keyCount, map, verifyTable);
+ }
+
+ @Test
+ public void testLargeAndExpand() throws Exception {
+ random = new Random(56);
+
+ // Use a large capacity that doesn't require expansion, yet.
+ VectorMapJoinFastLongHashSet map =
+ new VectorMapJoinFastLongHashSet(
+ false, false, HashTableKeyType.LONG, MODERATE_CAPACITY, LOAD_FACTOR, MODERATE_WB_SIZE);
+
+ VerifyFastLongHashSet verifyTable = new VerifyFastLongHashSet();
+
+ int keyCount = 1000;
+ addAndVerifyMultipleKeyMultipleValue(keyCount, map, verifyTable);
+ }
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastMultiKeyHashMap.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastMultiKeyHashMap.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastMultiKeyHashMap.java
deleted file mode 100644
index 944bda6..0000000
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastMultiKeyHashMap.java
+++ /dev/null
@@ -1,231 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast;
-
-import java.util.Random;
-
-import org.apache.hadoop.hive.ql.exec.JoinUtil;
-import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult;
-import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VectorMapJoinFastMultiKeyHashMap;
-import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType;
-import org.junit.Test;
-
-import static org.junit.Assert.*;
-
-public class TestVectorMapJoinFastMultiKeyHashMap extends CommonFastHashTable {
-
- @Test
- public void testPutGetOne() throws Exception {
- random = new Random(47496);
-
- VectorMapJoinFastMultiKeyHashMap map =
- new VectorMapJoinFastMultiKeyHashMap(false, CAPACITY, LOAD_FACTOR, WB_SIZE);
-
- RandomByteArrayStream randomByteArrayKeyStream = new RandomByteArrayStream(random);
- RandomByteArrayStream randomByteArrayValueStream = new RandomByteArrayStream(random);
-
- byte[] key = randomByteArrayKeyStream.next();
- byte[] value = randomByteArrayValueStream.next();
- map.putRow(key, value);
- verifyHashMapResult(map, key, randomByteArrayValueStream.get(0));
-
- key = randomByteArrayKeyStream.next();
- value = randomByteArrayValueStream.next();
- map.putRow(key, value);
- verifyHashMapResult(map, key, randomByteArrayValueStream.get(1));
- }
-
- @Test
- public void testPutGetMultiple() throws Exception {
- random = new Random(2990);
-
- VectorMapJoinFastMultiKeyHashMap map = new VectorMapJoinFastMultiKeyHashMap(false, CAPACITY, LOAD_FACTOR, WB_SIZE);
-
- RandomByteArrayStream randomByteArrayKeyStream = new RandomByteArrayStream(random);
- RandomByteArrayStream randomByteArrayValueStream = new RandomByteArrayStream(random);
-
- byte[] key = randomByteArrayKeyStream.next();
- byte[] value = randomByteArrayValueStream.next();
- map.putRow(key, value);
- verifyHashMapResult(map, key, value);
-
- // Same key, multiple values.
- for (int i = 0; i < 3; ++i) {
- value = randomByteArrayValueStream.next();
- map.putRow(key, value);
- verifyHashMapResult(map, key, randomByteArrayValueStream);
- }
- }
-
- @Test
- public void testGetNonExistent() throws Exception {
- random = new Random(16916);
-
- VectorMapJoinFastMultiKeyHashMap map = new VectorMapJoinFastMultiKeyHashMap(false, CAPACITY, LOAD_FACTOR, WB_SIZE);
-
- RandomByteArrayStream randomByteArrayKeyStream = new RandomByteArrayStream(random);
- RandomByteArrayStream randomByteArrayValueStream = new RandomByteArrayStream(random);
-
- byte[] key = randomByteArrayKeyStream.next();
- byte[] value = randomByteArrayValueStream.next();
- map.putRow(key, value);
-
- key[0] = (byte) (key[0] + 1);
- map.putRow(key, value);
-
- key[0] = (byte) (key[0] + 1);
- VectorMapJoinHashMapResult hashMapResult = map.createHashMapResult();
- JoinUtil.JoinResult joinResult = map.lookup(key, 0, key.length, hashMapResult);
- assertTrue(joinResult == JoinUtil.JoinResult.NOMATCH);
- assertTrue(!hashMapResult.hasRows());
- }
-
- @Test
- public void testPutWithFullMap() throws Exception {
- random = new Random(26078);
-
- // Make sure the map does not expand; should be able to find space.
- VectorMapJoinFastMultiKeyHashMap map = new VectorMapJoinFastMultiKeyHashMap(false, CAPACITY, 1f, WB_SIZE);
-
- RandomByteArrayStream randomByteArrayKeyStream = new RandomByteArrayStream(random);
- RandomByteArrayStream randomByteArrayValueStream = new RandomByteArrayStream(random);
- for (int i = 0; i < CAPACITY; ++i) {
- byte[] key = randomByteArrayKeyStream.next();
- byte[] value = randomByteArrayValueStream.next();
- map.putRow(key, value);
- }
- for (int i = 0; i < randomByteArrayKeyStream.size(); ++i) {
- verifyHashMapResult(map, randomByteArrayKeyStream.get(i), randomByteArrayValueStream.get(i));
- }
- // assertEquals(CAPACITY, map.getCapacity());
- // Get of non-existent key should terminate..
- byte[] anotherKey = randomByteArrayKeyStream.next();
- VectorMapJoinHashMapResult hashMapResult = map.createHashMapResult();
- JoinUtil.JoinResult joinResult = map.lookup(anotherKey, 0, anotherKey.length, hashMapResult);
- assertTrue(joinResult == JoinUtil.JoinResult.NOMATCH);
- }
-
- @Test
- public void testExpand() throws Exception {
- random = new Random(22470);
-
- // Start with capacity 1; make sure we expand on every put.
- VectorMapJoinFastMultiKeyHashMap map = new VectorMapJoinFastMultiKeyHashMap(false, 1, 0.0000001f, WB_SIZE);
-
- RandomByteArrayStream randomByteArrayKeyStream = new RandomByteArrayStream(random);
- RandomByteArrayStream randomByteArrayValueStream = new RandomByteArrayStream(random);
-
- for (int i = 0; i < 18; ++i) {
- byte[] key = randomByteArrayKeyStream.next();
- byte[] value = randomByteArrayValueStream.next();
- map.putRow(key, value);
- for (int j = 0; j <= i; ++j) {
- verifyHashMapResult(map, randomByteArrayKeyStream.get(j), randomByteArrayValueStream.get(j));
- }
- }
- // assertEquals(1 << 18, map.getCapacity());
- }
-
- @Test
- public void testLarge() throws Exception {
- random = new Random(5231);
-
- // Use a large capacity that doesn't require expansion, yet.
- VectorMapJoinFastMultiKeyHashMap map = new VectorMapJoinFastMultiKeyHashMap(false, LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
-
- RandomByteArrayStream randomByteArrayKeyStream = new RandomByteArrayStream(random, 10);
-
- final int largeSize = 1000;
- RandomByteArrayStream[] randomByteArrayValueStreams = new RandomByteArrayStream[largeSize];
- for (int i = 0; i < largeSize; i++) {
- randomByteArrayValueStreams[i] = new RandomByteArrayStream(random);
- int count = generateLargeCount();
- byte[] key = randomByteArrayKeyStream.next();
- VectorMapJoinHashMapResult hashMapResult = map.createHashMapResult();
- JoinUtil.JoinResult joinResult = map.lookup(key, 0, key.length, hashMapResult);
- if (joinResult == JoinUtil.JoinResult.MATCH) {
- // A problem or need different random seed / longer key?
- assertTrue(false);
- }
- for (int v = 0; v < count; v++) {
- byte[] value = randomByteArrayValueStreams[i].next();
- map.putRow(key, value);
- }
- }
- for (int i = 0; i < largeSize; i++) {
- verifyHashMapResult(map, randomByteArrayKeyStream.get(i), randomByteArrayValueStreams[i]);
- }
- }
-
- @Test
- public void testLargeAndExpand() throws Exception {
- random = new Random(46809);
-
- // Use a large capacity that doesn't require expansion, yet.
- VectorMapJoinFastMultiKeyHashMap map = new VectorMapJoinFastMultiKeyHashMap(false, MODERATE_CAPACITY, LOAD_FACTOR, MODERATE_WB_SIZE);
-
- RandomByteArrayStream randomByteArrayKeyStream = new RandomByteArrayStream(random, 10);
-
- final int largeSize = 1000;
- RandomByteArrayStream[] randomByteArrayValueStreams = new RandomByteArrayStream[largeSize];
- for (int i = 0; i < largeSize; i++) {
- randomByteArrayValueStreams[i] = new RandomByteArrayStream(random);
- int count = generateLargeCount();
- byte[] key = randomByteArrayKeyStream.next();
- VectorMapJoinHashMapResult hashMapResult = map.createHashMapResult();
- JoinUtil.JoinResult joinResult = map.lookup(key, 0, key.length, hashMapResult);
- if (joinResult == JoinUtil.JoinResult.MATCH) {
- // A problem or need different random seed / longer key?
- assertTrue(false);
- }
- for (int v = 0; v < count; v++) {
- byte[] value = randomByteArrayValueStreams[i].next();
- map.putRow(key, value);
- }
- }
- for (int i = 0; i < largeSize; i++) {
- verifyHashMapResult(map, randomByteArrayKeyStream.get(i), randomByteArrayValueStreams[i]);
- }
- }
-
- private void verifyHashMapResult(VectorMapJoinFastMultiKeyHashMap map, byte[] key,
- RandomByteArrayStream randomByteArrayValueStream) {
-
- VectorMapJoinHashMapResult hashMapResult = map.createHashMapResult();
- JoinUtil.JoinResult joinResult = map.lookup(key, 0, key.length, hashMapResult);
- if (joinResult != JoinUtil.JoinResult.MATCH) {
- assertTrue(false);
- }
-
- CommonFastHashTable.verifyHashMapResult(hashMapResult, randomByteArrayValueStream);
- }
-
- private void verifyHashMapResult(VectorMapJoinFastMultiKeyHashMap map, byte[] key,
- byte[] valueBytes) {
-
- VectorMapJoinHashMapResult hashMapResult = map.createHashMapResult();
- JoinUtil.JoinResult joinResult = map.lookup(key, 0, key.length, hashMapResult);
- if (joinResult != JoinUtil.JoinResult.MATCH) {
- assertTrue(false);
- }
-
- CommonFastHashTable.verifyHashMapResult(hashMapResult, valueBytes);
- }
-
-}
http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/serde/src/java/org/apache/hadoop/hive/serde2/fast/RandomRowObjectSource.java
----------------------------------------------------------------------
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/fast/RandomRowObjectSource.java b/serde/src/java/org/apache/hadoop/hive/serde2/fast/RandomRowObjectSource.java
new file mode 100644
index 0000000..1bb990c
--- /dev/null
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/fast/RandomRowObjectSource.java
@@ -0,0 +1,423 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.serde2.fast;
+
+import java.sql.Date;
+import java.sql.Timestamp;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Random;
+
+import org.apache.commons.lang.ArrayUtils;
+import org.apache.commons.lang.StringUtils;
+import org.apache.hadoop.hive.common.type.HiveChar;
+import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.common.type.HiveIntervalDayTime;
+import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth;
+import org.apache.hadoop.hive.common.type.HiveVarchar;
+import org.apache.hadoop.hive.common.type.RandomTypeUtil;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBooleanObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableByteObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDateObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDoubleObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableFloatObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveCharObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveDecimalObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveIntervalDayTimeObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveIntervalYearMonthObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveVarcharObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableIntObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableLongObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableShortObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableTimestampObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
+import org.apache.hive.common.util.DateUtils;
+
+/**
+ * Generate object inspector and random row object[].
+ */
+public class RandomRowObjectSource {
+
+ private Random r;
+
+ private int columnCount;
+
+ private List<String> typeNames;
+
+ private PrimitiveCategory[] primitiveCategories;
+
+ private PrimitiveTypeInfo[] primitiveTypeInfos;
+
+ private List<ObjectInspector> primitiveObjectInspectorList;
+
+ private StructObjectInspector rowStructObjectInspector;
+
+ public List<String> typeNames() {
+ return typeNames;
+ }
+
+ public PrimitiveCategory[] primitiveCategories() {
+ return primitiveCategories;
+ }
+
+ public PrimitiveTypeInfo[] primitiveTypeInfos() {
+ return primitiveTypeInfos;
+ }
+
+ public StructObjectInspector rowStructObjectInspector() {
+ return rowStructObjectInspector;
+ }
+
+ public StructObjectInspector partialRowStructObjectInspector(int partialFieldCount) {
+ ArrayList<ObjectInspector> partialPrimitiveObjectInspectorList =
+ new ArrayList<ObjectInspector>(partialFieldCount);
+ List<String> columnNames = new ArrayList<String>(partialFieldCount);
+ for (int i = 0; i < partialFieldCount; i++) {
+ columnNames.add(String.format("partial%d", i));
+ partialPrimitiveObjectInspectorList.add(
+ PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(
+ primitiveTypeInfos[i]));
+ }
+
+ return ObjectInspectorFactory.getStandardStructObjectInspector(
+ columnNames, partialPrimitiveObjectInspectorList);
+ }
+
+ public void init(Random r) {
+ this.r = r;
+ chooseSchema();
+ }
+
+ /*
+ * For now, exclude CHAR until we determine why there is a difference (blank padding)
+ * serializing with LazyBinarySerializeWrite and the regular SerDe...
+ */
+ private static String[] possibleHiveTypeNames = {
+ "boolean",
+ "tinyint",
+ "smallint",
+ "int",
+ "bigint",
+ "date",
+ "float",
+ "double",
+ "string",
+// "char",
+ "varchar",
+ "binary",
+ "date",
+ "timestamp",
+ "interval_year_month",
+ "interval_day_time",
+ "decimal"
+ };
+
+ private void chooseSchema() {
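+ // Schema shape: rarely (about 1 time in 100) a single random column;
+ // otherwise a coin flip between one column of every type and 1-20
+ // randomly typed columns.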
+ HashSet<Integer> hashSet = null;
+ boolean allTypes;
+ boolean onlyOne = (r.nextInt(100) == 7);
+ if (onlyOne) {
+ columnCount = 1;
+ allTypes = false;
+ } else {
+ allTypes = r.nextBoolean();
+ if (allTypes) {
+ // One of each type.
+ columnCount = possibleHiveTypeNames.length;
+ hashSet = new HashSet<Integer>();
+ } else {
+ columnCount = 1 + r.nextInt(20);
+ }
+ }
+ typeNames = new ArrayList<String>(columnCount);
+ primitiveCategories = new PrimitiveCategory[columnCount];
+ primitiveTypeInfos = new PrimitiveTypeInfo[columnCount];
+ primitiveObjectInspectorList = new ArrayList<ObjectInspector>(columnCount);
+ List<String> columnNames = new ArrayList<String>(columnCount);
+ for (int c = 0; c < columnCount; c++) {
+ columnNames.add(String.format("col%d", c));
+ String typeName;
+
+ if (onlyOne) {
+ typeName = possibleHiveTypeNames[r.nextInt(possibleHiveTypeNames.length)];
+ } else {
+ int typeNum;
+ if (allTypes) {
+ while (true) {
+ typeNum = r.nextInt(possibleHiveTypeNames.length);
+          Integer typeNumInteger = Integer.valueOf(typeNum);
+ if (!hashSet.contains(typeNumInteger)) {
+ hashSet.add(typeNumInteger);
+ break;
+ }
+ }
+ } else {
+ typeNum = r.nextInt(possibleHiveTypeNames.length);
+ }
+ typeName = possibleHiveTypeNames[typeNum];
+ }
+ if (typeName.equals("char")) {
+ int maxLength = 1 + r.nextInt(100);
+ typeName = String.format("char(%d)", maxLength);
+ } else if (typeName.equals("varchar")) {
+ int maxLength = 1 + r.nextInt(100);
+ typeName = String.format("varchar(%d)", maxLength);
+ } else if (typeName.equals("decimal")) {
+ typeName = String.format("decimal(%d,%d)", HiveDecimal.SYSTEM_DEFAULT_PRECISION, HiveDecimal.SYSTEM_DEFAULT_SCALE);
+ }
+ PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString(typeName);
+ primitiveTypeInfos[c] = primitiveTypeInfo;
+ PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory();
+ primitiveCategories[c] = primitiveCategory;
+ primitiveObjectInspectorList.add(PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(primitiveTypeInfo));
+ typeNames.add(typeName);
+ }
+ rowStructObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, primitiveObjectInspectorList);
+ }
+
+ public Object[][] randomRows(int n) {
+ Object[][] result = new Object[n][];
+ for (int i = 0; i < n; i++) {
+ result[i] = randomRow();
+ }
+ return result;
+ }
+
+ public Object[] randomRow() {
+    Object[] row = new Object[columnCount];
+ for (int c = 0; c < columnCount; c++) {
+ Object object = randomObject(c);
+ if (object == null) {
+ throw new Error("Unexpected null for column " + c);
+ }
+ row[c] = getWritableObject(c, object);
+ if (row[c] == null) {
+ throw new Error("Unexpected null for writable for column " + c);
+ }
+ }
+ return row;
+ }
+
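+  /*
+   * In-place O(n^2) exchange sort using ObjectInspectorUtils.compare;
+   * adequate for the small row counts used in tests.
+   */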
+ public static void sort(Object[][] rows, ObjectInspector oi) {
+ for (int i = 0; i < rows.length; i++) {
+ for (int j = i + 1; j < rows.length; j++) {
+ if (ObjectInspectorUtils.compare(rows[i], oi, rows[j], oi) > 0) {
+ Object[] t = rows[i];
+ rows[i] = rows[j];
+ rows[j] = t;
+ }
+ }
+ }
+ }
+
+ public void sort(Object[][] rows) {
+ RandomRowObjectSource.sort(rows, rowStructObjectInspector);
+ }
+
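+  /*
+   * Wraps a raw random value (Boolean, HiveChar, byte[], etc.) in the
+   * corresponding writable via the column's object inspector.
+   */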
+ public Object getWritableObject(int column, Object object) {
+ ObjectInspector objectInspector = primitiveObjectInspectorList.get(column);
+ PrimitiveCategory primitiveCategory = primitiveCategories[column];
+ PrimitiveTypeInfo primitiveTypeInfo = primitiveTypeInfos[column];
+ switch (primitiveCategory) {
+ case BOOLEAN:
+ return ((WritableBooleanObjectInspector) objectInspector).create((boolean) object);
+ case BYTE:
+ return ((WritableByteObjectInspector) objectInspector).create((byte) object);
+ case SHORT:
+ return ((WritableShortObjectInspector) objectInspector).create((short) object);
+ case INT:
+ return ((WritableIntObjectInspector) objectInspector).create((int) object);
+ case LONG:
+ return ((WritableLongObjectInspector) objectInspector).create((long) object);
+ case DATE:
+ return ((WritableDateObjectInspector) objectInspector).create((Date) object);
+ case FLOAT:
+ return ((WritableFloatObjectInspector) objectInspector).create((float) object);
+ case DOUBLE:
+ return ((WritableDoubleObjectInspector) objectInspector).create((double) object);
+ case STRING:
+ return ((WritableStringObjectInspector) objectInspector).create((String) object);
+ case CHAR:
+ {
+ WritableHiveCharObjectInspector writableCharObjectInspector =
+          new WritableHiveCharObjectInspector((CharTypeInfo) primitiveTypeInfo);
+        return writableCharObjectInspector.create((HiveChar) object);
+ }
+ case VARCHAR:
+ {
+ WritableHiveVarcharObjectInspector writableVarcharObjectInspector =
+          new WritableHiveVarcharObjectInspector((VarcharTypeInfo) primitiveTypeInfo);
+        return writableVarcharObjectInspector.create((HiveVarchar) object);
+ }
+ case BINARY:
+      return PrimitiveObjectInspectorFactory.writableBinaryObjectInspector.create((byte[]) object);
+    case TIMESTAMP:
+      return ((WritableTimestampObjectInspector) objectInspector).create((Timestamp) object);
+    case INTERVAL_YEAR_MONTH:
+      return ((WritableHiveIntervalYearMonthObjectInspector) objectInspector).create((HiveIntervalYearMonth) object);
+    case INTERVAL_DAY_TIME:
+      return ((WritableHiveIntervalDayTimeObjectInspector) objectInspector).create((HiveIntervalDayTime) object);
+    case DECIMAL:
+      {
+        WritableHiveDecimalObjectInspector writableDecimalObjectInspector =
+          new WritableHiveDecimalObjectInspector((DecimalTypeInfo) primitiveTypeInfo);
+        return writableDecimalObjectInspector.create((HiveDecimal) object);
+ }
+ default:
+ throw new Error("Unknown primitive category " + primitiveCategory);
+ }
+ }
+
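+  /*
+   * Produces a raw random Java value for the column's primitive category;
+   * randomRow() converts it to a writable with getWritableObject().
+   */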
+ public Object randomObject(int column) {
+ PrimitiveCategory primitiveCategory = primitiveCategories[column];
+ PrimitiveTypeInfo primitiveTypeInfo = primitiveTypeInfos[column];
+ switch (primitiveCategory) {
+ case BOOLEAN:
+      return Boolean.valueOf(r.nextInt(2) == 1);
+ case BYTE:
+ return Byte.valueOf((byte) r.nextInt());
+ case SHORT:
+ return Short.valueOf((short) r.nextInt());
+ case INT:
+ return Integer.valueOf(r.nextInt());
+ case LONG:
+ return Long.valueOf(r.nextLong());
+ case DATE:
+ return RandomTypeUtil.getRandDate(r);
+ case FLOAT:
+ return Float.valueOf(r.nextFloat() * 10 - 5);
+ case DOUBLE:
+ return Double.valueOf(r.nextDouble() * 10 - 5);
+ case STRING:
+ return RandomTypeUtil.getRandString(r);
+ case CHAR:
+ return getRandHiveChar(r, (CharTypeInfo) primitiveTypeInfo);
+ case VARCHAR:
+ return getRandHiveVarchar(r, (VarcharTypeInfo) primitiveTypeInfo);
+ case BINARY:
+ return getRandBinary(r, 1 + r.nextInt(100));
+ case TIMESTAMP:
+ return RandomTypeUtil.getRandTimestamp(r);
+ case INTERVAL_YEAR_MONTH:
+ return getRandIntervalYearMonth(r);
+ case INTERVAL_DAY_TIME:
+ return getRandIntervalDayTime(r);
+ case DECIMAL:
+ return getRandHiveDecimal(r, (DecimalTypeInfo) primitiveTypeInfo);
+ default:
+ throw new Error("Unknown primitive category " + primitiveCategory);
+ }
+ }
+
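+  /*
+   * CHAR and VARCHAR values are built from a random lowercase string that the
+   * HiveChar/HiveVarchar constructor truncates to a random length no greater
+   * than the declared type length.
+   */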
+ public static HiveChar getRandHiveChar(Random r, CharTypeInfo charTypeInfo) {
+ int maxLength = 1 + r.nextInt(charTypeInfo.getLength());
+ String randomString = RandomTypeUtil.getRandString(r, "abcdefghijklmnopqrstuvwxyz", 100);
+ HiveChar hiveChar = new HiveChar(randomString, maxLength);
+ return hiveChar;
+ }
+
+ public static HiveVarchar getRandHiveVarchar(Random r, VarcharTypeInfo varcharTypeInfo) {
+ int maxLength = 1 + r.nextInt(varcharTypeInfo.getLength());
+ String randomString = RandomTypeUtil.getRandString(r, "abcdefghijklmnopqrstuvwxyz", 100);
+ HiveVarchar hiveVarchar = new HiveVarchar(randomString, maxLength);
+ return hiveVarchar;
+ }
+
+  public static byte[] getRandBinary(Random r, int len) {
+    byte[] bytes = new byte[len];
+    for (int j = 0; j < len; j++) {
+      bytes[j] = (byte) r.nextInt();
+ }
+ return bytes;
+ }
+
+ private static final String DECIMAL_CHARS = "0123456789";
+
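+  /*
+   * Builds a random decimal literal with up to 18 digits of precision and a
+   * random scale, retrying when HiveDecimal normalizes the value to
+   * scale > precision (see the loop below).
+   */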
+ public static HiveDecimal getRandHiveDecimal(Random r, DecimalTypeInfo decimalTypeInfo) {
+ while (true) {
+ StringBuilder sb = new StringBuilder();
+ int precision = 1 + r.nextInt(18);
+      int scale = r.nextInt(precision + 1);
+
+ int integerDigits = precision - scale;
+
+ if (r.nextBoolean()) {
+ sb.append("-");
+ }
+
+ if (integerDigits == 0) {
+ sb.append("0");
+ } else {
+ sb.append(RandomTypeUtil.getRandString(r, DECIMAL_CHARS, integerDigits));
+ }
+ if (scale != 0) {
+ sb.append(".");
+ sb.append(RandomTypeUtil.getRandString(r, DECIMAL_CHARS, scale));
+ }
+
+ HiveDecimal bd = HiveDecimal.create(sb.toString());
+ if (bd.scale() > bd.precision()) {
+        // HiveDecimal can normalize values like 0.001 to scale > precision;
+        // skip these and generate a new value.
+ continue;
+ }
+
+ return bd;
+ }
+ }
+
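+  /*
+   * Intervals are generated as Hive interval literal strings ("[-]y-m" and
+   * "[-]d hh:mm:ss[.nnnnnnnnn]") and parsed with valueOf().
+   */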
+ public static HiveIntervalYearMonth getRandIntervalYearMonth(Random r) {
+ String yearMonthSignStr = r.nextInt(2) == 0 ? "" : "-";
+ String intervalYearMonthStr = String.format("%s%d-%d",
+ yearMonthSignStr,
+ Integer.valueOf(1800 + r.nextInt(500)), // year
+ Integer.valueOf(0 + r.nextInt(12))); // month
+ HiveIntervalYearMonth intervalYearMonthVal = HiveIntervalYearMonth.valueOf(intervalYearMonthStr);
+ return intervalYearMonthVal;
+ }
+
+ public static HiveIntervalDayTime getRandIntervalDayTime(Random r) {
+ String optionalNanos = "";
+ if (r.nextInt(2) == 1) {
+ optionalNanos = String.format(".%09d",
+ Integer.valueOf(0 + r.nextInt(DateUtils.NANOS_PER_SEC)));
+ }
+    String daySignStr = r.nextInt(2) == 0 ? "" : "-";
+    String dayTimeStr = String.format("%s%d %02d:%02d:%02d%s",
+        daySignStr,
+ Integer.valueOf(1 + r.nextInt(28)), // day
+ Integer.valueOf(0 + r.nextInt(24)), // hour
+ Integer.valueOf(0 + r.nextInt(60)), // minute
+ Integer.valueOf(0 + r.nextInt(60)), // second
+ optionalNanos);
+ HiveIntervalDayTime intervalDayTimeVal = HiveIntervalDayTime.valueOf(dayTimeStr);
+ return intervalDayTimeVal;
+ }
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinaryDeserializeRead.java
----------------------------------------------------------------------
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinaryDeserializeRead.java b/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinaryDeserializeRead.java
index 4415431..bbb35c7 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinaryDeserializeRead.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinaryDeserializeRead.java
@@ -40,7 +40,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
*
* Reading some fields require a results object to receive value information. A separate
* results object is created by the caller at initialization per different field even for the same
- * type.
+ * type.
*
* Some type values are by reference to either bytes in the deserialization buffer or to
* other type specific buffers. So, those references are only valid until the next time set is