Posted to commits@hive.apache.org by mm...@apache.org on 2016/08/20 08:32:57 UTC
[2/3] hive git commit: HIVE-13874: Tighten up EOF checking in Fast
DeserializeRead classes; display better exception information;
add new Unit Tests (Matt McCline, reviewed by Sergey Shelukhin)
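
The diffs below add a new diagnostic, getDetailedReadPositionString(), to the fast DeserializeRead classes. As an illustrative sketch (not part of the patch; the try/catch wrapper and variable names are hypothetical, while set(), readCheckNull() and getDetailedReadPositionString() are the APIs shown in the diffs), a caller can surface the diagnostic when deserialization fails:

    // deserializeRead is any concrete DeserializeRead, e.g. the
    // BinarySortableDeserializeRead modified by this patch.
    try {
      deserializeRead.set(bytes, 0, bytes.length);
      for (int i = 0; i < fieldCount; i++) {
        boolean isNull = deserializeRead.readCheckNull();
        // ... consume the current* value members when isNull is false ...
      }
    } catch (IOException e) {
      // The new diagnostic names the field index and buffer offsets being read.
      throw new IOException(
          "Deserialize error: " + deserializeRead.getDetailedReadPositionString(), e);
    }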
http://git-wip-us.apache.org/repos/asf/hive/blob/2d1f403d/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastRowHashMap.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastRowHashMap.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastRowHashMap.java
new file mode 100644
index 0000000..3f02eb3
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastRowHashMap.java
@@ -0,0 +1,718 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Random;
+
+import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource;
+import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.CheckFastRowHashMap.VerifyFastRowHashMap;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType;
+import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.ByteStream.Output;
+import org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe;
+import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite;
+import org.apache.hadoop.hive.serde2.fast.SerializeWrite;
+import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinarySerializeWrite;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.Writable;
+import org.junit.Test;
+
+/*
+ * Tests of the value hash maps optimized for vector map join.
+ *
+ * The key is uninterpreted bytes.
+ */
+public class TestVectorMapJoinFastRowHashMap extends CommonFastHashTable {
+
+ private void addAndVerifyRows(VectorRandomRowSource valueSource, Object[][] rows,
+ VectorMapJoinFastHashTable map, HashTableKeyType hashTableKeyType,
+ VerifyFastRowHashMap verifyTable, String[] keyTypeNames,
+ boolean doClipping, boolean useExactBytes) throws HiveException, IOException, SerDeException {
+
+ final int keyCount = keyTypeNames.length;
+ PrimitiveTypeInfo[] keyPrimitiveTypeInfos = new PrimitiveTypeInfo[keyCount];
+ PrimitiveCategory[] keyPrimitiveCategories = new PrimitiveCategory[keyCount];
+ ArrayList<ObjectInspector> keyPrimitiveObjectInspectorList =
+ new ArrayList<ObjectInspector>(keyCount);
+
+ for (int i = 0; i < keyCount; i++) {
+ PrimitiveTypeInfo primitiveTypeInfo =
+ (PrimitiveTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString(keyTypeNames[i]);
+ keyPrimitiveTypeInfos[i] = primitiveTypeInfo;
+ PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory();
+ keyPrimitiveCategories[i] = primitiveCategory;
+ keyPrimitiveObjectInspectorList.add(
+ PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(primitiveTypeInfo));
+ }
+
+ boolean[] keyColumnSortOrderIsDesc = new boolean[keyCount];
+ Arrays.fill(keyColumnSortOrderIsDesc, false);
+ byte[] keyColumnNullMarker = new byte[keyCount];
+ Arrays.fill(keyColumnNullMarker, BinarySortableSerDe.ZERO);
+ byte[] keyColumnNotNullMarker = new byte[keyCount];
+ Arrays.fill(keyColumnNotNullMarker, BinarySortableSerDe.ONE);
+
+ BinarySortableSerializeWrite keySerializeWrite =
+ new BinarySortableSerializeWrite(keyColumnSortOrderIsDesc,
+ keyColumnNullMarker, keyColumnNotNullMarker);
+
+ PrimitiveTypeInfo[] valuePrimitiveTypeInfos = valueSource.primitiveTypeInfos();
+ final int columnCount = valuePrimitiveTypeInfos.length;
+
+ SerializeWrite valueSerializeWrite = new LazyBinarySerializeWrite(columnCount);
+
+ final int count = rows.length;
+ for (int i = 0; i < count; i++) {
+
+ Object[] valueRow = rows[i];
+
+ Output valueOutput = new Output();
+ ((LazyBinarySerializeWrite) valueSerializeWrite).set(valueOutput);
+
+ for (int index = 0; index < columnCount; index++) {
+
+ Writable writable = (Writable) valueRow[index];
+
+ VerifyFastRow.serializeWrite(valueSerializeWrite, valuePrimitiveTypeInfos[index], writable);
+ }
+
+ byte[] value = Arrays.copyOf(valueOutput.getData(), valueOutput.getLength());
+
+ // Add a new key or add a value to an existing key?
+ byte[] key;
+ if (random.nextBoolean() || verifyTable.getCount() == 0) {
+ Object[] keyRow =
+ VectorRandomRowSource.randomRow(keyCount, random, keyPrimitiveObjectInspectorList,
+ keyPrimitiveCategories, keyPrimitiveTypeInfos);
+
+ Output keyOutput = new Output();
+ keySerializeWrite.set(keyOutput);
+
+ for (int index = 0; index < keyCount; index++) {
+
+ Writable writable = (Writable) keyRow[index];
+
+ VerifyFastRow.serializeWrite(keySerializeWrite, keyPrimitiveTypeInfos[index], writable);
+ }
+
+ key = Arrays.copyOf(keyOutput.getData(), keyOutput.getLength());
+
+ verifyTable.add(key, keyRow, value, valueRow);
+ } else {
+ key = verifyTable.addRandomExisting(value, valueRow, random);
+ }
+
+ // Wrap the serialized key and value bytes and add the row to the map under test.
+ BytesWritable keyWritable = new BytesWritable(key);
+ BytesWritable valueWritable = new BytesWritable(value);
+ map.putRow(keyWritable, valueWritable);
+ // verifyTable.verify(map);
+ }
+ verifyTable.verify(map, hashTableKeyType, valuePrimitiveTypeInfos,
+ doClipping, useExactBytes, random);
+ }
+
+ @Test
+ public void testBigIntRows() throws Exception {
+ random = new Random(927337);
+
+ // Use a large capacity that doesn't require expansion, yet.
+ VectorMapJoinFastLongHashMap map =
+ new VectorMapJoinFastLongHashMap(
+ false, false, HashTableKeyType.LONG,
+ LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+ VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+ VectorRandomRowSource valueSource = new VectorRandomRowSource();
+ valueSource.init(random);
+
+ int rowCount = 10000;
+ Object[][] rows = valueSource.randomRows(rowCount);
+
+ addAndVerifyRows(valueSource, rows,
+ map, HashTableKeyType.LONG, verifyTable,
+ new String[] { "bigint" },
+ /* doClipping */ false, /* useExactBytes */ false);
+ }
+
+ @Test
+ public void testIntRows() throws Exception {
+ random = new Random(927337);
+
+ // Use a large capacity that doesn't require expansion, yet.
+ VectorMapJoinFastLongHashMap map =
+ new VectorMapJoinFastLongHashMap(
+ false, false, HashTableKeyType.INT,
+ LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+ VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+ VectorRandomRowSource valueSource = new VectorRandomRowSource();
+ valueSource.init(random);
+
+ int rowCount = 10000;
+ Object[][] rows = valueSource.randomRows(rowCount);
+
+ addAndVerifyRows(valueSource, rows,
+ map, HashTableKeyType.INT, verifyTable,
+ new String[] { "int" },
+ /* doClipping */ false, /* useExactBytes */ false);
+ }
+
+ @Test
+ public void testStringRows() throws Exception {
+ random = new Random(927337);
+
+ // Use a large capacity that doesn't require expansion, yet.
+ VectorMapJoinFastStringHashMap map =
+ new VectorMapJoinFastStringHashMap(
+ false,
+ LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+ VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+ VectorRandomRowSource valueSource = new VectorRandomRowSource();
+ valueSource.init(random);
+
+ int rowCount = 10000;
+ Object[][] rows = valueSource.randomRows(rowCount);
+
+ addAndVerifyRows(valueSource, rows,
+ map, HashTableKeyType.STRING, verifyTable,
+ new String[] { "string" },
+ /* doClipping */ false, /* useExactBytes */ false);
+ }
+
+ @Test
+ public void testMultiKeyRows1() throws Exception {
+ random = new Random(833);
+
+ // Use a large capacity that doesn't require expansion, yet.
+ VectorMapJoinFastMultiKeyHashMap map =
+ new VectorMapJoinFastMultiKeyHashMap(
+ false,
+ LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+ VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+ VectorRandomRowSource valueSource = new VectorRandomRowSource();
+ valueSource.init(random);
+
+ int rowCount = 10000;
+ Object[][] rows = valueSource.randomRows(rowCount);
+
+ addAndVerifyRows(valueSource, rows,
+ map, HashTableKeyType.MULTI_KEY, verifyTable,
+ new String[] { "int", "int" },
+ /* doClipping */ false, /* useExactBytes */ false);
+ }
+
+ @Test
+ public void testMultiKeyRows2() throws Exception {
+ random = new Random(833099);
+
+ // Use a large capacity that doesn't require expansion, yet.
+ VectorMapJoinFastMultiKeyHashMap map =
+ new VectorMapJoinFastMultiKeyHashMap(
+ false,
+ LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+ VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+ VectorRandomRowSource valueSource = new VectorRandomRowSource();
+ valueSource.init(random);
+
+ int rowCount = 10000;
+ Object[][] rows = valueSource.randomRows(rowCount);
+
+ addAndVerifyRows(valueSource, rows,
+ map, HashTableKeyType.MULTI_KEY, verifyTable,
+ new String[] { "string", "string" },
+ /* doClipping */ false, /* useExactBytes */ false);
+ }
+
+ @Test
+ public void testMultiKeyRows3() throws Exception {
+ random = new Random(833099);
+
+ // Use a large capacity that doesn't require expansion, yet.
+ VectorMapJoinFastMultiKeyHashMap map =
+ new VectorMapJoinFastMultiKeyHashMap(
+ false,
+ LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+ VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+ VectorRandomRowSource valueSource = new VectorRandomRowSource();
+ valueSource.init(random);
+
+ int rowCount = 10000;
+ Object[][] rows = valueSource.randomRows(rowCount);
+
+ addAndVerifyRows(valueSource, rows,
+ map, HashTableKeyType.MULTI_KEY, verifyTable,
+ new String[] { "bigint", "timestamp", "double" },
+ /* doClipping */ false, /* useExactBytes */ false);
+ }
+
+ @Test
+ public void testBigIntRowsClipped() throws Exception {
+ random = new Random(326232);
+
+ // Use a large capacity that doesn't require expansion, yet.
+ VectorMapJoinFastLongHashMap map =
+ new VectorMapJoinFastLongHashMap(
+ false, false, HashTableKeyType.LONG,
+ LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+ VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+ VectorRandomRowSource valueSource = new VectorRandomRowSource();
+ valueSource.init(random);
+
+ int rowCount = 10000;
+ Object[][] rows = valueSource.randomRows(rowCount);
+
+ addAndVerifyRows(valueSource, rows,
+ map, HashTableKeyType.LONG, verifyTable,
+ new String[] { "bigint" },
+ /* doClipping */ true, /* useExactBytes */ false);
+ }
+
+ @Test
+ public void testIntRowsClipped() throws Exception {
+ random = new Random(326232);
+
+ // Use a large capacity that doesn't require expansion, yet.
+ VectorMapJoinFastLongHashMap map =
+ new VectorMapJoinFastLongHashMap(
+ false, false, HashTableKeyType.INT,
+ LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+ VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+ VectorRandomRowSource valueSource = new VectorRandomRowSource();
+ valueSource.init(random);
+
+ int rowCount = 10000;
+ Object[][] rows = valueSource.randomRows(rowCount);
+
+ addAndVerifyRows(valueSource, rows,
+ map, HashTableKeyType.INT, verifyTable,
+ new String[] { "int" },
+ /* doClipping */ true, /* useExactBytes */ false);
+ }
+
+ @Test
+ public void testStringRowsClipped() throws Exception {
+ random = new Random(326232);
+
+ // Use a large capacity that doesn't require expansion, yet.
+ VectorMapJoinFastStringHashMap map =
+ new VectorMapJoinFastStringHashMap(
+ false,
+ LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+ VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+ VectorRandomRowSource valueSource = new VectorRandomRowSource();
+ valueSource.init(random);
+
+ int rowCount = 10000;
+ Object[][] rows = valueSource.randomRows(rowCount);
+
+ addAndVerifyRows(valueSource, rows,
+ map, HashTableKeyType.STRING, verifyTable,
+ new String[] { "string" },
+ /* doClipping */ true, /* useExactBytes */ false);
+ }
+
+ @Test
+ public void testMultiKeyRowsClipped1() throws Exception {
+ random = new Random(2331);
+
+ // Use a large capacity that doesn't require expansion, yet.
+ VectorMapJoinFastMultiKeyHashMap map =
+ new VectorMapJoinFastMultiKeyHashMap(
+ false,
+ LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+ VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+ VectorRandomRowSource valueSource = new VectorRandomRowSource();
+ valueSource.init(random);
+
+ int rowCount = 10000;
+ Object[][] rows = valueSource.randomRows(rowCount);
+
+ addAndVerifyRows(valueSource, rows,
+ map, HashTableKeyType.MULTI_KEY, verifyTable,
+ new String[] { "varchar(20)", "date", "interval_day_time" },
+ /* doClipping */ true, /* useExactBytes */ false);
+ }
+
+ @Test
+ public void testMultiKeyRowsClipped2() throws Exception {
+ random = new Random(7403);
+
+ // Use a large capacity that doesn't require expansion, yet.
+ VectorMapJoinFastMultiKeyHashMap map =
+ new VectorMapJoinFastMultiKeyHashMap(
+ false,
+ LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+ VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+ VectorRandomRowSource valueSource = new VectorRandomRowSource();
+ valueSource.init(random);
+
+ int rowCount = 10000;
+ Object[][] rows = valueSource.randomRows(rowCount);
+
+ addAndVerifyRows(valueSource, rows,
+ map, HashTableKeyType.MULTI_KEY, verifyTable,
+ new String[] { "varchar(20)", "varchar(40)" },
+ /* doClipping */ true, /* useExactBytes */ false);
+ }
+
+ @Test
+ public void testMultiKeyRowsClipped3() throws Exception {
+ random = new Random(99);
+
+ // Use a large capacity that doesn't require expansion, yet.
+ VectorMapJoinFastMultiKeyHashMap map =
+ new VectorMapJoinFastMultiKeyHashMap(
+ false,
+ LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+ VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+ VectorRandomRowSource valueSource = new VectorRandomRowSource();
+ valueSource.init(random);
+
+ int rowCount = 10000;
+ Object[][] rows = valueSource.randomRows(rowCount);
+
+ addAndVerifyRows(valueSource, rows,
+ map, HashTableKeyType.MULTI_KEY, verifyTable,
+ new String[] { "float", "tinyint" },
+ /* doClipping */ true, /* useExactBytes */ false);
+ }
+
+ @Test
+ public void testBigIntRowsExact() throws Exception {
+ random = new Random(27722);
+
+ // Use a large capacity that doesn't require expansion, yet.
+ VectorMapJoinFastLongHashMap map =
+ new VectorMapJoinFastLongHashMap(
+ false, false, HashTableKeyType.LONG,
+ LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+ VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+ VectorRandomRowSource valueSource = new VectorRandomRowSource();
+ valueSource.init(random);
+
+ int rowCount = 10000;
+ Object[][] rows = valueSource.randomRows(rowCount);
+
+ addAndVerifyRows(valueSource, rows,
+ map, HashTableKeyType.LONG, verifyTable,
+ new String[] { "bigint" },
+ /* doClipping */ false, /* useExactBytes */ true);
+ }
+
+ @Test
+ public void testIntRowsExact() throws Exception {
+ random = new Random(8238383);
+
+ // Use a large capacity that doesn't require expansion, yet.
+ VectorMapJoinFastLongHashMap map =
+ new VectorMapJoinFastLongHashMap(
+ false, false, HashTableKeyType.INT,
+ LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+ VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+ VectorRandomRowSource valueSource = new VectorRandomRowSource();
+ valueSource.init(random);
+
+ int rowCount = 10000;
+ Object[][] rows = valueSource.randomRows(rowCount);
+
+ addAndVerifyRows(valueSource, rows,
+ map, HashTableKeyType.INT, verifyTable,
+ new String[] { "int" },
+ /* doClipping */ false, /* useExactBytes */ true);
+ }
+
+ @Test
+ public void testStringRowsExact() throws Exception {
+ random = new Random(8235);
+
+ // Use a large capacity that doesn't require expansion, yet.
+ VectorMapJoinFastStringHashMap map =
+ new VectorMapJoinFastStringHashMap(
+ false,
+ LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+ VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+ VectorRandomRowSource valueSource = new VectorRandomRowSource();
+ valueSource.init(random);
+
+ int rowCount = 10000;
+ Object[][] rows = valueSource.randomRows(rowCount);
+
+ addAndVerifyRows(valueSource, rows,
+ map, HashTableKeyType.STRING, verifyTable,
+ new String[] { "string" },
+ /* doClipping */ false, /* useExactBytes */ true);
+ }
+
+ @Test
+ public void testMultiKeyRowsExact1() throws Exception {
+ random = new Random(8235);
+
+ // Use a large capacity that doesn't require expansion, yet.
+ VectorMapJoinFastMultiKeyHashMap map =
+ new VectorMapJoinFastMultiKeyHashMap(
+ false,
+ LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+ VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+ VectorRandomRowSource valueSource = new VectorRandomRowSource();
+ valueSource.init(random);
+
+ int rowCount = 10000;
+ Object[][] rows = valueSource.randomRows(rowCount);
+
+ addAndVerifyRows(valueSource, rows,
+ map, HashTableKeyType.MULTI_KEY, verifyTable,
+ new String[] { "string", "string", "string", "string" },
+ /* doClipping */ false, /* useExactBytes */ true);
+ }
+
+ @Test
+ public void testMultiKeyRowsExact2() throws Exception {
+ random = new Random(8235);
+
+ // Use a large capacity that doesn't require expansion, yet.
+ VectorMapJoinFastMultiKeyHashMap map =
+ new VectorMapJoinFastMultiKeyHashMap(
+ false,
+ LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+ VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+ VectorRandomRowSource valueSource = new VectorRandomRowSource();
+ valueSource.init(random);
+
+ int rowCount = 10000;
+ Object[][] rows = valueSource.randomRows(rowCount);
+
+ addAndVerifyRows(valueSource, rows,
+ map, HashTableKeyType.MULTI_KEY, verifyTable,
+ new String[] { "smallint" },
+ /* doClipping */ false, /* useExactBytes */ true);
+ }
+
+ @Test
+ public void testMultiKeyRowsExact3() throws Exception {
+ random = new Random(8235);
+
+ // Use a large capacity that doesn't require expansion, yet.
+ VectorMapJoinFastMultiKeyHashMap map =
+ new VectorMapJoinFastMultiKeyHashMap(
+ false,
+ LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+ VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+ VectorRandomRowSource valueSource = new VectorRandomRowSource();
+ valueSource.init(random);
+
+ int rowCount = 10000;
+ Object[][] rows = valueSource.randomRows(rowCount);
+
+ addAndVerifyRows(valueSource, rows,
+ map, HashTableKeyType.MULTI_KEY, verifyTable,
+ new String[] { "int", "binary" },
+ /* doClipping */ false, /* useExactBytes */ true);
+ }
+
+ @Test
+ public void testBigIntRowsClippedExact() throws Exception {
+ random = new Random(2122);
+
+ // Use a large capacity that doesn't require expansion, yet.
+ VectorMapJoinFastLongHashMap map =
+ new VectorMapJoinFastLongHashMap(
+ false, false, HashTableKeyType.LONG,
+ LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+ VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+ VectorRandomRowSource valueSource = new VectorRandomRowSource();
+ valueSource.init(random);
+
+ int rowCount = 10000;
+ Object[][] rows = valueSource.randomRows(rowCount);
+
+ addAndVerifyRows(valueSource, rows,
+ map, HashTableKeyType.LONG, verifyTable,
+ new String[] { "bigint" },
+ /* doClipping */ true, /* useExactBytes */ true);
+ }
+
+ @Test
+ public void testIntRowsClippedExact() throws Exception {
+ random = new Random(7520);
+
+ // Use a large capacity that doesn't require expansion, yet.
+ VectorMapJoinFastLongHashMap map =
+ new VectorMapJoinFastLongHashMap(
+ false, false, HashTableKeyType.INT,
+ LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+ VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+ VectorRandomRowSource valueSource = new VectorRandomRowSource();
+ valueSource.init(random);
+
+ int rowCount = 10000;
+ Object[][] rows = valueSource.randomRows(rowCount);
+
+ addAndVerifyRows(valueSource, rows,
+ map, HashTableKeyType.INT, verifyTable,
+ new String[] { "int" },
+ /* doClipping */ true, /* useExactBytes */ true);
+ }
+
+ @Test
+ public void testStringRowsClippedExact() throws Exception {
+ random = new Random(7539);
+
+ // Use a large capacity that doesn't require expansion, yet.
+ VectorMapJoinFastStringHashMap map =
+ new VectorMapJoinFastStringHashMap(
+ false,
+ LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+ VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+ VectorRandomRowSource valueSource = new VectorRandomRowSource();
+ valueSource.init(random);
+
+ int rowCount = 10000;
+ Object[][] rows = valueSource.randomRows(rowCount);
+
+ addAndVerifyRows(valueSource, rows,
+ map, HashTableKeyType.STRING, verifyTable,
+ new String[] { "string" },
+ /* doClipping */ true, /* useExactBytes */ true);
+ }
+
+ @Test
+ public void testMultiKeyRowsClippedExact1() throws Exception {
+ random = new Random(13);
+
+ // Use a large capacity that doesn't require expansion, yet.
+ VectorMapJoinFastMultiKeyHashMap map =
+ new VectorMapJoinFastMultiKeyHashMap(
+ false,
+ LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+ VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+ VectorRandomRowSource valueSource = new VectorRandomRowSource();
+ valueSource.init(random);
+
+ int rowCount = 10000;
+ Object[][] rows = valueSource.randomRows(rowCount);
+
+ addAndVerifyRows(valueSource, rows,
+ map, HashTableKeyType.MULTI_KEY, verifyTable,
+ new String[] { "interval_year_month", "decimal(12,8)" },
+ /* doClipping */ true, /* useExactBytes */ true);
+ }
+
+ @Test
+ public void testMultiKeyRowsClippedExact2() throws Exception {
+ random = new Random(12);
+
+ // Use a large capacity that doesn't require expansion, yet.
+ VectorMapJoinFastMultiKeyHashMap map =
+ new VectorMapJoinFastMultiKeyHashMap(
+ false,
+ LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+ VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+ VectorRandomRowSource valueSource = new VectorRandomRowSource();
+ valueSource.init(random);
+
+ int rowCount = 10000;
+ Object[][] rows = valueSource.randomRows(rowCount);
+
+ addAndVerifyRows(valueSource, rows,
+ map, HashTableKeyType.MULTI_KEY, verifyTable,
+ new String[] { "bigint", "string", "int" },
+ /* doClipping */ true, /* useExactBytes */ true);
+ }
+
+ @Test
+ public void testMultiKeyRowsClippedExact3() throws Exception {
+ random = new Random(7);
+
+ // Use a large capacity that doesn't require expansion, yet.
+ VectorMapJoinFastMultiKeyHashMap map =
+ new VectorMapJoinFastMultiKeyHashMap(
+ false,
+ LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+ VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+ VectorRandomRowSource valueSource = new VectorRandomRowSource();
+ valueSource.init(random);
+
+ int rowCount = 10000;
+ Object[][] rows = valueSource.randomRows(rowCount);
+
+ addAndVerifyRows(valueSource, rows,
+ map, HashTableKeyType.MULTI_KEY, verifyTable,
+ new String[] { "bigint", "string", "varchar(5000)" },
+ /* doClipping */ true, /* useExactBytes */ true);
+ }
+}
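The tests above cover each key type across the plain, clipped, and exact-bytes variants. For illustration only, an additional combination would follow the same pattern (every identifier below appears in the file above; the test name, seed, and key types are made up):

    @Test
    public void testMultiKeyRowsClippedHypothetical() throws Exception {
      random = new Random(42);  // made-up seed

      // Use a large capacity that doesn't require expansion, yet.
      VectorMapJoinFastMultiKeyHashMap map =
          new VectorMapJoinFastMultiKeyHashMap(
              false,
              LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);

      VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();

      VectorRandomRowSource valueSource = new VectorRandomRowSource();
      valueSource.init(random);

      int rowCount = 10000;
      Object[][] rows = valueSource.randomRows(rowCount);

      addAndVerifyRows(valueSource, rows,
          map, HashTableKeyType.MULTI_KEY, verifyTable,
          new String[] { "decimal(10,4)", "date" },
          /* doClipping */ true, /* useExactBytes */ false);
    }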
http://git-wip-us.apache.org/repos/asf/hive/blob/2d1f403d/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VerifyFastRow.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VerifyFastRow.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VerifyFastRow.java
new file mode 100644
index 0000000..118e9e2
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VerifyFastRow.java
@@ -0,0 +1,397 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast;
+
+import java.io.IOException;
+import java.sql.Date;
+import java.sql.Timestamp;
+import java.util.Arrays;
+
+import junit.framework.TestCase;
+
+import org.apache.hadoop.hive.common.type.HiveChar;
+import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.common.type.HiveIntervalDayTime;
+import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth;
+import org.apache.hadoop.hive.common.type.HiveVarchar;
+import org.apache.hadoop.hive.serde2.fast.DeserializeRead;
+import org.apache.hadoop.hive.serde2.fast.SerializeWrite;
+import org.apache.hadoop.hive.serde2.io.ByteWritable;
+import org.apache.hadoop.hive.serde2.io.DateWritable;
+import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.hive.serde2.io.HiveCharWritable;
+import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
+import org.apache.hadoop.hive.serde2.io.HiveIntervalDayTimeWritable;
+import org.apache.hadoop.hive.serde2.io.HiveIntervalYearMonthWritable;
+import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable;
+import org.apache.hadoop.hive.serde2.io.ShortWritable;
+import org.apache.hadoop.hive.serde2.io.TimestampWritable;
+import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
+import org.apache.hadoop.io.BooleanWritable;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.FloatWritable;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
+
+/**
+ * A copy of VerifyFast from the serde module.
+ */
+public class VerifyFastRow {
+
+ public static void verifyDeserializeRead(DeserializeRead deserializeRead,
+ PrimitiveTypeInfo primitiveTypeInfo, Writable writable) throws IOException {
+
+ boolean isNull;
+
+ isNull = deserializeRead.readCheckNull();
+ if (isNull) {
+ if (writable != null) {
+ TestCase.fail("Field reports null but object is not null");
+ }
+ return;
+ } else if (writable == null) {
+ TestCase.fail("Field report not null but object is null");
+ }
+ switch (primitiveTypeInfo.getPrimitiveCategory()) {
+ case BOOLEAN:
+ {
+ boolean value = deserializeRead.currentBoolean;
+ if (!(writable instanceof BooleanWritable)) {
+ TestCase.fail("Boolean expected writable not Boolean");
+ }
+ boolean expected = ((BooleanWritable) writable).get();
+ if (value != expected) {
+ TestCase.fail("Boolean field mismatch (expected " + expected + " found " + value + ")");
+ }
+ }
+ break;
+ case BYTE:
+ {
+ byte value = deserializeRead.currentByte;
+ if (!(writable instanceof ByteWritable)) {
+ TestCase.fail("Byte expected writable not Byte");
+ }
+ byte expected = ((ByteWritable) writable).get();
+ if (value != expected) {
+ TestCase.fail("Byte field mismatch (expected " + (int) expected + " found " + (int) value + ")");
+ }
+ }
+ break;
+ case SHORT:
+ {
+ short value = deserializeRead.currentShort;
+ if (!(writable instanceof ShortWritable)) {
+ TestCase.fail("Short expected writable not Short");
+ }
+ short expected = ((ShortWritable) writable).get();
+ if (value != expected) {
+ TestCase.fail("Short field mismatch (expected " + expected + " found " + value + ")");
+ }
+ }
+ break;
+ case INT:
+ {
+ int value = deserializeRead.currentInt;
+ if (!(writable instanceof IntWritable)) {
+ TestCase.fail("Integer expected writable not Integer");
+ }
+ int expected = ((IntWritable) writable).get();
+ if (value != expected) {
+ TestCase.fail("Int field mismatch (expected " + expected + " found " + value + ")");
+ }
+ }
+ break;
+ case LONG:
+ {
+ long value = deserializeRead.currentLong;
+ if (!(writable instanceof LongWritable)) {
+ TestCase.fail("Long expected writable not Long");
+ }
+ long expected = ((LongWritable) writable).get();
+ if (value != expected) {
+ TestCase.fail("Long field mismatch (expected " + expected + " found " + value + ")");
+ }
+ }
+ break;
+ case FLOAT:
+ {
+ float value = deserializeRead.currentFloat;
+ if (!(writable instanceof FloatWritable)) {
+ TestCase.fail("Float expected writable not Float");
+ }
+ float expected = ((FloatWritable) writable).get();
+ if (value != expected) {
+ TestCase.fail("Float field mismatch (expected " + expected + " found " + value + ")");
+ }
+ }
+ break;
+ case DOUBLE:
+ {
+ double value = deserializeRead.currentDouble;
+ if (!(writable instanceof DoubleWritable)) {
+ TestCase.fail("Double expected writable not Double");
+ }
+ double expected = ((DoubleWritable) writable).get();
+ if (value != expected) {
+ TestCase.fail("Double field mismatch (expected " + expected + " found " + value + ")");
+ }
+ }
+ break;
+ case STRING:
+ {
+ byte[] stringBytes = Arrays.copyOfRange(
+ deserializeRead.currentBytes,
+ deserializeRead.currentBytesStart,
+ deserializeRead.currentBytesStart + deserializeRead.currentBytesLength);
+ Text text = new Text(stringBytes);
+ String string = text.toString();
+ String expected = ((Text) writable).toString();
+ if (!string.equals(expected)) {
+ TestCase.fail("String field mismatch (expected '" + expected + "' found '" + string + "')");
+ }
+ }
+ break;
+ case CHAR:
+ {
+ byte[] stringBytes = Arrays.copyOfRange(
+ deserializeRead.currentBytes,
+ deserializeRead.currentBytesStart,
+ deserializeRead.currentBytesStart + deserializeRead.currentBytesLength);
+ Text text = new Text(stringBytes);
+ String string = text.toString();
+
+ HiveChar hiveChar = new HiveChar(string, ((CharTypeInfo) primitiveTypeInfo).getLength());
+
+ HiveChar expected = ((HiveCharWritable) writable).getHiveChar();
+ if (!hiveChar.equals(expected)) {
+ TestCase.fail("Char field mismatch (expected '" + expected + "' found '" + hiveChar + "')");
+ }
+ }
+ break;
+ case VARCHAR:
+ {
+ byte[] stringBytes = Arrays.copyOfRange(
+ deserializeRead.currentBytes,
+ deserializeRead.currentBytesStart,
+ deserializeRead.currentBytesStart + deserializeRead.currentBytesLength);
+ Text text = new Text(stringBytes);
+ String string = text.toString();
+
+ HiveVarchar hiveVarchar = new HiveVarchar(string, ((VarcharTypeInfo) primitiveTypeInfo).getLength());
+
+ HiveVarchar expected = ((HiveVarcharWritable) writable).getHiveVarchar();
+ if (!hiveVarchar.equals(expected)) {
+ TestCase.fail("Varchar field mismatch (expected '" + expected + "' found '" + hiveVarchar + "')");
+ }
+ }
+ break;
+ case DECIMAL:
+ {
+ HiveDecimal value = deserializeRead.currentHiveDecimalWritable.getHiveDecimal();
+ if (value == null) {
+ TestCase.fail("Decimal field evaluated to NULL");
+ }
+ HiveDecimal expected = ((HiveDecimalWritable) writable).getHiveDecimal();
+ if (!value.equals(expected)) {
+ DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) primitiveTypeInfo;
+ int precision = decimalTypeInfo.getPrecision();
+ int scale = decimalTypeInfo.getScale();
+ TestCase.fail("Decimal field mismatch (expected " + expected.toString() + " found " + value.toString() + ") precision " + precision + ", scale " + scale);
+ }
+ }
+ break;
+ case DATE:
+ {
+ Date value = deserializeRead.currentDateWritable.get();
+ Date expected = ((DateWritable) writable).get();
+ if (!value.equals(expected)) {
+ TestCase.fail("Date field mismatch (expected " + expected.toString() + " found " + value.toString() + ")");
+ }
+ }
+ break;
+ case TIMESTAMP:
+ {
+ Timestamp value = deserializeRead.currentTimestampWritable.getTimestamp();
+ Timestamp expected = ((TimestampWritable) writable).getTimestamp();
+ if (!value.equals(expected)) {
+ TestCase.fail("Timestamp field mismatch (expected " + expected.toString() + " found " + value.toString() + ")");
+ }
+ }
+ break;
+ case INTERVAL_YEAR_MONTH:
+ {
+ HiveIntervalYearMonth value = deserializeRead.currentHiveIntervalYearMonthWritable.getHiveIntervalYearMonth();
+ HiveIntervalYearMonth expected = ((HiveIntervalYearMonthWritable) writable).getHiveIntervalYearMonth();
+ if (!value.equals(expected)) {
+ TestCase.fail("HiveIntervalYearMonth field mismatch (expected " + expected.toString() + " found " + value.toString() + ")");
+ }
+ }
+ break;
+ case INTERVAL_DAY_TIME:
+ {
+ HiveIntervalDayTime value = deserializeRead.currentHiveIntervalDayTimeWritable.getHiveIntervalDayTime();
+ HiveIntervalDayTime expected = ((HiveIntervalDayTimeWritable) writable).getHiveIntervalDayTime();
+ if (!value.equals(expected)) {
+ TestCase.fail("HiveIntervalDayTime field mismatch (expected " + expected.toString() + " found " + value.toString() + ")");
+ }
+ }
+ break;
+ case BINARY:
+ {
+ byte[] byteArray = Arrays.copyOfRange(
+ deserializeRead.currentBytes,
+ deserializeRead.currentBytesStart,
+ deserializeRead.currentBytesStart + deserializeRead.currentBytesLength);
+ BytesWritable bytesWritable = (BytesWritable) writable;
+ byte[] expected = Arrays.copyOfRange(bytesWritable.getBytes(), 0, bytesWritable.getLength());
+ if (byteArray.length != expected.length) {
+ TestCase.fail("Byte Array field mismatch (expected " + Arrays.toString(expected)
+ + " found " + Arrays.toString(byteArray) + ")");
+ }
+ for (int b = 0; b < byteArray.length; b++) {
+ if (byteArray[b] != expected[b]) {
+ TestCase.fail("Byte Array field mismatch (expected " + Arrays.toString(expected)
+ + " found " + Arrays.toString(byteArray) + ")");
+ }
+ }
+ }
+ break;
+ default:
+ throw new Error("Unknown primitive category " + primitiveTypeInfo.getPrimitiveCategory());
+ }
+ }
+
+ public static void serializeWrite(SerializeWrite serializeWrite,
+ PrimitiveTypeInfo primitiveTypeInfo, Writable writable) throws IOException {
+ if (writable == null) {
+ serializeWrite.writeNull();
+ return;
+ }
+ switch (primitiveTypeInfo.getPrimitiveCategory()) {
+ case BOOLEAN:
+ {
+ boolean value = ((BooleanWritable) writable).get();
+ serializeWrite.writeBoolean(value);
+ }
+ break;
+ case BYTE:
+ {
+ byte value = ((ByteWritable) writable).get();
+ serializeWrite.writeByte(value);
+ }
+ break;
+ case SHORT:
+ {
+ short value = ((ShortWritable) writable).get();
+ serializeWrite.writeShort(value);
+ }
+ break;
+ case INT:
+ {
+ int value = ((IntWritable) writable).get();
+ serializeWrite.writeInt(value);
+ }
+ break;
+ case LONG:
+ {
+ long value = ((LongWritable) writable).get();
+ serializeWrite.writeLong(value);
+ }
+ break;
+ case FLOAT:
+ {
+ float value = ((FloatWritable) writable).get();
+ serializeWrite.writeFloat(value);
+ }
+ break;
+ case DOUBLE:
+ {
+ double value = ((DoubleWritable) writable).get();
+ serializeWrite.writeDouble(value);
+ }
+ break;
+ case STRING:
+ {
+ Text value = (Text) writable;
+ byte[] stringBytes = value.getBytes();
+ int stringLength = stringBytes.length;
+ serializeWrite.writeString(stringBytes, 0, stringLength);
+ }
+ break;
+ case CHAR:
+ {
+ HiveChar value = ((HiveCharWritable) writable).getHiveChar();
+ serializeWrite.writeHiveChar(value);
+ }
+ break;
+ case VARCHAR:
+ {
+ HiveVarchar value = ((HiveVarcharWritable) writable).getHiveVarchar();
+ serializeWrite.writeHiveVarchar(value);
+ }
+ break;
+ case DECIMAL:
+ {
+ HiveDecimal value = ((HiveDecimalWritable) writable).getHiveDecimal();
+ DecimalTypeInfo decTypeInfo = (DecimalTypeInfo)primitiveTypeInfo;
+ serializeWrite.writeHiveDecimal(value, decTypeInfo.scale());
+ }
+ break;
+ case DATE:
+ {
+ Date value = ((DateWritable) writable).get();
+ serializeWrite.writeDate(value);
+ }
+ break;
+ case TIMESTAMP:
+ {
+ Timestamp value = ((TimestampWritable) writable).getTimestamp();
+ serializeWrite.writeTimestamp(value);
+ }
+ break;
+ case INTERVAL_YEAR_MONTH:
+ {
+ HiveIntervalYearMonth value = ((HiveIntervalYearMonthWritable) writable).getHiveIntervalYearMonth();
+ serializeWrite.writeHiveIntervalYearMonth(value);
+ }
+ break;
+ case INTERVAL_DAY_TIME:
+ {
+ HiveIntervalDayTime value = ((HiveIntervalDayTimeWritable) writable).getHiveIntervalDayTime();
+ serializeWrite.writeHiveIntervalDayTime(value);
+ }
+ break;
+ case BINARY:
+ {
+ BytesWritable byteWritable = (BytesWritable) writable;
+ byte[] binaryBytes = byteWritable.getBytes();
+ int length = byteWritable.getLength();
+ serializeWrite.writeBinary(binaryBytes, 0, length);
+ }
+ break;
+ default:
+ throw new Error("Unknown primitive category " + primitiveTypeInfo.getPrimitiveCategory().name());
+ }
+ }
+}
\ No newline at end of file
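As an illustration (not part of the patch), VerifyFastRow pairs with the fast serialize/deserialize classes in a round trip roughly like this; the LazyBinaryDeserializeRead constructor shape is an assumption, while the other calls appear in this file or in the test file earlier in this commit:

    // Hypothetical round-trip check for a single int field.
    PrimitiveTypeInfo typeInfo =
        (PrimitiveTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString("int");
    Writable writable = new IntWritable(42);

    Output output = new Output();
    LazyBinarySerializeWrite serializeWrite = new LazyBinarySerializeWrite(1);
    serializeWrite.set(output);
    VerifyFastRow.serializeWrite(serializeWrite, typeInfo, writable);

    byte[] bytes = Arrays.copyOf(output.getData(), output.getLength());
    DeserializeRead deserializeRead =
        new LazyBinaryDeserializeRead(new TypeInfo[] { typeInfo });  // assumed ctor
    deserializeRead.set(bytes, 0, bytes.length);
    VerifyFastRow.verifyDeserializeRead(deserializeRead, typeInfo, writable);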
http://git-wip-us.apache.org/repos/asf/hive/blob/2d1f403d/serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableDeserializeRead.java
----------------------------------------------------------------------
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableDeserializeRead.java b/serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableDeserializeRead.java
index be36ba4..003a2d4 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableDeserializeRead.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableDeserializeRead.java
@@ -42,7 +42,7 @@ import org.apache.hadoop.io.Text;
*
* Reading some fields require a results object to receive value information. A separate
* results object is created by the caller at initialization per different field even for the same
- * type.
+ * type.
*
* Some type values are by reference to either bytes in the deserialization buffer or to
* other type specific buffers. So, those references are only valid until the next time set is
@@ -61,6 +61,8 @@ public final class BinarySortableDeserializeRead extends DeserializeRead {
private int fieldCount;
private int start;
+ private int end;
+ private int fieldStart;
private byte[] tempTimestampBytes;
private Text tempText;
@@ -68,7 +70,6 @@ public final class BinarySortableDeserializeRead extends DeserializeRead {
private byte[] tempDecimalBuffer;
private boolean readBeyondConfiguredFieldsWarned;
- private boolean readBeyondBufferRangeWarned;
private boolean bufferRangeHasExtraDataWarned;
private InputByteBuffer inputByteBuffer = new InputByteBuffer();
@@ -92,7 +93,6 @@ public final class BinarySortableDeserializeRead extends DeserializeRead {
}
inputByteBuffer = new InputByteBuffer();
readBeyondConfiguredFieldsWarned = false;
- readBeyondBufferRangeWarned = false;
bufferRangeHasExtraDataWarned = false;
}
@@ -107,8 +107,40 @@ public final class BinarySortableDeserializeRead extends DeserializeRead {
@Override
public void set(byte[] bytes, int offset, int length) {
fieldIndex = -1;
- inputByteBuffer.reset(bytes, offset, offset + length);
start = offset;
+ end = offset + length;
+ inputByteBuffer.reset(bytes, start, end);
+ }
+
+ /*
+ * Get detailed read position information to help diagnose exceptions.
+ */
+ public String getDetailedReadPositionString() {
+ StringBuilder sb = new StringBuilder();
+
+ sb.append("Reading inputByteBuffer of length ");
+ sb.append(inputByteBuffer.getEnd());
+ sb.append(" at start offset ");
+ sb.append(start);
+ sb.append(" for length ");
+ sb.append(end - start);
+ sb.append(" to read ");
+ sb.append(fieldCount);
+ sb.append(" fields with types ");
+ sb.append(Arrays.toString(typeInfos));
+ sb.append(". ");
+ if (fieldIndex == -1) {
+ sb.append("Before first field?");
+ } else {
+ sb.append("Read field #");
+ sb.append(fieldIndex);
+ sb.append(" at field start position ");
+ sb.append(fieldStart);
+ sb.append(" current read offset ");
+ sb.append(inputByteBuffer.tell());
+ }
+
+ return sb.toString();
}
/*
@@ -133,12 +165,11 @@ public final class BinarySortableDeserializeRead extends DeserializeRead {
}
if (inputByteBuffer.isEof()) {
// Also, reading beyond our byte range produces NULL.
- if (!readBeyondBufferRangeWarned) {
- doReadBeyondBufferRangeWarned();
- }
- // We cannot read beyond so we must return NULL here.
return true;
}
+
+ fieldStart = inputByteBuffer.tell();
+
byte isNullByte = inputByteBuffer.read(columnSortOrderIsDesc[fieldIndex]);
if (isNullByte == 0) {
@@ -298,7 +329,7 @@ public final class BinarySortableDeserializeRead extends DeserializeRead {
factor = -factor;
}
- int start = inputByteBuffer.tell();
+ int decimalStart = inputByteBuffer.tell();
int length = 0;
do {
@@ -317,7 +348,7 @@ public final class BinarySortableDeserializeRead extends DeserializeRead {
tempDecimalBuffer = new byte[length];
}
- inputByteBuffer.seek(start);
+ inputByteBuffer.seek(decimalStart);
for (int i = 0; i < length; ++i) {
tempDecimalBuffer[i] = inputByteBuffer.read(positive ? invert : !invert);
}
@@ -392,10 +423,6 @@ public final class BinarySortableDeserializeRead extends DeserializeRead {
return readBeyondConfiguredFieldsWarned;
}
@Override
- public boolean readBeyondBufferRangeWarned() {
- return readBeyondBufferRangeWarned;
- }
- @Override
public boolean bufferRangeHasExtraDataWarned() {
return bufferRangeHasExtraDataWarned;
}
@@ -410,14 +437,4 @@ public final class BinarySortableDeserializeRead extends DeserializeRead {
+ " reading more (NULLs returned). Ignoring similar problems.");
readBeyondConfiguredFieldsWarned = true;
}
-
- private void doReadBeyondBufferRangeWarned() {
- // Warn only once.
- int length = inputByteBuffer.tell() - start;
- LOG.info("Reading beyond buffer range! Buffer range " + start
- + " for length " + length + " but reading more... "
- + "(total buffer length " + inputByteBuffer.getData().length + ")"
- + " Ignoring similar problems.");
- readBeyondBufferRangeWarned = true;
- }
}
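For illustration (the offsets below are made up), the diagnostic string assembled by getDetailedReadPositionString() above reads like:

    Reading inputByteBuffer of length 512 at start offset 0 for length 512 to read 3 fields with types [bigint, string, int]. Read field #1 at field start position 9 current read offset 27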
http://git-wip-us.apache.org/repos/asf/hive/blob/2d1f403d/serde/src/java/org/apache/hadoop/hive/serde2/fast/DeserializeRead.java
----------------------------------------------------------------------
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/fast/DeserializeRead.java b/serde/src/java/org/apache/hadoop/hive/serde2/fast/DeserializeRead.java
index 2fad2af..8f3e771 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/fast/DeserializeRead.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/fast/DeserializeRead.java
@@ -143,10 +143,14 @@ public abstract class DeserializeRead {
* Read integrity warning flags.
*/
public abstract boolean readBeyondConfiguredFieldsWarned();
- public abstract boolean readBeyondBufferRangeWarned();
public abstract boolean bufferRangeHasExtraDataWarned();
/*
+ * Get detailed read position information to help diagnose exceptions.
+ */
+ public abstract String getDetailedReadPositionString();
+
+ /*
* These members hold the current value that was read when readCheckNull return false.
*/
http://git-wip-us.apache.org/repos/asf/hive/blob/2d1f403d/serde/src/java/org/apache/hadoop/hive/serde2/fast/RandomRowObjectSource.java
----------------------------------------------------------------------
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/fast/RandomRowObjectSource.java b/serde/src/java/org/apache/hadoop/hive/serde2/fast/RandomRowObjectSource.java
deleted file mode 100644
index 1bb990c..0000000
--- a/serde/src/java/org/apache/hadoop/hive/serde2/fast/RandomRowObjectSource.java
+++ /dev/null
@@ -1,423 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.serde2.fast;
-
-import java.sql.Date;
-import java.sql.Timestamp;
-import java.util.ArrayList;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Random;
-
-import org.apache.commons.lang.ArrayUtils;
-import org.apache.commons.lang.StringUtils;
-import org.apache.hadoop.hive.common.type.HiveChar;
-import org.apache.hadoop.hive.common.type.HiveDecimal;
-import org.apache.hadoop.hive.common.type.HiveIntervalDayTime;
-import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth;
-import org.apache.hadoop.hive.common.type.HiveVarchar;
-import org.apache.hadoop.hive.common.type.RandomTypeUtil;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
-import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBooleanObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableByteObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDateObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDoubleObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableFloatObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveCharObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveDecimalObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveIntervalDayTimeObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveIntervalYearMonthObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveVarcharObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableIntObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableLongObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableShortObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableTimestampObjectInspector;
-import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
-import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
-import org.apache.hive.common.util.DateUtils;
-
-/**
- * Generate object inspector and random row object[].
- */
-public class RandomRowObjectSource {
-
- private Random r;
-
- private int columnCount;
-
- private List<String> typeNames;
-
- private PrimitiveCategory[] primitiveCategories;
-
- private PrimitiveTypeInfo[] primitiveTypeInfos;
-
- private List<ObjectInspector> primitiveObjectInspectorList;
-
- private StructObjectInspector rowStructObjectInspector;
-
- public List<String> typeNames() {
- return typeNames;
- }
-
- public PrimitiveCategory[] primitiveCategories() {
- return primitiveCategories;
- }
-
- public PrimitiveTypeInfo[] primitiveTypeInfos() {
- return primitiveTypeInfos;
- }
-
- public StructObjectInspector rowStructObjectInspector() {
- return rowStructObjectInspector;
- }
-
- public StructObjectInspector partialRowStructObjectInspector(int partialFieldCount) {
- ArrayList<ObjectInspector> partialPrimitiveObjectInspectorList =
- new ArrayList<ObjectInspector>(partialFieldCount);
- List<String> columnNames = new ArrayList<String>(partialFieldCount);
- for (int i = 0; i < partialFieldCount; i++) {
- columnNames.add(String.format("partial%d", i));
- partialPrimitiveObjectInspectorList.add(
- PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(
- primitiveTypeInfos[i]));
- }
-
- return ObjectInspectorFactory.getStandardStructObjectInspector(
- columnNames, primitiveObjectInspectorList);
- }
-
- public void init(Random r) {
- this.r = r;
- chooseSchema();
- }
-
- /*
- * For now, exclude CHAR until we determine why there is a difference (blank padding)
- * serializing with LazyBinarySerializeWrite and the regular SerDe...
- */
- private static String[] possibleHiveTypeNames = {
- "boolean",
- "tinyint",
- "smallint",
- "int",
- "bigint",
- "date",
- "float",
- "double",
- "string",
-// "char",
- "varchar",
- "binary",
- "date",
- "timestamp",
- "interval_year_month",
- "interval_day_time",
- "decimal"
- };
-
- private void chooseSchema() {
- HashSet hashSet = null;
- boolean allTypes;
- boolean onlyOne = (r.nextInt(100) == 7);
- if (onlyOne) {
- columnCount = 1;
- allTypes = false;
- } else {
- allTypes = r.nextBoolean();
- if (allTypes) {
- // One of each type.
- columnCount = possibleHiveTypeNames.length;
- hashSet = new HashSet<Integer>();
- } else {
- columnCount = 1 + r.nextInt(20);
- }
- }
- typeNames = new ArrayList<String>(columnCount);
- primitiveCategories = new PrimitiveCategory[columnCount];
- primitiveTypeInfos = new PrimitiveTypeInfo[columnCount];
- primitiveObjectInspectorList = new ArrayList<ObjectInspector>(columnCount);
- List<String> columnNames = new ArrayList<String>(columnCount);
- for (int c = 0; c < columnCount; c++) {
- columnNames.add(String.format("col%d", c));
- String typeName;
-
- if (onlyOne) {
- typeName = possibleHiveTypeNames[r.nextInt(possibleHiveTypeNames.length)];
- } else {
- int typeNum;
- if (allTypes) {
- while (true) {
- typeNum = r.nextInt(possibleHiveTypeNames.length);
- Integer typeNumInteger = new Integer(typeNum);
- if (!hashSet.contains(typeNumInteger)) {
- hashSet.add(typeNumInteger);
- break;
- }
- }
- } else {
- typeNum = r.nextInt(possibleHiveTypeNames.length);
- }
- typeName = possibleHiveTypeNames[typeNum];
- }
- if (typeName.equals("char")) {
- int maxLength = 1 + r.nextInt(100);
- typeName = String.format("char(%d)", maxLength);
- } else if (typeName.equals("varchar")) {
- int maxLength = 1 + r.nextInt(100);
- typeName = String.format("varchar(%d)", maxLength);
- } else if (typeName.equals("decimal")) {
- typeName = String.format("decimal(%d,%d)", HiveDecimal.SYSTEM_DEFAULT_PRECISION, HiveDecimal.SYSTEM_DEFAULT_SCALE);
- }
- PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString(typeName);
- primitiveTypeInfos[c] = primitiveTypeInfo;
- PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory();
- primitiveCategories[c] = primitiveCategory;
- primitiveObjectInspectorList.add(PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(primitiveTypeInfo));
- typeNames.add(typeName);
- }
- rowStructObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, primitiveObjectInspectorList);
- }
-
- public Object[][] randomRows(int n) {
- Object[][] result = new Object[n][];
- for (int i = 0; i < n; i++) {
- result[i] = randomRow();
- }
- return result;
- }
-
- public Object[] randomRow() {
- Object row[] = new Object[columnCount];
- for (int c = 0; c < columnCount; c++) {
- Object object = randomObject(c);
- if (object == null) {
- throw new Error("Unexpected null for column " + c);
- }
- row[c] = getWritableObject(c, object);
- if (row[c] == null) {
- throw new Error("Unexpected null for writable for column " + c);
- }
- }
- return row;
- }
-
- public static void sort(Object[][] rows, ObjectInspector oi) {
- for (int i = 0; i < rows.length; i++) {
- for (int j = i + 1; j < rows.length; j++) {
- if (ObjectInspectorUtils.compare(rows[i], oi, rows[j], oi) > 0) {
- Object[] t = rows[i];
- rows[i] = rows[j];
- rows[j] = t;
- }
- }
- }
- }
-
- public void sort(Object[][] rows) {
- RandomRowObjectSource.sort(rows, rowStructObjectInspector);
- }
-
- public Object getWritableObject(int column, Object object) {
- ObjectInspector objectInspector = primitiveObjectInspectorList.get(column);
- PrimitiveCategory primitiveCategory = primitiveCategories[column];
- PrimitiveTypeInfo primitiveTypeInfo = primitiveTypeInfos[column];
- switch (primitiveCategory) {
- case BOOLEAN:
- return ((WritableBooleanObjectInspector) objectInspector).create((boolean) object);
- case BYTE:
- return ((WritableByteObjectInspector) objectInspector).create((byte) object);
- case SHORT:
- return ((WritableShortObjectInspector) objectInspector).create((short) object);
- case INT:
- return ((WritableIntObjectInspector) objectInspector).create((int) object);
- case LONG:
- return ((WritableLongObjectInspector) objectInspector).create((long) object);
- case DATE:
- return ((WritableDateObjectInspector) objectInspector).create((Date) object);
- case FLOAT:
- return ((WritableFloatObjectInspector) objectInspector).create((float) object);
- case DOUBLE:
- return ((WritableDoubleObjectInspector) objectInspector).create((double) object);
- case STRING:
- return ((WritableStringObjectInspector) objectInspector).create((String) object);
- case CHAR:
- {
- WritableHiveCharObjectInspector writableCharObjectInspector =
- new WritableHiveCharObjectInspector( (CharTypeInfo) primitiveTypeInfo);
- return writableCharObjectInspector.create(new HiveChar(StringUtils.EMPTY, -1));
- }
- case VARCHAR:
- {
- WritableHiveVarcharObjectInspector writableVarcharObjectInspector =
- new WritableHiveVarcharObjectInspector( (VarcharTypeInfo) primitiveTypeInfo);
- return writableVarcharObjectInspector.create(new HiveVarchar(StringUtils.EMPTY, -1));
- }
- case BINARY:
- return PrimitiveObjectInspectorFactory.writableBinaryObjectInspector.create(ArrayUtils.EMPTY_BYTE_ARRAY);
- case TIMESTAMP:
- return ((WritableTimestampObjectInspector) objectInspector).create(new Timestamp(0));
- case INTERVAL_YEAR_MONTH:
- return ((WritableHiveIntervalYearMonthObjectInspector) objectInspector).create(new HiveIntervalYearMonth(0));
- case INTERVAL_DAY_TIME:
- return ((WritableHiveIntervalDayTimeObjectInspector) objectInspector).create(new HiveIntervalDayTime(0, 0));
- case DECIMAL:
- {
- WritableHiveDecimalObjectInspector writableDecimalObjectInspector =
- new WritableHiveDecimalObjectInspector((DecimalTypeInfo) primitiveTypeInfo);
- return writableDecimalObjectInspector.create(HiveDecimal.ZERO);
- }
- default:
- throw new Error("Unknown primitive category " + primitiveCategory);
- }
- }
-
- public Object randomObject(int column) {
- PrimitiveCategory primitiveCategory = primitiveCategories[column];
- PrimitiveTypeInfo primitiveTypeInfo = primitiveTypeInfos[column];
- switch (primitiveCategory) {
- case BOOLEAN:
- return Boolean.valueOf(r.nextInt(1) == 1);
- case BYTE:
- return Byte.valueOf((byte) r.nextInt());
- case SHORT:
- return Short.valueOf((short) r.nextInt());
- case INT:
- return Integer.valueOf(r.nextInt());
- case LONG:
- return Long.valueOf(r.nextLong());
- case DATE:
- return RandomTypeUtil.getRandDate(r);
- case FLOAT:
- return Float.valueOf(r.nextFloat() * 10 - 5);
- case DOUBLE:
- return Double.valueOf(r.nextDouble() * 10 - 5);
- case STRING:
- return RandomTypeUtil.getRandString(r);
- case CHAR:
- return getRandHiveChar(r, (CharTypeInfo) primitiveTypeInfo);
- case VARCHAR:
- return getRandHiveVarchar(r, (VarcharTypeInfo) primitiveTypeInfo);
- case BINARY:
- return getRandBinary(r, 1 + r.nextInt(100));
- case TIMESTAMP:
- return RandomTypeUtil.getRandTimestamp(r);
- case INTERVAL_YEAR_MONTH:
- return getRandIntervalYearMonth(r);
- case INTERVAL_DAY_TIME:
- return getRandIntervalDayTime(r);
- case DECIMAL:
- return getRandHiveDecimal(r, (DecimalTypeInfo) primitiveTypeInfo);
- default:
- throw new Error("Unknown primitive category " + primitiveCategory);
- }
- }
-
- public static HiveChar getRandHiveChar(Random r, CharTypeInfo charTypeInfo) {
- int maxLength = 1 + r.nextInt(charTypeInfo.getLength());
- String randomString = RandomTypeUtil.getRandString(r, "abcdefghijklmnopqrstuvwxyz", 100);
- HiveChar hiveChar = new HiveChar(randomString, maxLength);
- return hiveChar;
- }
-
- public static HiveVarchar getRandHiveVarchar(Random r, VarcharTypeInfo varcharTypeInfo) {
- int maxLength = 1 + r.nextInt(varcharTypeInfo.getLength());
- String randomString = RandomTypeUtil.getRandString(r, "abcdefghijklmnopqrstuvwxyz", 100);
- HiveVarchar hiveVarchar = new HiveVarchar(randomString, maxLength);
- return hiveVarchar;
- }
-
- public static byte[] getRandBinary(Random r, int len){
- byte[] bytes = new byte[len];
- for (int j = 0; j < len; j++){
- bytes[j] = Byte.valueOf((byte) r.nextInt());
- }
- return bytes;
- }
-
- private static final String DECIMAL_CHARS = "0123456789";
-
- public static HiveDecimal getRandHiveDecimal(Random r, DecimalTypeInfo decimalTypeInfo) {
- while (true) {
- StringBuilder sb = new StringBuilder();
- int precision = 1 + r.nextInt(18);
- int scale = 0 + r.nextInt(precision + 1);
-
- int integerDigits = precision - scale;
-
- if (r.nextBoolean()) {
- sb.append("-");
- }
-
- if (integerDigits == 0) {
- sb.append("0");
- } else {
- sb.append(RandomTypeUtil.getRandString(r, DECIMAL_CHARS, integerDigits));
- }
- if (scale != 0) {
- sb.append(".");
- sb.append(RandomTypeUtil.getRandString(r, DECIMAL_CHARS, scale));
- }
-
- HiveDecimal bd = HiveDecimal.create(sb.toString());
- if (bd.scale() > bd.precision()) {
- // HiveDecimal.create may normalize to a scale greater than the precision; retry when that happens.
- continue;
- }
-
- return bd;
- }
- }
-
- public static HiveIntervalYearMonth getRandIntervalYearMonth(Random r) {
- String yearMonthSignStr = r.nextInt(2) == 0 ? "" : "-";
- String intervalYearMonthStr = String.format("%s%d-%d",
- yearMonthSignStr,
- Integer.valueOf(1800 + r.nextInt(500)), // year
- Integer.valueOf(0 + r.nextInt(12))); // month
- HiveIntervalYearMonth intervalYearMonthVal = HiveIntervalYearMonth.valueOf(intervalYearMonthStr);
- return intervalYearMonthVal;
- }
-
- public static HiveIntervalDayTime getRandIntervalDayTime(Random r) {
- String optionalNanos = "";
- if (r.nextInt(2) == 1) {
- optionalNanos = String.format(".%09d",
- Integer.valueOf(0 + r.nextInt(DateUtils.NANOS_PER_SEC)));
- }
- String daySignStr = r.nextInt(2) == 0 ? "" : "-";
- String dayTimeStr = String.format("%s%d %02d:%02d:%02d%s",
- daySignStr,
- Integer.valueOf(1 + r.nextInt(28)), // day
- Integer.valueOf(0 + r.nextInt(24)), // hour
- Integer.valueOf(0 + r.nextInt(60)), // minute
- Integer.valueOf(0 + r.nextInt(60)), // second
- optionalNanos);
- HiveIntervalDayTime intervalDayTimeVal = HiveIntervalDayTime.valueOf(dayTimeStr);
- return intervalDayTimeVal;
- }
-}
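
The exchange sort removed above is O(n^2), which is harmless for the small row sets these tests generate. For comparison only (this sketch is not part of the patch), the same ordering can be expressed with Arrays.sort and a Comparator that delegates to ObjectInspectorUtils.compare, assuming the same rows/oi parameters as the removed method:

    import java.util.Arrays;
    import java.util.Comparator;
    import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
    import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;

    public static void sort(final Object[][] rows, final ObjectInspector oi) {
      // Same ordering as the removed nested loops, delegated to the library sort.
      Arrays.sort(rows, new Comparator<Object[]>() {
        @Override
        public int compare(Object[] a, Object[] b) {
          return ObjectInspectorUtils.compare(a, oi, b, oi);
        }
      });
    }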
http://git-wip-us.apache.org/repos/asf/hive/blob/2d1f403d/serde/src/java/org/apache/hadoop/hive/serde2/fast/SerializeWrite.java
----------------------------------------------------------------------
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/fast/SerializeWrite.java b/serde/src/java/org/apache/hadoop/hive/serde2/fast/SerializeWrite.java
index e562ce3..fb41420 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/fast/SerializeWrite.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/fast/SerializeWrite.java
@@ -96,7 +96,7 @@ public interface SerializeWrite {
/*
* STRING.
- *
+ *
* Can be used to write CHAR and VARCHAR when the caller takes responsibility for
* truncation/padding issues.
*/
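
As the writeString comment notes, a caller can reuse the STRING writer for CHAR and VARCHAR if it normalizes the value itself. A minimal sketch of such caller-side padding, assuming a SerializeWrite instance named serializeWrite inside a method that declares IOException (the variable is illustrative, not from this patch):

    // Hypothetical caller: pad a CHAR value to its type length, then write it as STRING.
    HiveChar hiveChar = new HiveChar("ab", 5);
    byte[] utf8 = hiveChar.getPaddedValue().getBytes(java.nio.charset.StandardCharsets.UTF_8);
    serializeWrite.writeString(utf8, 0, utf8.length);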
http://git-wip-us.apache.org/repos/asf/hive/blob/2d1f403d/serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleDeserializeRead.java
----------------------------------------------------------------------
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleDeserializeRead.java b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleDeserializeRead.java
index 765ba7e..ac44390 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleDeserializeRead.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleDeserializeRead.java
@@ -21,6 +21,7 @@ package org.apache.hadoop.hive.serde2.lazy.fast;
import java.io.UnsupportedEncodingException;
import java.nio.charset.CharacterCodingException;
import java.sql.Date;
+import java.util.Arrays;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -73,6 +74,7 @@ public final class LazySimpleDeserializeRead extends DeserializeRead {
private int end;
private int fieldCount;
private int fieldIndex;
+ private int parseFieldIndex;
private int fieldStart;
private int fieldLength;
@@ -124,6 +126,41 @@ public final class LazySimpleDeserializeRead extends DeserializeRead {
fieldIndex = -1;
}
+ /*
+ * Get detailed read position information to help diagnose exceptions.
+ */
+ public String getDetailedReadPositionString() {
+ StringBuffer sb = new StringBuffer();
+
+ sb.append("Reading byte[] of length ");
+ sb.append(bytes.length);
+ sb.append(" at start offset ");
+ sb.append(start);
+ sb.append(" for length ");
+ sb.append(end - start);
+ sb.append(" to read ");
+ sb.append(fieldCount);
+ sb.append(" fields with types ");
+ sb.append(Arrays.toString(typeInfos));
+ sb.append(". ");
+ if (fieldIndex == -1) {
+ sb.append("Error during field delimitor parsing of field #");
+ sb.append(parseFieldIndex);
+ } else {
+ sb.append("Read field #");
+ sb.append(fieldIndex);
+ sb.append(" at field start position ");
+ sb.append(startPosition[fieldIndex]);
+ int currentFieldLength = startPosition[fieldIndex + 1] - startPosition[fieldIndex] - 1;
+ sb.append(" for field length ");
+ sb.append(currentFieldLength);
+ sb.append(" current read offset ");
+ sb.append(offset);
+ }
+
+ return sb.toString();
+ }
+
/**
* Parse the byte[] and fill each field.
*
@@ -133,27 +170,29 @@ public final class LazySimpleDeserializeRead extends DeserializeRead {
private void parse() {
int structByteEnd = end;
- int fieldId = 0;
int fieldByteBegin = start;
int fieldByteEnd = start;
+ // Kept as a member variable to support getDetailedReadPositionString.
+ parseFieldIndex = 0;
+
// Go through all bytes in the byte[]
while (fieldByteEnd <= structByteEnd) {
if (fieldByteEnd == structByteEnd || bytes[fieldByteEnd] == separator) {
// Reached the end of a field?
- if (lastColumnTakesRest && fieldId == fieldCount - 1) {
+ if (lastColumnTakesRest && parseFieldIndex == fieldCount - 1) {
fieldByteEnd = structByteEnd;
}
- startPosition[fieldId] = fieldByteBegin;
- fieldId++;
- if (fieldId == fieldCount || fieldByteEnd == structByteEnd) {
+ startPosition[parseFieldIndex] = fieldByteBegin;
+ parseFieldIndex++;
+ if (parseFieldIndex == fieldCount || fieldByteEnd == structByteEnd) {
// All fields have been parsed, or bytes have been parsed.
// We need to set the startPosition of fields.length to ensure we
// can use the same formula to calculate the length of each field.
// For missing fields, their starting positions will all be the same,
// which will make their lengths to be -1 and uncheckedGetField will
// return these fields as NULLs.
- for (int i = fieldId; i <= fieldCount; i++) {
+ for (int i = parseFieldIndex; i <= fieldCount; i++) {
startPosition[i] = fieldByteEnd + 1;
}
break;
@@ -177,8 +216,8 @@ public final class LazySimpleDeserializeRead extends DeserializeRead {
}
// Missing fields?
- if (!missingFieldWarned && fieldId < fieldCount) {
- doMissingFieldWarned(fieldId);
+ if (!missingFieldWarned && parseFieldIndex < fieldCount) {
+ doMissingFieldWarned(parseFieldIndex);
}
}
@@ -518,12 +557,8 @@ public final class LazySimpleDeserializeRead extends DeserializeRead {
return missingFieldWarned;
}
@Override
- public boolean readBeyondBufferRangeWarned() {
- return extraFieldWarned;
- }
- @Override
public boolean bufferRangeHasExtraDataWarned() {
- return false; // UNDONE: Get rid of...
+ return false;
}
private void doExtraFieldWarned() {
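
The new getDetailedReadPositionString only pays off if callers attach it to the exceptions they rethrow. A minimal sketch of such a caller, assuming a single LONG field (the surrounding names are illustrative, not from this patch):

    try {
      deserializeRead.set(bytes, offset, length);
      if (!deserializeRead.readCheckNull()) {
        long value = deserializeRead.currentLong;
      }
    } catch (Exception e) {
      // Attach the detailed read position so truncated or corrupt buffers are diagnosable.
      throw new HiveException(
          "Deserialize error: " + deserializeRead.getDetailedReadPositionString(), e);
    }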
http://git-wip-us.apache.org/repos/asf/hive/blob/2d1f403d/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinaryDeserializeRead.java
----------------------------------------------------------------------
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinaryDeserializeRead.java b/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinaryDeserializeRead.java
index bbb35c7..0df1d79 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinaryDeserializeRead.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinaryDeserializeRead.java
@@ -20,6 +20,8 @@ package org.apache.hadoop.hive.serde2.lazybinary.fast;
import java.io.EOFException;
import java.io.IOException;
+import java.util.Arrays;
+
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.hive.common.type.HiveDecimal;
@@ -31,6 +33,7 @@ import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.VInt;
import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.VLong;
import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.io.WritableUtils;
/*
* Directly deserialize with the caller reading field-by-field the LazyBinary serialization format.
@@ -54,6 +57,7 @@ public final class LazyBinaryDeserializeRead extends DeserializeRead {
private int offset;
private int end;
private int fieldCount;
+ private int fieldStart;
private int fieldIndex;
private byte nullByte;
@@ -62,7 +66,6 @@ public final class LazyBinaryDeserializeRead extends DeserializeRead {
private VLong tempVLong;
private boolean readBeyondConfiguredFieldsWarned;
- private boolean readBeyondBufferRangeWarned;
private boolean bufferRangeHasExtraDataWarned;
public LazyBinaryDeserializeRead(TypeInfo[] typeInfos) {
@@ -71,7 +74,6 @@ public final class LazyBinaryDeserializeRead extends DeserializeRead {
tempVInt = new VInt();
tempVLong = new VLong();
readBeyondConfiguredFieldsWarned = false;
- readBeyondBufferRangeWarned = false;
bufferRangeHasExtraDataWarned = false;
}
@@ -93,6 +95,32 @@ public final class LazyBinaryDeserializeRead extends DeserializeRead {
}
/*
+ * Get detailed read position information to help diagnose exceptions.
+ */
+ public String getDetailedReadPositionString() {
+ StringBuffer sb = new StringBuffer();
+
+ sb.append("Reading byte[] of length ");
+ sb.append(bytes.length);
+ sb.append(" at start offset ");
+ sb.append(start);
+ sb.append(" for length ");
+ sb.append(end - start);
+ sb.append(" to read ");
+ sb.append(fieldCount);
+ sb.append(" fields with types ");
+ sb.append(Arrays.toString(typeInfos));
+ sb.append(". Read field #");
+ sb.append(fieldIndex);
+ sb.append(" at field start position ");
+ sb.append(fieldStart);
+ sb.append(" current read offset ");
+ sb.append(offset);
+
+ return sb.toString();
+ }
+
+ /*
* Reads the NULL information for a field.
*
* @return Returns true when the field is NULL; reading is positioned to the next field.
@@ -111,11 +139,13 @@ public final class LazyBinaryDeserializeRead extends DeserializeRead {
return true;
}
+ fieldStart = offset;
+
if (fieldIndex == 0) {
// The rest of the range check for fields after the first is below after checking
// the NULL byte.
if (offset >= end) {
- warnBeyondEof();
+ throw new EOFException();
}
nullByte = bytes[offset++];
}
@@ -129,9 +159,7 @@ public final class LazyBinaryDeserializeRead extends DeserializeRead {
// Make sure there is at least one byte that can be read for a value.
if (offset >= end) {
- // Careful: since we may be dealing with NULLs in the final NULL byte, we check after
- // the NULL byte check..
- warnBeyondEof();
+ throw new EOFException();
}
/*
@@ -149,33 +177,33 @@ public final class LazyBinaryDeserializeRead extends DeserializeRead {
case SHORT:
// Last item -- ok to be at end.
if (offset + 2 > end) {
- warnBeyondEof();
+ throw new EOFException();
}
currentShort = LazyBinaryUtils.byteArrayToShort(bytes, offset);
offset += 2;
break;
case INT:
+ // Parse the first byte of a vint/vlong to determine the number of bytes.
+ if (offset + WritableUtils.decodeVIntSize(bytes[offset]) > end) {
+ throw new EOFException();
+ }
LazyBinaryUtils.readVInt(bytes, offset, tempVInt);
offset += tempVInt.length;
- // Last item -- ok to be at end.
- if (offset > end) {
- warnBeyondEof();
- }
currentInt = tempVInt.value;
break;
case LONG:
+ // Parse the first byte of a vint/vlong to determine the number of bytes.
+ if (offset + WritableUtils.decodeVIntSize(bytes[offset]) > end) {
+ throw new EOFException();
+ }
LazyBinaryUtils.readVLong(bytes, offset, tempVLong);
offset += tempVLong.length;
- // Last item -- ok to be at end.
- if (offset > end) {
- warnBeyondEof();
- }
currentLong = tempVLong.value;
break;
case FLOAT:
// Last item -- ok to be at end.
if (offset + 4 > end) {
- warnBeyondEof();
+ throw new EOFException();
}
currentFloat = Float.intBitsToFloat(LazyBinaryUtils.byteArrayToInt(bytes, offset));
offset += 4;
@@ -183,7 +211,7 @@ public final class LazyBinaryDeserializeRead extends DeserializeRead {
case DOUBLE:
// Last item -- ok to be at end.
if (offset + 8 > end) {
- warnBeyondEof();
+ throw new EOFException();
}
currentDouble = Double.longBitsToDouble(LazyBinaryUtils.byteArrayToLong(bytes, offset));
offset += 8;
@@ -195,18 +223,19 @@ public final class LazyBinaryDeserializeRead extends DeserializeRead {
case VARCHAR:
{
// using vint instead of 4 bytes
+ // Parse the first byte of a vint/vlong to determine the number of bytes.
+ if (offset + WritableUtils.decodeVIntSize(bytes[offset]) > end) {
+ throw new EOFException();
+ }
LazyBinaryUtils.readVInt(bytes, offset, tempVInt);
offset += tempVInt.length;
- // Could be last item for empty string -- ok to be at end.
- if (offset > end) {
- warnBeyondEof();
- }
+
int saveStart = offset;
int length = tempVInt.value;
offset += length;
// Last item -- ok to be at end.
if (offset > end) {
- warnBeyondEof();
+ throw new EOFException();
}
currentBytes = bytes;
@@ -215,12 +244,12 @@ public final class LazyBinaryDeserializeRead extends DeserializeRead {
}
break;
case DATE:
+ // Parse the first byte of a vint/vlong to determine the number of bytes.
+ if (offset + WritableUtils.decodeVIntSize(bytes[offset]) > end) {
+ throw new EOFException();
+ }
LazyBinaryUtils.readVInt(bytes, offset, tempVInt);
offset += tempVInt.length;
- // Last item -- ok to be at end.
- if (offset > end) {
- warnBeyondEof();
- }
currentDateWritable.set(tempVInt.value);
break;
@@ -231,34 +260,37 @@ public final class LazyBinaryDeserializeRead extends DeserializeRead {
offset += length;
// Last item -- ok to be at end.
if (offset > end) {
- warnBeyondEof();
+ throw new EOFException();
}
currentTimestampWritable.set(bytes, saveStart);
}
break;
case INTERVAL_YEAR_MONTH:
+ // Parse the first byte of a vint/vlong to determine the number of bytes.
+ if (offset + WritableUtils.decodeVIntSize(bytes[offset]) > end) {
+ throw new EOFException();
+ }
LazyBinaryUtils.readVInt(bytes, offset, tempVInt);
offset += tempVInt.length;
- // Last item -- ok to be at end.
- if (offset > end) {
- warnBeyondEof();
- }
+
currentHiveIntervalYearMonthWritable.set(tempVInt.value);
break;
case INTERVAL_DAY_TIME:
+ // The first bounds check uses >= because at least one more byte must remain for the 2nd vint.
+ // Parse the first byte of a vint/vlong to determine the number of bytes.
+ if (offset + WritableUtils.decodeVIntSize(bytes[offset]) >= end) {
+ throw new EOFException();
+ }
LazyBinaryUtils.readVLong(bytes, offset, tempVLong);
offset += tempVLong.length;
- if (offset >= end) {
- // Overshoot or not enough for next item.
- warnBeyondEof();
+
+ // Parse the first byte of a vint/vlong to determine the number of bytes.
+ if (offset + WritableUtils.decodeVIntSize(bytes[offset]) > end) {
+ throw new EOFException();
}
LazyBinaryUtils.readVInt(bytes, offset, tempVInt);
offset += tempVInt.length;
- // Last item -- ok to be at end.
- if (offset > end) {
- warnBeyondEof();
- }
currentHiveIntervalDayTimeWritable.set(tempVLong.value, tempVInt.value);
break;
@@ -269,23 +301,27 @@ public final class LazyBinaryDeserializeRead extends DeserializeRead {
// These calls are to see how much data there is. The setFromBytes call below will do the same
// readVInt reads but actually unpack the decimal.
+
+ // The first bounds check uses >= because at least one more byte must remain for the 2nd vint.
+ // Parse the first byte of a vint/vlong to determine the number of bytes.
+ if (offset + WritableUtils.decodeVIntSize(bytes[offset]) >= end) {
+ throw new EOFException();
+ }
LazyBinaryUtils.readVInt(bytes, offset, tempVInt);
int saveStart = offset;
offset += tempVInt.length;
- if (offset >= end) {
- // Overshoot or not enough for next item.
- warnBeyondEof();
+
+ // Parse the first byte of a vint/vlong to determine the number of bytes.
+ if (offset + WritableUtils.decodeVIntSize(bytes[offset]) > end) {
+ throw new EOFException();
}
LazyBinaryUtils.readVInt(bytes, offset, tempVInt);
offset += tempVInt.length;
- if (offset >= end) {
- // Overshoot or not enough for next item.
- warnBeyondEof();
- }
+
offset += tempVInt.value;
// Last item -- ok to be at end.
if (offset > end) {
- warnBeyondEof();
+ throw new EOFException();
}
int length = offset - saveStart;
@@ -327,7 +363,7 @@ public final class LazyBinaryDeserializeRead extends DeserializeRead {
if ((fieldIndex % 8) == 0) {
// Get next null byte.
if (offset >= end) {
- warnBeyondEof();
+ throw new EOFException();
}
nullByte = bytes[offset++];
}
@@ -363,23 +399,7 @@ public final class LazyBinaryDeserializeRead extends DeserializeRead {
return readBeyondConfiguredFieldsWarned;
}
@Override
- public boolean readBeyondBufferRangeWarned() {
- return readBeyondBufferRangeWarned;
- }
- @Override
public boolean bufferRangeHasExtraDataWarned() {
return bufferRangeHasExtraDataWarned;
}
-
- private void warnBeyondEof() throws EOFException {
- if (!readBeyondBufferRangeWarned) {
- // Warn only once.
- int length = end - start;
- LOG.info("Reading beyond buffer range! Buffer range " + start
- + " for length " + length + " but reading more... "
- + "(total buffer length " + bytes.length + ")"
- + " Ignoring similar problems.");
- readBeyondBufferRangeWarned = true;
- }
- }
}
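
The pattern repeated throughout this file leans on the vint encoding itself: the first byte of a WritableUtils vint/vlong encodes the total length, so a one-byte look-ahead is enough to bounds-check the whole value before decoding. Distilled into a standalone helper (a sketch, not code from this patch; the caller must already have verified that bytes[offset] itself is readable):

    // Throw instead of warning when a vint/vlong would run past the buffer range.
    static void checkVIntFits(byte[] bytes, int offset, int end) throws java.io.EOFException {
      // decodeVIntSize inspects only the first byte of the encoded value.
      if (offset + org.apache.hadoop.io.WritableUtils.decodeVIntSize(bytes[offset]) > end) {
        throw new java.io.EOFException();
      }
    }

After this check succeeds, LazyBinaryUtils.readVInt or readVLong can decode without reading beyond end, which is what lets the patch replace warnBeyondEof with hard EOFExceptions.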