Posted to commits@hive.apache.org by se...@apache.org on 2015/01/23 20:59:24 UTC
svn commit: r1654355 [12/27] - in /hive/branches/llap: ./
beeline/src/java/org/apache/hive/beeline/
cli/src/java/org/apache/hadoop/hive/cli/
common/src/java/org/apache/hadoop/hive/common/
common/src/java/org/apache/hadoop/hive/conf/ data/conf/ data/con...
Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinKey.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinKey.java?rev=1654355&r1=1654354&r2=1654355&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinKey.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinKey.java Fri Jan 23 19:59:11 2015
@@ -57,22 +57,11 @@ public abstract class MapJoinKey {
public abstract boolean hasAnyNulls(int fieldCount, boolean[] nullsafes);
@SuppressWarnings("deprecation")
- public static MapJoinKey read(Output output, MapJoinKey key,
- MapJoinObjectSerDeContext context, Writable writable, boolean mayReuseKey)
- throws SerDeException, HiveException {
+ public static MapJoinKey read(Output output, MapJoinObjectSerDeContext context,
+ Writable writable) throws SerDeException, HiveException {
SerDe serde = context.getSerDe();
Object obj = serde.deserialize(writable);
- boolean useOptimized = useOptimizedKeyBasedOnPrev(key);
- if (useOptimized || key == null) {
- byte[] structBytes = serialize(output, obj, serde.getObjectInspector(), !useOptimized);
- if (structBytes != null) {
- return MapJoinKeyBytes.fromBytes(key, mayReuseKey, structBytes);
- } else if (useOptimized) {
- throw new SerDeException(
- "Failed to serialize " + obj + " even though optimized keys are used");
- }
- }
- MapJoinKeyObject result = mayReuseKey ? (MapJoinKeyObject)key : new MapJoinKeyObject();
+ MapJoinKeyObject result = new MapJoinKeyObject();
result.read(serde.getObjectInspector(), obj);
return result;
}
@@ -98,35 +87,6 @@ public abstract class MapJoinKey {
SUPPORTED_PRIMITIVES.add(PrimitiveCategory.CHAR);
}
- private static byte[] serialize(Output byteStream,
- Object obj, ObjectInspector oi, boolean checkTypes) throws HiveException {
- if (null == obj || !(oi instanceof StructObjectInspector)) {
- return null; // not supported
- }
- StructObjectInspector soi = (StructObjectInspector)oi;
- List<? extends StructField> fields = soi.getAllStructFieldRefs();
- int size = fields.size();
- if (size > 8) {
- return null; // not supported
- } else if (size == 0) {
- return EMPTY_BYTE_ARRAY; // shortcut for null keys
- }
- Object[] fieldData = new Object[size];
- List<ObjectInspector> fieldOis = new ArrayList<ObjectInspector>(size);
- for (int i = 0; i < size; ++i) {
- StructField field = fields.get(i);
- ObjectInspector foi = field.getFieldObjectInspector();
- if (checkTypes && !isSupportedField(foi)) {
- return null;
- }
- fieldData[i] = soi.getStructFieldData(obj, field);
- fieldOis.add(foi);
- }
-
- byteStream = serializeRow(byteStream, fieldData, fieldOis, null);
- return Arrays.copyOf(byteStream.getData(), byteStream.getLength());
- }
-
public static boolean isSupportedField(ObjectInspector foi) {
if (foi.getCategory() != Category.PRIMITIVE) return false; // not supported
PrimitiveCategory pc = ((PrimitiveObjectInspector)foi).getPrimitiveCategory();
@@ -136,19 +96,6 @@ public abstract class MapJoinKey {
public static MapJoinKey readFromVector(Output output, MapJoinKey key, Object[] keyObject,
List<ObjectInspector> keyOIs, boolean mayReuseKey) throws HiveException {
- boolean useOptimized = useOptimizedKeyBasedOnPrev(key);
- if (useOptimized || key == null) {
- if (keyObject.length <= 8) {
- output = serializeRow(output, keyObject, keyOIs, null);
- return MapJoinKeyBytes.fromBytes(key, mayReuseKey,
- Arrays.copyOf(output.getData(), output.getLength()));
- }
- if (useOptimized) {
- throw new HiveException(
- "Failed to serialize " + Arrays.toString(keyObject) +
- " even though optimized keys are used");
- }
- }
MapJoinKeyObject result = mayReuseKey ? (MapJoinKeyObject)key : new MapJoinKeyObject();
result.setKeyObjects(keyObject);
return result;
@@ -178,32 +125,11 @@ public abstract class MapJoinKey {
public static MapJoinKey readFromRow(Output output, MapJoinKey key, Object[] keyObject,
List<ObjectInspector> keyFieldsOI, boolean mayReuseKey) throws HiveException {
- boolean useOptimized = useOptimizedKeyBasedOnPrev(key);
- if (useOptimized || key == null) {
- if (keyObject.length <= 8) {
- byte[] structBytes;
- if (keyObject.length == 0) {
- structBytes = EMPTY_BYTE_ARRAY; // shortcut for null keys
- } else {
- output = serializeRow(output, keyObject, keyFieldsOI, null);
- structBytes = Arrays.copyOf(output.getData(), output.getLength());
- }
- return MapJoinKeyBytes.fromBytes(key, mayReuseKey, structBytes);
- }
- if (useOptimized) {
- throw new HiveException(
- "Failed to serialize " + Arrays.toString(keyObject) +
- " even though optimized keys are used");
- }
- }
MapJoinKeyObject result = mayReuseKey ? (MapJoinKeyObject)key : new MapJoinKeyObject();
result.readFromRow(keyObject, keyFieldsOI);
return result;
}
- private static final Log LOG = LogFactory.getLog(MapJoinKey.class);
-
-
/**
* Serializes row to output.
* @param byteStream Output to reuse. Can be null, in that case a new one would be created.
@@ -228,8 +154,4 @@ public abstract class MapJoinKey {
}
return byteStream;
}
-
- private static boolean useOptimizedKeyBasedOnPrev(MapJoinKey key) {
- return (key != null) && (key instanceof MapJoinKeyBytes);
- }
}
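Note: the read() method above now has a single code path: it always deserializes into a fresh MapJoinKeyObject, with the MapJoinKeyBytes fast path and key reuse removed. A minimal caller sketch, assuming the serde context and key writable come from the usual map-join loader (the variable names here are placeholders, not from this commit):

    // Hedged sketch of the simplified API; keyCtx and keyWritable are
    // assumed to come from MapJoinObjectSerDeContext / the key-value reader,
    // and Output is the serde2 ByteStream.Output.
    Output out = new Output();
    MapJoinKey key = MapJoinKey.read(out, keyCtx, keyWritable);
    // Always a MapJoinKeyObject now; no reuse of a previous key object.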
Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinTableContainerSerDe.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinTableContainerSerDe.java?rev=1654355&r1=1654354&r2=1654355&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinTableContainerSerDe.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinTableContainerSerDe.java Fri Jan 23 19:59:11 2015
@@ -19,15 +19,20 @@
package org.apache.hadoop.hive.ql.exec.persistence;
import java.io.IOException;
+import java.io.InputStream;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.lang.reflect.Constructor;
import java.util.ConcurrentModificationException;
import java.util.Map;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.serde2.SerDe;
import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.shims.ShimLoader;
import org.apache.hadoop.io.Writable;
@SuppressWarnings("deprecation")
@@ -86,6 +91,74 @@ public class MapJoinTableContainerSerDe
throw new HiveException("Error while trying to create table container", e);
}
}
+
+ /**
+ * Loads the table container from a folder. Only used on the Spark path.
+ * @param fs FileSystem of the folder.
+ * @param folder The folder to load the table container from.
+ * @return The loaded table container, or null if the folder is empty.
+ */
+ @SuppressWarnings("unchecked")
+ public MapJoinPersistableTableContainer load(
+ FileSystem fs, Path folder) throws HiveException {
+ try {
+ if (!fs.isDirectory(folder)) {
+ throw new HiveException("Error, not a directory: " + folder);
+ }
+ FileStatus[] fileStatuses = fs.listStatus(folder);
+ if (fileStatuses == null || fileStatuses.length == 0) {
+ return null;
+ }
+
+ SerDe keySerDe = keyContext.getSerDe();
+ SerDe valueSerDe = valueContext.getSerDe();
+ Writable keyContainer = keySerDe.getSerializedClass().newInstance();
+ Writable valueContainer = valueSerDe.getSerializedClass().newInstance();
+
+ MapJoinPersistableTableContainer tableContainer = null;
+
+ for (FileStatus fileStatus: fileStatuses) {
+ Path filePath = fileStatus.getPath();
+ if (ShimLoader.getHadoopShims().isDirectory(fileStatus)) {
+ throw new HiveException("Error, not a file: " + filePath);
+ }
+ InputStream is = null;
+ ObjectInputStream in = null;
+ try {
+ is = fs.open(filePath, 4096);
+ in = new ObjectInputStream(is);
+ String name = in.readUTF();
+ Map<String, String> metaData = (Map<String, String>) in.readObject();
+ if (tableContainer == null) {
+ tableContainer = create(name, metaData);
+ }
+ int numKeys = in.readInt();
+ for (int keyIndex = 0; keyIndex < numKeys; keyIndex++) {
+ MapJoinKeyObject key = new MapJoinKeyObject();
+ key.read(keyContext, in, keyContainer);
+ if (tableContainer.get(key) == null) {
+ tableContainer.put(key, new MapJoinEagerRowContainer());
+ }
+ MapJoinEagerRowContainer values = (MapJoinEagerRowContainer) tableContainer.get(key);
+ values.read(valueContext, in, valueContainer);
+ tableContainer.put(key, values);
+ }
+ } finally {
+ if (in != null) {
+ in.close();
+ } else if (is != null) {
+ is.close();
+ }
+ }
+ }
+ return tableContainer;
+ } catch (IOException e) {
+ throw new HiveException("IO error while trying to create table container", e);
+ } catch (Exception e) {
+ throw new HiveException("Error while trying to create table container", e);
+ }
+ }
+
public void persist(ObjectOutputStream out, MapJoinPersistableTableContainer tableContainer)
throws HiveException {
int numKeys = tableContainer.size();
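Note: a minimal usage sketch for the new load() overload on the Spark path (fs, conf, folder, and the serde instance are assumed to be set up by the caller; none of these names are from this commit):

    FileSystem fs = folder.getFileSystem(conf);
    // Each file in the folder is an ObjectOutputStream dump: a UTF name,
    // a metadata map, then numKeys (key, row-container) pairs.
    MapJoinPersistableTableContainer table = tableSerDe.load(fs, folder);
    if (table == null) {
      // the folder existed but contained no dump files
    }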
Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/CustomPartitionVertex.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/CustomPartitionVertex.java?rev=1654355&r1=1654354&r2=1654355&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/CustomPartitionVertex.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/CustomPartitionVertex.java Fri Jan 23 19:59:11 2015
@@ -234,7 +234,7 @@ public class CustomPartitionVertex exten
try {
fileSplit = getFileSplitFromEvent(diEvent);
} catch (IOException e) {
- throw new RuntimeException("Failed to get file split for event: " + diEvent);
+ throw new RuntimeException("Failed to get file split for event: " + diEvent, e);
}
Set<FileSplit> fsList =
pathFileSplitsMap.get(Utilities.getBucketFileNameFromPathSubString(fileSplit.getPath()
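Note: this change (and the matching one-liners in DagUtils, CastDecimalToString, VectorUDFArgDesc, and HiveIndexedInputFormat below) passes the caught exception as the RuntimeException cause, so the original stack trace survives in the "Caused by:" chain instead of being dropped:

    } catch (IOException e) {
      // The second constructor argument chains the cause; without it the
      // IOException's stack trace is lost.
      throw new RuntimeException("Failed to get file split for event: " + diEvent, e);
    }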
Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java?rev=1654355&r1=1654354&r2=1654355&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java Fri Jan 23 19:59:11 2015
@@ -578,7 +578,7 @@ public class DagUtils {
}
} catch (IOException e) {
throw new RuntimeException(
- "Can't make path " + outputPath + " : " + e.getMessage());
+ "Can't make path " + outputPath + " : " + e.getMessage(), e);
}
}
Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HashTableLoader.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HashTableLoader.java?rev=1654355&r1=1654354&r2=1654355&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HashTableLoader.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HashTableLoader.java Fri Jan 23 19:59:11 2015
@@ -57,8 +57,6 @@ public class HashTableLoader implements
private ExecMapperContext context;
private Configuration hconf;
private MapJoinDesc desc;
- private MapJoinKey lastKey = null;
- private int rowCount = 0;
@Override
public void init(ExecMapperContext context, Configuration hconf, MapJoinOperator joinOp) {
@@ -111,8 +109,7 @@ public class HashTableLoader implements
: new HashMapWrapper(hconf, keyCount);
while (kvReader.next()) {
- rowCount++;
- lastKey = tableContainer.putRow(keyCtx, (Writable)kvReader.getCurrentKey(),
+ tableContainer.putRow(keyCtx, (Writable)kvReader.getCurrentKey(),
valCtx, (Writable)kvReader.getCurrentValue());
}
Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnAssignFactory.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnAssignFactory.java?rev=1654355&r1=1654354&r2=1654355&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnAssignFactory.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnAssignFactory.java Fri Jan 23 19:59:11 2015
@@ -30,7 +30,9 @@ import org.apache.hadoop.hive.ql.metadat
import org.apache.hadoop.hive.serde2.io.ByteWritable;
import org.apache.hadoop.hive.serde2.io.DateWritable;
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.hive.serde2.io.HiveCharWritable;
import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
+import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable;
import org.apache.hadoop.hive.serde2.io.ShortWritable;
import org.apache.hadoop.hive.serde2.io.TimestampWritable;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
@@ -414,7 +416,13 @@ public class VectorColumnAssignFactory {
if (val == null) {
assignNull(destIndex);
} else {
- HiveVarchar hiveVarchar = (HiveVarchar) val;
+ // We store VARCHAR type stripped of pads.
+ HiveVarchar hiveVarchar;
+ if (val instanceof HiveVarchar) {
+ hiveVarchar = (HiveVarchar) val;
+ } else {
+ hiveVarchar = ((HiveVarcharWritable) val).getHiveVarchar();
+ }
byte[] bytes = hiveVarchar.getValue().getBytes();
assignBytes(bytes, 0, bytes.length, destIndex);
}
@@ -429,7 +437,12 @@ public class VectorColumnAssignFactory {
assignNull(destIndex);
} else {
// We store CHAR type stripped of pads.
- HiveChar hiveChar = (HiveChar) val;
+ HiveChar hiveChar;
+ if (val instanceof HiveChar) {
+ hiveChar = (HiveChar) val;
+ } else {
+ hiveChar = ((HiveCharWritable) val).getHiveChar();
+ }
byte[] bytes = hiveChar.getStrippedValue().getBytes();
assignBytes(bytes, 0, bytes.length, destIndex);
}
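Note: both assigners above now tolerate receiving either the plain Hive value or its Writable wrapper. A sketch of the unwrap pattern (val is the incoming Object; any other runtime type still fails with a ClassCastException, which keeps the error visible):

    HiveChar hiveChar = (val instanceof HiveChar)
        ? (HiveChar) val
        : ((HiveCharWritable) val).getHiveChar();
    byte[] bytes = hiveChar.getStrippedValue().getBytes();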
Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java?rev=1654355&r1=1654354&r2=1654355&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java Fri Jan 23 19:59:11 2015
@@ -70,6 +70,7 @@ public class VectorGroupByOperator exten
* Key vector expressions.
*/
private VectorExpression[] keyExpressions;
+ private int outputKeyLength;
private boolean isVectorOutput;
@@ -768,9 +769,16 @@ public class VectorGroupByOperator exten
List<ExprNodeDesc> keysDesc = conf.getKeys();
try {
- keyOutputWriters = new VectorExpressionWriter[keyExpressions.length];
+ List<String> outputFieldNames = conf.getOutputColumnNames();
+
+ // The grouping id, which is the last of the key columns, should be
+ // pruned from the output; see ColumnPrunerGroupByProc.
+ outputKeyLength =
+ conf.pruneGroupingSetId() ? keyExpressions.length - 1 : keyExpressions.length;
+
+ keyOutputWriters = new VectorExpressionWriter[outputKeyLength];
- for(int i = 0; i < keyExpressions.length; ++i) {
+ for(int i = 0; i < outputKeyLength; ++i) {
keyOutputWriters[i] = VectorExpressionWriterFactory.
genVectorExpressionWritable(keysDesc.get(i));
objectInspectors.add(keyOutputWriters[i].getObjectInspector());
@@ -788,7 +796,6 @@ public class VectorGroupByOperator exten
aggregationBatchInfo.compileAggregationBatchInfo(aggregators);
}
LOG.warn("VectorGroupByOperator is vector output " + isVectorOutput);
- List<String> outputFieldNames = conf.getOutputColumnNames();
outputObjInspector = ObjectInspectorFactory.getStandardStructObjectInspector(
outputFieldNames, objectInspectors);
if (isVectorOutput) {
@@ -807,9 +814,9 @@ public class VectorGroupByOperator exten
initializeChildren(hconf);
- forwardCache = new Object[keyExpressions.length + aggregators.length];
+ forwardCache = new Object[outputKeyLength + aggregators.length];
- if (keyExpressions.length == 0) {
+ if (outputKeyLength == 0) {
processingMode = this.new ProcessingModeGlobalAggregate();
} else if (conf.getVectorDesc().isVectorGroupBatches()) {
// Sorted GroupBy of vector batches where an individual batch has the same group key (e.g. reduce).
@@ -872,7 +879,7 @@ public class VectorGroupByOperator exten
int fi = 0;
if (!isVectorOutput) {
// Output row.
- for (int i = 0; i < keyExpressions.length; ++i) {
+ for (int i = 0; i < outputKeyLength; ++i) {
forwardCache[fi++] = keyWrappersBatch.getWritableKeyValue (
kw, i, keyOutputWriters[i]);
}
@@ -886,7 +893,7 @@ public class VectorGroupByOperator exten
forward(forwardCache, outputObjInspector);
} else {
// Output keys and aggregates into the output batch.
- for (int i = 0; i < keyExpressions.length; ++i) {
+ for (int i = 0; i < outputKeyLength; ++i) {
vectorColumnAssign[fi++].assignObjectValue(keyWrappersBatch.getWritableKeyValue (
kw, i, keyOutputWriters[i]), outputBatch.size);
}
@@ -910,7 +917,7 @@ public class VectorGroupByOperator exten
*/
private void writeGroupRow(VectorAggregationBufferRow agg, DataOutputBuffer buffer)
throws HiveException {
- int fi = keyExpressions.length; // Start after group keys.
+ int fi = outputKeyLength; // Start after group keys.
for (int i = 0; i < aggregators.length; ++i) {
vectorColumnAssign[fi++].assignObjectValue(aggregators[i].evaluateOutput(
agg.getAggregationBuffer(i)), outputBatch.size);
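Note: all of the output-side loops above are now bounded by outputKeyLength rather than keyExpressions.length, because with grouping sets the last key column is the synthetic grouping id that ColumnPrunerGroupByProc prunes from the output. A worked sketch of the sizing (the example query is illustrative, not from this commit):

    // e.g. GROUP BY a, b GROUPING SETS ((a), (b)):
    //   keyExpressions.length == 3   (a, b, grouping id)
    //   outputKeyLength       == 2   (a, b)
    int outputKeyLength =
        conf.pruneGroupingSetId() ? keyExpressions.length - 1 : keyExpressions.length;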
Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToString.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToString.java?rev=1654355&r1=1654354&r2=1654355&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToString.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToString.java Fri Jan 23 19:59:11 2015
@@ -20,7 +20,6 @@ package org.apache.hadoop.hive.ql.exec.v
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.DecimalToStringUnaryUDF;
/**
* To support vectorized cast of decimal to string.
@@ -49,9 +48,8 @@ public class CastDecimalToString extends
try {
b = s.getBytes("UTF-8");
} catch (Exception e) {
-
// This should never happen. If it does, there is a bug.
- throw new RuntimeException("Internal error: unable to convert decimal to string");
+ throw new RuntimeException("Internal error: unable to convert decimal to string", e);
}
assign(outV, i, b, b.length);
}
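Note: the catch block above is effectively unreachable, since UTF-8 is a mandatory charset on every JVM; chaining e just makes the impossible case debuggable. An alternative sketch, not what this commit does, that avoids the checked exception entirely on Java 7+:

    // The Charset overload of getBytes() throws no checked exception.
    byte[] b = s.getBytes(java.nio.charset.StandardCharsets.UTF_8);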
Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFArgDesc.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFArgDesc.java?rev=1654355&r1=1654354&r2=1654355&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFArgDesc.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFArgDesc.java Fri Jan 23 19:59:11 2015
@@ -18,7 +18,6 @@
package org.apache.hadoop.hive.ql.exec.vector.udf;
-import java.io.IOException;
import java.io.Serializable;
import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
@@ -107,7 +106,7 @@ public class VectorUDFArgDesc implements
o = writers[argPosition].writeValue(cv, row);
return new GenericUDF.DeferredJavaObject(o);
} catch (HiveException e) {
- throw new RuntimeException("Unable to get Java object from VectorizedRowBatch");
+ throw new RuntimeException("Unable to get Java object from VectorizedRowBatch", e);
}
}
}
Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexResult.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexResult.java?rev=1654355&r1=1654354&r2=1654355&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexResult.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexResult.java Fri Jan 23 19:59:11 2015
@@ -31,6 +31,7 @@ import org.apache.hadoop.fs.FSDataInputS
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.common.FileUtils;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable;
@@ -98,7 +99,7 @@ public class HiveIndexResult {
FileSystem fs = indexFilePath.getFileSystem(conf);
FileStatus indexStat = fs.getFileStatus(indexFilePath);
if (indexStat.isDir()) {
- FileStatus[] fss = fs.listStatus(indexFilePath);
+ FileStatus[] fss = fs.listStatus(indexFilePath, FileUtils.HIDDEN_FILES_PATH_FILTER);
for (FileStatus f : fss) {
paths.add(f.getPath());
}
Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexedInputFormat.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexedInputFormat.java?rev=1654355&r1=1654354&r2=1654355&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexedInputFormat.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexedInputFormat.java Fri Jan 23 19:59:11 2015
@@ -185,7 +185,7 @@ public class HiveIndexedInputFormat exte
}
} catch (HiveException e) {
throw new RuntimeException(
- "Unable to get metadata for input table split" + split.getPath());
+ "Unable to get metadata for input table split" + split.getPath(), e);
}
}
InputSplit retA[] = newSplits.toArray((new FileSplit[newSplits.size()]));
Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/BucketizedHiveInputFormat.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/BucketizedHiveInputFormat.java?rev=1654355&r1=1654354&r2=1654355&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/BucketizedHiveInputFormat.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/BucketizedHiveInputFormat.java Fri Jan 23 19:59:11 2015
@@ -27,7 +27,6 @@ import org.apache.commons.logging.LogFac
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.hive.common.FileUtils;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.plan.PartitionDesc;
@@ -92,14 +91,7 @@ public class BucketizedHiveInputFormat<K
List<IOException> errors = new ArrayList<IOException>();
FileSystem fs = dir.getFileSystem(job);
- FileStatus[] matches = fs.globStatus(dir, new PathFilter() {
-
- @Override
- public boolean accept(Path p) {
- String name = p.getName();
- return !name.startsWith("_") && !name.startsWith(".");
- }
- });
+ FileStatus[] matches = fs.globStatus(dir, FileUtils.HIDDEN_FILES_PATH_FILTER);
if (matches == null) {
errors.add(new IOException("Input path does not exist: " + dir));
} else if (matches.length == 0) {
@@ -113,7 +105,8 @@ public class BucketizedHiveInputFormat<K
if (!errors.isEmpty()) {
throw new InvalidInputException(errors);
}
- LOG.info("Total input paths to process : " + result.size());
+ LOG.debug("Matches for " + dir + ": " + result);
+ LOG.info("Total input paths to process : " + result.size() + " from dir " + dir);
return result.toArray(new FileStatus[result.size()]);
}
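Note: several files in this revision (BucketizedHiveInputFormat, CombineHiveInputFormat, HiveIndexResult, SymbolicInputFormat, SymlinkTextInputFormat) swap ad-hoc listings for FileUtils.HIDDEN_FILES_PATH_FILTER. Judging by the inline filter deleted above, the shared filter is equivalent to:

    // Presumed behavior of FileUtils.HIDDEN_FILES_PATH_FILTER, matching the
    // inline PathFilter removed above: skip hidden and metadata files.
    PathFilter hidden = new PathFilter() {
      @Override
      public boolean accept(Path p) {
        String name = p.getName();
        return !name.startsWith("_") && !name.startsWith(".");
      }
    };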
Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java?rev=1654355&r1=1654354&r2=1654355&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java Fri Jan 23 19:59:11 2015
@@ -30,6 +30,10 @@ import java.util.List;
import java.util.Map;
import java.util.Queue;
import java.util.Set;
+import java.util.concurrent.Callable;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@@ -38,6 +42,7 @@ import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
+import org.apache.hadoop.hive.common.FileUtils;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.Utilities;
@@ -74,6 +79,48 @@ public class CombineHiveInputFormat<K ex
private static final String CLASS_NAME = CombineHiveInputFormat.class.getName();
public static final Log LOG = LogFactory.getLog(CLASS_NAME);
+ // max number of threads we can use to check non-combinable paths
+ private static final int MAX_CHECK_NONCOMBINABLE_THREAD_NUM = 50;
+ private static final int DEFAULT_NUM_PATH_PER_THREAD = 100;
+
+ private class CheckNonCombinablePathCallable implements Callable<Set<Integer>> {
+ private final Path[] paths;
+ private final int start;
+ private final int length;
+ private final JobConf conf;
+
+ public CheckNonCombinablePathCallable(Path[] paths, int start, int length, JobConf conf) {
+ this.paths = paths;
+ this.start = start;
+ this.length = length;
+ this.conf = conf;
+ }
+
+ @Override
+ public Set<Integer> call() throws Exception {
+ Set<Integer> nonCombinablePathIndices = new HashSet<Integer>();
+ for (int i = 0; i < length; i++) {
+ PartitionDesc part =
+ HiveFileFormatUtils.getPartitionDescFromPathRecursively(
+ pathToPartitionInfo, paths[i + start],
+ IOPrepareCache.get().allocatePartitionDescMap());
+ // Use HiveInputFormat if any of the paths is not splittable
+ Class<? extends InputFormat> inputFormatClass = part.getInputFileFormatClass();
+ InputFormat<WritableComparable, Writable> inputFormat =
+ getInputFormatFromCache(inputFormatClass, conf);
+ if (inputFormat instanceof AvoidSplitCombination &&
+ ((AvoidSplitCombination) inputFormat).shouldSkipCombine(paths[i + start], conf)) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("The path [" + paths[i + start] +
+ "] is being parked for HiveInputFormat.getSplits");
+ }
+ nonCombinablePathIndices.add(i);
+ }
+ }
+ return nonCombinablePathIndices;
+ }
+ }
+
/**
* CombineHiveInputSplit encapsulates an InputSplit with its corresponding
* inputFormatClassName. A CombineHiveInputSplit comprises of multiple chunks
@@ -278,8 +325,6 @@ public class CombineHiveInputFormat<K ex
private InputSplit[] getCombineSplits(JobConf job, int numSplits,
Map<String, PartitionDesc> pathToPartitionInfo)
throws IOException {
- PerfLogger perfLogger = PerfLogger.getPerfLogger();
- perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.GET_SPLITS);
init(job);
Map<String, ArrayList<String>> pathToAliases = mrwork.getPathToAliases();
Map<String, Operator<? extends OperatorDesc>> aliasToWork =
@@ -290,7 +335,6 @@ public class CombineHiveInputFormat<K ex
InputSplit[] splits = null;
if (combine == null) {
splits = super.getSplits(job, numSplits);
- perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.GET_SPLITS);
return splits;
}
@@ -349,13 +393,12 @@ public class CombineHiveInputFormat<K ex
} else if ((new CompressionCodecFactory(job)).getCodec(path) != null) {
//if compression codec is set, use HiveInputFormat.getSplits (don't combine)
splits = super.getSplits(job, numSplits);
- perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.GET_SPLITS);
return splits;
}
while (dirs.peek() != null) {
Path tstPath = dirs.remove();
- FileStatus[] fStatus = inpFs.listStatus(tstPath);
+ FileStatus[] fStatus = inpFs.listStatus(tstPath, FileUtils.HIDDEN_FILES_PATH_FILTER);
for (int idx = 0; idx < fStatus.length; idx++) {
if (fStatus[idx].isDir()) {
dirs.offer(fStatus[idx].getPath());
@@ -363,7 +406,6 @@ public class CombineHiveInputFormat<K ex
fStatus[idx].getPath()) != null) {
//if compression codec is set, use HiveInputFormat.getSplits (don't combine)
splits = super.getSplits(job, numSplits);
- perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.GET_SPLITS);
return splits;
}
}
@@ -373,7 +415,6 @@ public class CombineHiveInputFormat<K ex
//don't combine if inputformat is a SymlinkTextInputFormat
if (inputFormat instanceof SymlinkTextInputFormat) {
splits = super.getSplits(job, numSplits);
- perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.GET_SPLITS);
return splits;
}
@@ -451,7 +492,6 @@ public class CombineHiveInputFormat<K ex
}
LOG.info("number of splits " + result.size());
- perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.GET_SPLITS);
return result.toArray(new CombineHiveInputSplit[result.size()]);
}
@@ -460,6 +500,8 @@ public class CombineHiveInputFormat<K ex
*/
@Override
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
+ PerfLogger perfLogger = PerfLogger.getPerfLogger();
+ perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.GET_SPLITS);
init(job);
ArrayList<InputSplit> result = new ArrayList<InputSplit>();
@@ -469,26 +511,37 @@ public class CombineHiveInputFormat<K ex
List<Path> nonCombinablePaths = new ArrayList<Path>(paths.length / 2);
List<Path> combinablePaths = new ArrayList<Path>(paths.length / 2);
- for (Path path : paths) {
-
- PartitionDesc part =
- HiveFileFormatUtils.getPartitionDescFromPathRecursively(
- pathToPartitionInfo, path,
- IOPrepareCache.get().allocatePartitionDescMap());
-
- // Use HiveInputFormat if any of the paths is not splittable
- Class<? extends InputFormat> inputFormatClass = part.getInputFileFormatClass();
- InputFormat<WritableComparable, Writable> inputFormat = getInputFormatFromCache(inputFormatClass, job);
- if (inputFormat instanceof AvoidSplitCombination &&
- ((AvoidSplitCombination) inputFormat).shouldSkipCombine(path, job)) {
- if (LOG.isDebugEnabled()) {
- LOG.debug("The split [" + path +
- "] is being parked for HiveInputFormat.getSplits");
+ int numThreads = Math.min(MAX_CHECK_NONCOMBINABLE_THREAD_NUM,
+ (int) Math.ceil((double) paths.length / DEFAULT_NUM_PATH_PER_THREAD));
+ int numPathPerThread = (int) Math.ceil((double) paths.length / numThreads);
+ LOG.info("Total number of paths: " + paths.length +
+ ", launching " + numThreads + " threads to check non-combinable ones.");
+ ExecutorService executor = Executors.newFixedThreadPool(numThreads);
+ List<Future<Set<Integer>>> futureList = new ArrayList<Future<Set<Integer>>>(numThreads);
+ try {
+ for (int i = 0; i < numThreads; i++) {
+ int start = i * numPathPerThread;
+ int length = i != numThreads - 1 ? numPathPerThread : paths.length - start;
+ futureList.add(executor.submit(
+ new CheckNonCombinablePathCallable(paths, start, length, job)));
+ }
+ Set<Integer> nonCombinablePathIndices = new HashSet<Integer>();
+ for (Future<Set<Integer>> future : futureList) {
+ nonCombinablePathIndices.addAll(future.get());
+ }
+ for (int i = 0; i < paths.length; i++) {
+ if (nonCombinablePathIndices.contains(i)) {
+ nonCombinablePaths.add(paths[i]);
+ } else {
+ combinablePaths.add(paths[i]);
}
- nonCombinablePaths.add(path);
- } else {
- combinablePaths.add(path);
}
+ } catch (Exception e) {
+ LOG.error("Error checking non-combinable path", e);
+ perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.GET_SPLITS);
+ throw new IOException(e);
+ } finally {
+ executor.shutdownNow();
}
// Store the previous value for the path specification
@@ -528,6 +581,7 @@ public class CombineHiveInputFormat<K ex
job.set(HiveConf.ConfVars.HADOOPMAPREDINPUTDIR.varname, oldPaths);
}
LOG.info("Number of all splits " + result.size());
+ perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.GET_SPLITS);
return result.toArray(new InputSplit[result.size()]);
}
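Note: a worked example of the thread-partitioning arithmetic above, for a hypothetical 250 input paths with the constants defined earlier (MAX_CHECK_NONCOMBINABLE_THREAD_NUM = 50, DEFAULT_NUM_PATH_PER_THREAD = 100):

    int numThreads = Math.min(50, (int) Math.ceil(250 / 100.0));  // = 3
    int numPathPerThread = (int) Math.ceil(250 / 3.0);            // = 84
    // Threads cover [0,84), [84,168), [168,250); the last thread takes the
    // remainder (82 paths), matching the 'i != numThreads - 1' branch.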
Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/HiveContextAwareRecordReader.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/HiveContextAwareRecordReader.java?rev=1654355&r1=1654354&r2=1654355&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/HiveContextAwareRecordReader.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/HiveContextAwareRecordReader.java Fri Jan 23 19:59:11 2015
@@ -66,6 +66,7 @@ public abstract class HiveContextAwareRe
private boolean wasUsingSortedSearch = false;
private String genericUDFClassName = null;
private final List<Comparison> stopComparisons = new ArrayList<Comparison>();
+ private Map<String, PartitionDesc> pathToPartitionInfo;
protected RecordReader recordReader;
protected JobConf jobConf;
@@ -116,11 +117,11 @@ public abstract class HiveContextAwareRe
if(retVal) {
if(key instanceof RecordIdentifier) {
//supports AcidInputFormat which uses the KEY pass ROW__ID info
- ioCxtRef.ri = (RecordIdentifier)key;
+ ioCxtRef.setRecordIdentifier((RecordIdentifier)key);
}
else if(recordReader instanceof AcidInputFormat.AcidRecordReader) {
//supports AcidInputFormat which do not use the KEY pass ROW__ID info
- ioCxtRef.ri = ((AcidInputFormat.AcidRecordReader) recordReader).getRecordIdentifier();
+ ioCxtRef.setRecordIdentifier(((AcidInputFormat.AcidRecordReader) recordReader).getRecordIdentifier());
}
}
return retVal;
@@ -133,43 +134,43 @@ public abstract class HiveContextAwareRe
protected void updateIOContext()
throws IOException {
long pointerPos = this.getPos();
- if (!ioCxtRef.isBlockPointer) {
- ioCxtRef.currentBlockStart = pointerPos;
- ioCxtRef.currentRow = 0;
+ if (!ioCxtRef.isBlockPointer()) {
+ ioCxtRef.setCurrentBlockStart(pointerPos);
+ ioCxtRef.setCurrentRow(0);
return;
}
- ioCxtRef.currentRow++;
+ ioCxtRef.setCurrentRow(ioCxtRef.getCurrentRow() + 1);
- if (ioCxtRef.nextBlockStart == -1) {
- ioCxtRef.nextBlockStart = pointerPos;
- ioCxtRef.currentRow = 0;
+ if (ioCxtRef.getNextBlockStart() == -1) {
+ ioCxtRef.setNextBlockStart(pointerPos);
+ ioCxtRef.setCurrentRow(0);
}
- if (pointerPos != ioCxtRef.nextBlockStart) {
+ if (pointerPos != ioCxtRef.getNextBlockStart()) {
// the reader pointer has moved to the end of next block, or the end of
// current record.
- ioCxtRef.currentRow = 0;
+ ioCxtRef.setCurrentRow(0);
- if (ioCxtRef.currentBlockStart == ioCxtRef.nextBlockStart) {
- ioCxtRef.currentRow = 1;
+ if (ioCxtRef.getCurrentBlockStart() == ioCxtRef.getNextBlockStart()) {
+ ioCxtRef.setCurrentRow(1);
}
- ioCxtRef.currentBlockStart = ioCxtRef.nextBlockStart;
- ioCxtRef.nextBlockStart = pointerPos;
+ ioCxtRef.setCurrentBlockStart(ioCxtRef.getNextBlockStart());
+ ioCxtRef.setNextBlockStart(pointerPos);
}
}
public IOContext getIOContext() {
- return IOContext.get(jobConf.get(Utilities.INPUT_NAME));
+ return IOContext.get(jobConf);
}
private void initIOContext(long startPos, boolean isBlockPointer,
Path inputPath) {
ioCxtRef = this.getIOContext();
- ioCxtRef.currentBlockStart = startPos;
- ioCxtRef.isBlockPointer = isBlockPointer;
- ioCxtRef.inputPath = inputPath;
+ ioCxtRef.setCurrentBlockStart(startPos);
+ ioCxtRef.setBlockPointer(isBlockPointer);
+ ioCxtRef.setInputPath(inputPath);
LOG.info("Processing file " + inputPath);
initDone = true;
}
@@ -222,7 +223,7 @@ public abstract class HiveContextAwareRe
// Binary search only works if we know the size of the split, and the recordReader is an
// RCFileRecordReader
this.getIOContext().setUseSorted(true);
- this.getIOContext().setIsBinarySearching(true);
+ this.getIOContext().setBinarySearching(true);
this.wasUsingSortedSearch = true;
} else {
// Use the default methods for next in the child class
@@ -284,7 +285,7 @@ public abstract class HiveContextAwareRe
// binary search, if the new position at least as big as the size of the split, any
// matching rows must be in the final block, so we can end the binary search.
if (newPosition == previousPosition || newPosition >= splitEnd) {
- this.getIOContext().setIsBinarySearching(false);
+ this.getIOContext().setBinarySearching(false);
sync(rangeStart);
}
@@ -310,8 +311,10 @@ public abstract class HiveContextAwareRe
Path filePath = this.ioCxtRef.getInputPath();
PartitionDesc part = null;
try {
- Map<String, PartitionDesc> pathToPartitionInfo = Utilities
+ if (pathToPartitionInfo == null) {
+ pathToPartitionInfo = Utilities
.getMapWork(jobConf).getPathToPartitionInfo();
+ }
part = HiveFileFormatUtils
.getPartitionDescFromPathRecursively(pathToPartitionInfo,
filePath, IOPrepareCache.get().getPartitionDescMap());
@@ -402,7 +405,7 @@ public abstract class HiveContextAwareRe
*/
private void beginLinearSearch() throws IOException {
sync(rangeStart);
- this.getIOContext().setIsBinarySearching(false);
+ this.getIOContext().setBinarySearching(false);
this.wasUsingSortedSearch = false;
}
Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/IOContext.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/IOContext.java?rev=1654355&r1=1654354&r2=1654355&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/IOContext.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/IOContext.java Fri Jan 23 19:59:11 2015
@@ -21,8 +21,10 @@ package org.apache.hadoop.hive.ql.io;
import java.util.HashMap;
import java.util.Map;
+import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
-
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.exec.Utilities;
/**
* IOContext basically contains the position information of the current
@@ -34,18 +36,30 @@ import org.apache.hadoop.fs.Path;
*/
public class IOContext {
- private static ThreadLocal<IOContext> threadLocal = new ThreadLocal<IOContext>(){
+ /**
+ * Spark uses this thread local.
+ */
+ private static final ThreadLocal<IOContext> threadLocal = new ThreadLocal<IOContext>(){
@Override
protected synchronized IOContext initialValue() { return new IOContext(); }
};
- private static Map<String, IOContext> inputNameIOContextMap = new HashMap<String, IOContext>();
- public static Map<String, IOContext> getMap() {
- return inputNameIOContextMap;
+ private static IOContext get() {
+ return IOContext.threadLocal.get();
}
- public static IOContext get(String inputName) {
- if (inputNameIOContextMap.containsKey(inputName) == false) {
+ /**
+ * Tez and MR use this map, but they are single threaded per JVM, so no synchronization is required.
+ */
+ private static final Map<String, IOContext> inputNameIOContextMap = new HashMap<String, IOContext>();
+
+
+ public static IOContext get(Configuration conf) {
+ if (HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("spark")) {
+ return get();
+ }
+ String inputName = conf.get(Utilities.INPUT_NAME);
+ if (!inputNameIOContextMap.containsKey(inputName)) {
IOContext ioContext = new IOContext();
inputNameIOContextMap.put(inputName, ioContext);
}
@@ -58,26 +72,26 @@ public class IOContext {
inputNameIOContextMap.clear();
}
- long currentBlockStart;
- long nextBlockStart;
- long currentRow;
- boolean isBlockPointer;
- boolean ioExceptions;
+ private long currentBlockStart;
+ private long nextBlockStart;
+ private long currentRow;
+ private boolean isBlockPointer;
+ private boolean ioExceptions;
// Are we using the fact the input is sorted
- boolean useSorted = false;
+ private boolean useSorted = false;
// Are we currently performing a binary search
- boolean isBinarySearching = false;
+ private boolean isBinarySearching = false;
// Do we want to end the binary search
- boolean endBinarySearch = false;
+ private boolean endBinarySearch = false;
// The result of the comparison of the last row processed
- Comparison comparison = null;
+ private Comparison comparison = null;
// The class name of the generic UDF being used by the filter
- String genericUDFClassName = null;
+ private String genericUDFClassName = null;
/**
* supports {@link org.apache.hadoop.hive.ql.metadata.VirtualColumn#ROWID}
*/
- public RecordIdentifier ri;
+ private RecordIdentifier ri;
public static enum Comparison {
GREATER,
@@ -86,7 +100,7 @@ public class IOContext {
UNKNOWN
}
- Path inputPath;
+ private Path inputPath;
public IOContext() {
this.currentBlockStart = 0;
@@ -156,7 +170,7 @@ public class IOContext {
return isBinarySearching;
}
- public void setIsBinarySearching(boolean isBinarySearching) {
+ public void setBinarySearching(boolean isBinarySearching) {
this.isBinarySearching = isBinarySearching;
}
@@ -197,6 +211,14 @@ public class IOContext {
this.genericUDFClassName = genericUDFClassName;
}
+ public RecordIdentifier getRecordIdentifier() {
+ return this.ri;
+ }
+
+ public void setRecordIdentifier(RecordIdentifier ri) {
+ this.ri = ri;
+ }
+
/**
* The thread local IOContext is static, we may need to restart the search if, for instance,
* multiple files are being searched as part of a CombinedHiveRecordReader
@@ -208,4 +230,5 @@ public class IOContext {
this.comparison = null;
this.genericUDFClassName = null;
}
+
}
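Note: IOContext.get(Configuration) now hides the storage choice from callers: a per-thread context when the execution engine is Spark (many tasks can share one JVM), and the per-input-name map for MR and Tez. A caller sketch:

    // Replaces IOContext.get(jobConf.get(Utilities.INPUT_NAME)):
    //   "spark"      -> thread-local instance
    //   "mr" / "tez" -> map keyed by conf.get(Utilities.INPUT_NAME)
    IOContext ioCxt = IOContext.get(jobConf);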
Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/SymbolicInputFormat.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/SymbolicInputFormat.java?rev=1654355&r1=1654354&r2=1654355&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/SymbolicInputFormat.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/SymbolicInputFormat.java Fri Jan 23 19:59:11 2015
@@ -29,6 +29,7 @@ import java.util.Map;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.common.FileUtils;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.plan.MapredWork;
import org.apache.hadoop.hive.ql.plan.PartitionDesc;
@@ -58,7 +59,7 @@ public class SymbolicInputFormat impleme
if (!fStatus.isDir()) {
symlinks = new FileStatus[] { fStatus };
} else {
- symlinks = fileSystem.listStatus(symlinkDir);
+ symlinks = fileSystem.listStatus(symlinkDir, FileUtils.HIDDEN_FILES_PATH_FILTER);
}
toRemovePaths.add(path);
ArrayList<String> aliases = pathToAliases.remove(path);
Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/SymlinkTextInputFormat.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/SymlinkTextInputFormat.java?rev=1654355&r1=1654354&r2=1654355&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/SymlinkTextInputFormat.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/SymlinkTextInputFormat.java Fri Jan 23 19:59:11 2015
@@ -23,19 +23,15 @@ import java.io.DataOutput;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
-import java.util.HashMap;
import java.util.List;
-import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.common.FileUtils;
import org.apache.hadoop.hive.io.HiveIOExceptionHandlerUtil;
-import org.apache.hadoop.hive.ql.plan.MapredWork;
-import org.apache.hadoop.hive.ql.plan.PartitionDesc;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
@@ -192,7 +188,7 @@ public class SymlinkTextInputFormat exte
List<Path> targetPaths, List<Path> symlinkPaths) throws IOException {
for (Path symlinkDir : symlinksDirs) {
FileSystem fileSystem = symlinkDir.getFileSystem(conf);
- FileStatus[] symlinks = fileSystem.listStatus(symlinkDir);
+ FileStatus[] symlinks = fileSystem.listStatus(symlinkDir, FileUtils.HIDDEN_FILES_PATH_FILTER);
// Read paths from each symlink file.
for (FileStatus symlink : symlinks) {
Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/avro/AvroContainerOutputFormat.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/avro/AvroContainerOutputFormat.java?rev=1654355&r1=1654354&r2=1654355&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/avro/AvroContainerOutputFormat.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/avro/AvroContainerOutputFormat.java Fri Jan 23 19:59:11 2015
@@ -60,7 +60,7 @@ public class AvroContainerOutputFormat
Properties properties, Progressable progressable) throws IOException {
Schema schema;
try {
- schema = AvroSerdeUtils.determineSchemaOrThrowException(properties);
+ schema = AvroSerdeUtils.determineSchemaOrThrowException(jobConf, properties);
} catch (AvroSerdeException e) {
throw new IOException(e);
}
Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/avro/AvroGenericRecordReader.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/avro/AvroGenericRecordReader.java?rev=1654355&r1=1654354&r2=1654355&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/avro/AvroGenericRecordReader.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/avro/AvroGenericRecordReader.java Fri Jan 23 19:59:11 2015
@@ -109,7 +109,7 @@ public class AvroGenericRecordReader imp
Properties props = pathsAndParts.getValue().getProperties();
if(props.containsKey(AvroSerdeUtils.SCHEMA_LITERAL) || props.containsKey(AvroSerdeUtils.SCHEMA_URL)) {
- return AvroSerdeUtils.determineSchemaOrThrowException(props);
+ return AvroSerdeUtils.determineSchemaOrThrowException(job, props);
}
else {
return null; // If it's not in this property, it won't be in any others
Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatistics.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatistics.java?rev=1654355&r1=1654354&r2=1654355&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatistics.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatistics.java Fri Jan 23 19:59:11 2015
@@ -27,4 +27,10 @@ public interface ColumnStatistics {
* @return the number of values
*/
long getNumberOfValues();
+
+ /**
+ * Returns true if there are nulls in the scope of these column statistics.
+ * @return true if a null is present, else false
+ */
+ boolean hasNull();
}
Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java?rev=1654355&r1=1654354&r2=1654355&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java Fri Jan 23 19:59:11 2015
@@ -17,8 +17,6 @@
*/
package org.apache.hadoop.hive.ql.io.orc;
-import java.sql.Timestamp;
-
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.serde2.io.DateWritable;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
@@ -26,6 +24,8 @@ import org.apache.hadoop.hive.serde2.obj
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Text;
+import java.sql.Timestamp;
+
class ColumnStatisticsImpl implements ColumnStatistics {
private static final class BooleanStatisticsImpl extends ColumnStatisticsImpl
@@ -816,11 +816,16 @@ class ColumnStatisticsImpl implements Co
}
private long count = 0;
+ private boolean hasNull = false;
ColumnStatisticsImpl(OrcProto.ColumnStatistics stats) {
if (stats.hasNumberOfValues()) {
count = stats.getNumberOfValues();
}
+
+ if (stats.hasHasNull()) {
+ hasNull = stats.getHasNull();
+ }
}
ColumnStatisticsImpl() {
@@ -830,6 +835,10 @@ class ColumnStatisticsImpl implements Co
count += 1;
}
+ void setNull() {
+ hasNull = true;
+ }
+
void updateBoolean(boolean value) {
throw new UnsupportedOperationException("Can't update boolean");
}
@@ -864,10 +873,12 @@ class ColumnStatisticsImpl implements Co
void merge(ColumnStatisticsImpl stats) {
count += stats.count;
+ hasNull |= stats.hasNull;
}
void reset() {
count = 0;
+ hasNull = false;
}
@Override
@@ -876,14 +887,20 @@ class ColumnStatisticsImpl implements Co
}
@Override
+ public boolean hasNull() {
+ return hasNull;
+ }
+
+ @Override
public String toString() {
- return "count: " + count;
+ return "count: " + count + " hasNull: " + hasNull;
}
OrcProto.ColumnStatistics.Builder serialize() {
OrcProto.ColumnStatistics.Builder builder =
OrcProto.ColumnStatistics.newBuilder();
builder.setNumberOfValues(count);
+ builder.setHasNull(hasNull);
return builder;
}
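Note: taken together, the hunks above give the new flag a full writer-side lifecycle. A sketch, assuming stats is a ColumnStatisticsImpl for one column:

    stats.setNull();     // a null value was seen; hasNull = true
    stats.merge(other);  // hasNull |= other.hasNull (count += other.count)
    stats.reset();       // count = 0, hasNull = false
    stats.serialize();   // builder.setHasNull(hasNull) alongside the count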
Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java?rev=1654355&r1=1654354&r2=1654355&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java Fri Jan 23 19:59:11 2015
@@ -17,15 +17,6 @@
*/
package org.apache.hadoop.hive.ql.io.orc;
-import java.io.OutputStreamWriter;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-
-import java.io.IOException;
-import java.text.DecimalFormat;
-import java.util.Map;
-
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.HelpFormatter;
@@ -46,6 +37,14 @@ import org.apache.hadoop.io.LongWritable
import org.codehaus.jettison.json.JSONException;
import org.codehaus.jettison.json.JSONWriter;
+import java.io.IOException;
+import java.io.OutputStreamWriter;
+import java.text.DecimalFormat;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+
/**
* A tool for printing out the file structure of ORC files.
*/
@@ -170,10 +169,7 @@ public final class FileDump {
buf.append("no stats at ");
} else {
ColumnStatistics cs = ColumnStatisticsImpl.deserialize(colStats);
- Object min = RecordReaderImpl.getMin(cs), max = RecordReaderImpl.getMax(cs);
- buf.append(" count: ").append(cs.getNumberOfValues());
- buf.append(" min: ").append(min);
- buf.append(" max: ").append(max);
+ buf.append(cs.toString());
}
buf.append(" positions: ");
for (int posIx = 0; posIx < entry.getPositionsCount(); ++posIx) {
Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java?rev=1654355&r1=1654354&r2=1654355&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java Fri Jan 23 19:59:11 2015
@@ -18,18 +18,9 @@
package org.apache.hadoop.hive.ql.io.orc;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.TreeMap;
-import java.util.NavigableMap;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-import java.util.concurrent.TimeUnit;
-import java.util.concurrent.atomic.AtomicInteger;
+import com.google.common.cache.Cache;
+import com.google.common.cache.CacheBuilder;
+import com.google.common.util.concurrent.ThreadFactoryBuilder;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@@ -58,9 +49,9 @@ import org.apache.hadoop.hive.ql.io.Reco
import org.apache.hadoop.hive.ql.io.StatsProvidingRecordReader;
import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
+import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory;
import org.apache.hadoop.hive.ql.log.PerfLogger;
-import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue;
import org.apache.hadoop.hive.ql.plan.TableScanDesc;
import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
import org.apache.hadoop.hive.serde2.SerDeStats;
@@ -78,9 +69,18 @@ import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.util.StringUtils;
-import com.google.common.cache.Cache;
-import com.google.common.cache.CacheBuilder;
-import com.google.common.util.concurrent.ThreadFactoryBuilder;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.NavigableMap;
+import java.util.TreeMap;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicInteger;
/**
* A MapReduce/Hive input format for ORC files.
* <p>
@@ -919,13 +919,8 @@ public class OrcInputFormat implements
if (filterColumns[pred] != -1) {
// column statistics at index 0 contains only the number of rows
- ColumnStatistics stats =
- stripeStatistics.getColumnStatistics()[filterColumns[pred]];
- Object minValue = RecordReaderImpl.getMin(stats);
- Object maxValue = RecordReaderImpl.getMax(stats);
- truthValues[pred] =
- RecordReaderImpl.evaluatePredicateRange(predLeaves.get(pred),
- minValue, maxValue);
+ ColumnStatistics stats = stripeStatistics.getColumnStatistics()[filterColumns[pred]];
+ truthValues[pred] = RecordReaderImpl.evaluatePredicate(stats, predLeaves.get(pred));
} else {
// partition column case.
Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java?rev=1654355&r1=1654354&r2=1654355&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java Fri Jan 23 19:59:11 2015
@@ -2377,21 +2377,36 @@ public class RecordReaderImpl implements
/**
* Evaluate a predicate with respect to the statistics from the column
* that is referenced in the predicate.
- * @param index the statistics for the column mentioned in the predicate
+ * @param statsProto the statistics for the column mentioned in the predicate
* @param predicate the leaf predicate we need to evaluate
* @return the set of truth values that may be returned for the given
* predicate.
*/
- static TruthValue evaluatePredicate(OrcProto.ColumnStatistics index,
+ static TruthValue evaluatePredicate(OrcProto.ColumnStatistics statsProto,
PredicateLeaf predicate) {
- ColumnStatistics cs = ColumnStatisticsImpl.deserialize(index);
+ ColumnStatistics cs = ColumnStatisticsImpl.deserialize(statsProto);
Object minValue = getMin(cs);
Object maxValue = getMax(cs);
- return evaluatePredicateRange(predicate, minValue, maxValue);
+ return evaluatePredicateRange(predicate, minValue, maxValue, cs.hasNull());
+ }
+
+ /**
+ * Evaluate a predicate with respect to the statistics from the column
+ * that is referenced in the predicate.
+ * @param stats the statistics for the column mentioned in the predicate
+ * @param predicate the leaf predicate we need to evaluate
+ * @return the set of truth values that may be returned for the given
+ * predicate.
+ */
+ static TruthValue evaluatePredicate(ColumnStatistics stats,
+ PredicateLeaf predicate) {
+ Object minValue = getMin(stats);
+ Object maxValue = getMax(stats);
+ return evaluatePredicateRange(predicate, minValue, maxValue, stats.hasNull());
}
static TruthValue evaluatePredicateRange(PredicateLeaf predicate, Object min,
- Object max) {
+ Object max, boolean hasNull) {
// if we didn't have any values, everything must have been null
if (min == null) {
if (predicate.getOperator() == PredicateLeaf.Operator.IS_NULL) {
@@ -2426,29 +2441,29 @@ public class RecordReaderImpl implements
case EQUALS:
loc = compareToRange((Comparable) predObj, minValue, maxValue);
if (minValue.equals(maxValue) && loc == Location.MIN) {
- return TruthValue.YES_NULL;
+ return hasNull ? TruthValue.YES_NULL : TruthValue.YES;
} else if (loc == Location.BEFORE || loc == Location.AFTER) {
- return TruthValue.NO_NULL;
+ return hasNull ? TruthValue.NO_NULL : TruthValue.NO;
} else {
- return TruthValue.YES_NO_NULL;
+ return hasNull ? TruthValue.YES_NO_NULL : TruthValue.YES_NO;
}
case LESS_THAN:
loc = compareToRange((Comparable) predObj, minValue, maxValue);
if (loc == Location.AFTER) {
- return TruthValue.YES_NULL;
+ return hasNull ? TruthValue.YES_NULL : TruthValue.YES;
} else if (loc == Location.BEFORE || loc == Location.MIN) {
- return TruthValue.NO_NULL;
+ return hasNull ? TruthValue.NO_NULL : TruthValue.NO;
} else {
- return TruthValue.YES_NO_NULL;
+ return hasNull ? TruthValue.YES_NO_NULL : TruthValue.YES_NO;
}
case LESS_THAN_EQUALS:
loc = compareToRange((Comparable) predObj, minValue, maxValue);
if (loc == Location.AFTER || loc == Location.MAX) {
- return TruthValue.YES_NULL;
+ return hasNull ? TruthValue.YES_NULL : TruthValue.YES;
} else if (loc == Location.BEFORE) {
- return TruthValue.NO_NULL;
+ return hasNull ? TruthValue.NO_NULL : TruthValue.NO;
} else {
- return TruthValue.YES_NO_NULL;
+ return hasNull ? TruthValue.YES_NO_NULL : TruthValue.YES_NO;
}
case IN:
if (minValue.equals(maxValue)) {
@@ -2458,10 +2473,10 @@ public class RecordReaderImpl implements
predObj = getBaseObjectForComparison(arg, minValue);
loc = compareToRange((Comparable) predObj, minValue, maxValue);
if (loc == Location.MIN) {
- return TruthValue.YES_NULL;
+ return hasNull ? TruthValue.YES_NULL : TruthValue.YES;
}
}
- return TruthValue.NO_NULL;
+ return hasNull ? TruthValue.NO_NULL : TruthValue.NO;
} else {
// are all of the values outside of the range?
for (Object arg : predicate.getLiteralList(PredicateLeaf.FileFormat.ORC)) {
@@ -2469,10 +2484,10 @@ public class RecordReaderImpl implements
loc = compareToRange((Comparable) predObj, minValue, maxValue);
if (loc == Location.MIN || loc == Location.MIDDLE ||
loc == Location.MAX) {
- return TruthValue.YES_NO_NULL;
+ return hasNull ? TruthValue.YES_NO_NULL : TruthValue.YES_NO;
}
}
- return TruthValue.NO_NULL;
+ return hasNull ? TruthValue.NO_NULL : TruthValue.NO;
}
case BETWEEN:
List<Object> args = predicate.getLiteralList(PredicateLeaf.FileFormat.ORC);
@@ -2484,26 +2499,26 @@ public class RecordReaderImpl implements
Location loc2 = compareToRange((Comparable) predObj2, minValue, maxValue);
if (loc2 == Location.AFTER || loc2 == Location.MAX) {
- return TruthValue.YES_NULL;
+ return hasNull ? TruthValue.YES_NULL : TruthValue.YES;
} else if (loc2 == Location.BEFORE) {
- return TruthValue.NO_NULL;
+ return hasNull ? TruthValue.NO_NULL : TruthValue.NO;
} else {
- return TruthValue.YES_NO_NULL;
+ return hasNull ? TruthValue.YES_NO_NULL : TruthValue.YES_NO;
}
} else if (loc == Location.AFTER) {
- return TruthValue.NO_NULL;
+ return hasNull ? TruthValue.NO_NULL : TruthValue.NO;
} else {
- return TruthValue.YES_NO_NULL;
+ return hasNull ? TruthValue.YES_NO_NULL : TruthValue.YES_NO;
}
case IS_NULL:
- return TruthValue.YES_NO;
+ return hasNull ? TruthValue.YES : TruthValue.NO;
default:
- return TruthValue.YES_NO_NULL;
+ return hasNull ? TruthValue.YES_NO_NULL : TruthValue.YES_NO;
}
// in case of a failed conversion, return the default truth value
} catch (NumberFormatException nfe) {
- return TruthValue.YES_NO_NULL;
+ return hasNull ? TruthValue.YES_NO_NULL : TruthValue.YES_NO;
}
}
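The net effect of threading hasNull through evaluatePredicateRange is that the *_NULL truth values are no longer unconditional: when the statistics say a column contains no nulls, EQUALS, LESS_THAN, and the rest collapse to exact YES, NO, or YES_NO, and IS_NULL becomes decidable. A hypothetical helper (not part of this patch) capturing the ternary pattern repeated throughout the hunk:

    // Illustrative only: widen a non-null truth value with its *_NULL
    // variant exactly when the column statistics report nulls.
    static TruthValue addNullVariant(TruthValue v, boolean hasNull) {
      if (!hasNull) {
        return v; // no nulls in the column, the answer is already exact
      }
      switch (v) {
        case YES:    return TruthValue.YES_NULL;
        case NO:     return TruthValue.NO_NULL;
        case YES_NO: return TruthValue.YES_NO_NULL;
        default:     return v; // already accounts for nulls
      }
    }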
Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java?rev=1654355&r1=1654354&r2=1654355&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java Fri Jan 23 19:59:11 2015
@@ -656,6 +656,8 @@ class WriterImpl implements Writer, Memo
void write(Object obj) throws IOException {
if (obj != null) {
indexStatistics.increment();
+ } else {
+ indexStatistics.setNull();
}
if (isPresent != null) {
isPresent.write(obj == null ? 0 : 1);
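The WriterImpl change is the write-side half of the hasNull plumbing above: every value either increments the statistics or marks them as containing a null, which is what makes hasNull() trustworthy at read time. A stripped-down sketch of that bookkeeping (the real ColumnStatisticsImpl also tracks min/max and much more):

    // Minimal stand-in for the statistics object; illustration only.
    class NullTrackingStatistics {
      private long valueCount = 0;
      private boolean foundNull = false;

      void increment() { ++valueCount; }     // a non-null value was written
      void setNull()   { foundNull = true; } // at least one null was written

      boolean hasNull() { return foundNull; }
      long getNumberOfValues() { return valueCount; }
    }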
Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java?rev=1654355&r1=1654354&r2=1654355&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java Fri Jan 23 19:59:11 2015
@@ -66,8 +66,8 @@ public class DataWritableWriter {
writeGroupFields(record, schema);
} catch (RuntimeException e) {
String errorMessage = "Parquet record is malformed: " + e.getMessage();
- LOG.error(errorMessage);
- throw new RuntimeException(errorMessage);
+ LOG.error(errorMessage, e);
+ throw new RuntimeException(errorMessage, e);
}
recordConsumer.endMessage();
}
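The DataWritableWriter change is a standard exception-chaining fix: rethrowing with only e.getMessage() discards the original stack trace, so a malformed-record failure could not be traced back to the offending field writer. Chaining the cause is plain java.lang behavior, nothing Hive-specific; a self-contained demonstration:

    // Demonstration only, not Hive code.
    public class CauseDemo {
      public static void main(String[] args) {
        try {
          throw new IllegalStateException("bad field at position 3");
        } catch (RuntimeException e) {
          // Without the second argument, the IllegalStateException and its
          // stack trace vanish; with it, they print as "Caused by: ...".
          throw new RuntimeException(
              "Parquet record is malformed: " + e.getMessage(), e);
        }
      }
    }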
Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/stats/PartialScanTask.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/stats/PartialScanTask.java?rev=1654355&r1=1654354&r2=1654355&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/stats/PartialScanTask.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/stats/PartialScanTask.java Fri Jan 23 19:59:11 2015
@@ -51,7 +51,6 @@ import org.apache.hadoop.hive.ql.session
import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
import org.apache.hadoop.hive.ql.stats.StatsFactory;
import org.apache.hadoop.hive.ql.stats.StatsPublisher;
-import org.apache.hadoop.hive.shims.ShimLoader;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapred.Counters;
import org.apache.hadoop.mapred.FileInputFormat;
@@ -148,7 +147,7 @@ public class PartialScanTask extends Tas
job.setInputFormat((Class<? extends InputFormat>) (Class
.forName(inpFormat)));
} catch (ClassNotFoundException e) {
- throw new RuntimeException(e.getMessage());
+ throw new RuntimeException(e.getMessage(), e);
}
job.setOutputKeyClass(NullWritable.class);
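The same cause-preserving rethrow recurs here and in ColumnTruncateTask below, this time wrapping a checked ClassNotFoundException from Class.forName. A compilable shape of that block, with inpFormat standing in for the configured input format class name as in the hunk above:

    try {
      // the unchecked cast mirrors the committed code
      job.setInputFormat((Class<? extends InputFormat>) Class.forName(inpFormat));
    } catch (ClassNotFoundException e) {
      // e.getMessage() is just the class name; chaining e keeps the
      // classloader stack trace for diagnosis
      throw new RuntimeException(e.getMessage(), e);
    }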
Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/truncate/ColumnTruncateTask.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/truncate/ColumnTruncateTask.java?rev=1654355&r1=1654354&r2=1654355&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/truncate/ColumnTruncateTask.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/truncate/ColumnTruncateTask.java Fri Jan 23 19:59:11 2015
@@ -39,7 +39,6 @@ import org.apache.hadoop.hive.ql.io.Hive
import org.apache.hadoop.hive.ql.plan.MapredWork;
import org.apache.hadoop.hive.ql.plan.api.StageType;
import org.apache.hadoop.hive.ql.session.SessionState;
-import org.apache.hadoop.hive.shims.ShimLoader;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapred.Counters;
import org.apache.hadoop.mapred.FileInputFormat;
@@ -124,7 +123,7 @@ public class ColumnTruncateTask extends
job.setInputFormat((Class<? extends InputFormat>) (Class
.forName(inpFormat)));
} catch (ClassNotFoundException e) {
- throw new RuntimeException(e.getMessage());
+ throw new RuntimeException(e.getMessage(), e);
}
Path outputPath = this.work.getOutputDir();
Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DummyTxnManager.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DummyTxnManager.java?rev=1654355&r1=1654354&r2=1654355&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DummyTxnManager.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DummyTxnManager.java Fri Jan 23 19:59:11 2015
@@ -254,9 +254,13 @@ class DummyTxnManager extends HiveTxnMan
private HiveLockMode getWriteEntityLockMode (WriteEntity we) {
HiveLockMode lockMode = we.isComplete() ? HiveLockMode.EXCLUSIVE : HiveLockMode.SHARED;
- //but the writeEntity is complete in DDL operations, and we need check its writeType to
- //to determine the lockMode
- switch (we.getWriteType()) {
+ //but the writeEntity is complete in DDL operations; DDL sets the writeType instead, so
+ //we use it to determine the lockMode, checking first that the writeType was actually set
+ WriteEntity.WriteType writeType = we.getWriteType();
+ if (writeType == null) {
+ return lockMode;
+ }
+ switch (writeType) {
case DDL_EXCLUSIVE:
return HiveLockMode.EXCLUSIVE;
case DDL_SHARED: