Posted to commits@hive.apache.org by br...@apache.org on 2014/09/20 19:34:43 UTC
svn commit: r1626482 [3/6] - in /hive/branches/spark: ./
accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/mr/
accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/serde/
common/src/java/org/apache/hadoop/hive/conf/ data/files/ hcatalog/hc...
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFCount.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFCount.java?rev=1626482&r1=1626481&r2=1626482&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFCount.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFCount.java Sat Sep 20 17:34:39 2014
@@ -46,15 +46,7 @@ public class VectorUDAFCount extends Vec
private static final long serialVersionUID = 1L;
- transient private long value;
- transient private boolean isNull;
-
- public void initIfNull() {
- if (isNull) {
- isNull = false;
- value = 0;
- }
- }
+ transient private long count;
@Override
public int getVariableSize() {
@@ -63,8 +55,7 @@ public class VectorUDAFCount extends Vec
@Override
public void reset() {
- isNull = true;
- value = 0L;
+ count = 0L;
}
}
@@ -131,8 +122,7 @@ public class VectorUDAFCount extends Vec
aggregationBufferSets,
aggregateIndex,
i);
- myagg.initIfNull();
- myagg.value++;
+ myagg.count++;
}
}
@@ -148,8 +138,7 @@ public class VectorUDAFCount extends Vec
aggregationBufferSets,
aggregateIndex,
i);
- myagg.initIfNull();
- myagg.value++;
+ myagg.count++;
}
}
}
@@ -168,8 +157,7 @@ public class VectorUDAFCount extends Vec
aggregationBufferSets,
aggregateIndex,
j);
- myagg.initIfNull();
- myagg.value++;
+ myagg.count++;
}
}
}
@@ -191,17 +179,15 @@ public class VectorUDAFCount extends Vec
Aggregation myagg = (Aggregation)agg;
- myagg.initIfNull();
-
if (inputVector.isRepeating) {
if (inputVector.noNulls || !inputVector.isNull[0]) {
- myagg.value += batchSize;
+ myagg.count += batchSize;
}
return;
}
if (inputVector.noNulls) {
- myagg.value += batchSize;
+ myagg.count += batchSize;
return;
}
else if (!batch.selectedInUse) {
@@ -221,7 +207,7 @@ public class VectorUDAFCount extends Vec
for (int j=0; j< batchSize; ++j) {
int i = selected[j];
if (!isNull[i]) {
- myagg.value += 1;
+ myagg.count += 1;
}
}
}
@@ -233,7 +219,7 @@ public class VectorUDAFCount extends Vec
for (int i=0; i< batchSize; ++i) {
if (!isNull[i]) {
- myagg.value += 1;
+ myagg.count += 1;
}
}
}
@@ -251,14 +237,9 @@ public class VectorUDAFCount extends Vec
@Override
public Object evaluateOutput(AggregationBuffer agg) throws HiveException {
- Aggregation myagg = (Aggregation) agg;
- if (myagg.isNull) {
- return null;
- }
- else {
- result.set (myagg.value);
+ Aggregation myagg = (Aggregation) agg;
+ result.set (myagg.count);
return result;
- }
}
@Override
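Note on the VectorUDAFCount change above: the isNull flag is dropped from the aggregation buffer because COUNT never produces NULL; an empty group evaluates to 0, so tracking "no rows seen yet" only added branches to the per-row loop. A minimal illustrative sketch (not the Hive class, names are made up) of why a bare counter is enough:

    // Illustrative sketch only: a COUNT accumulator needs no null flag,
    // because counting zero rows yields 0, never NULL.
    public class CountBufferSketch {
        private long count;                    // 0 after reset(), exactly what COUNT of no rows returns

        void reset()      { count = 0L; }
        void add(long n)  { count += n; }      // a whole batch of qualifying rows at once
        void increment()  { count++; }         // a single qualifying row
        long evaluate()   { return count; }    // always a value; an isNull flag would be dead weight

        public static void main(String[] args) {
            CountBufferSketch buf = new CountBufferSketch();
            buf.reset();
            System.out.println(buf.evaluate());   // 0, not null, before any rows arrive
            buf.add(1024);
            buf.increment();
            System.out.println(buf.evaluate());   // 1025
        }
    }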
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFCountStar.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFCountStar.java?rev=1626482&r1=1626481&r2=1626482&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFCountStar.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFCountStar.java Sat Sep 20 17:34:39 2014
@@ -44,8 +44,7 @@ public class VectorUDAFCountStar extends
private static final long serialVersionUID = 1L;
- transient private long value;
- transient private boolean isNull;
+ transient private long count;
@Override
public int getVariableSize() {
@@ -54,8 +53,7 @@ public class VectorUDAFCountStar extends
@Override
public void reset() {
- isNull = true;
- value = 0L;
+ count = 0L;
}
}
@@ -95,8 +93,7 @@ public class VectorUDAFCountStar extends
for (int i=0; i < batchSize; ++i) {
Aggregation myAgg = getCurrentAggregationBuffer(
aggregationBufferSets, aggregateIndex, i);
- myAgg.isNull = false;
- ++myAgg.value;
+ ++myAgg.count;
}
}
@@ -111,8 +108,7 @@ public class VectorUDAFCountStar extends
}
Aggregation myagg = (Aggregation)agg;
- myagg.isNull = false;
- myagg.value += batchSize;
+ myagg.count += batchSize;
}
@Override
@@ -128,14 +124,9 @@ public class VectorUDAFCountStar extends
@Override
public Object evaluateOutput(AggregationBuffer agg) throws HiveException {
- Aggregation myagg = (Aggregation) agg;
- if (myagg.isNull) {
- return null;
- }
- else {
- result.set (myagg.value);
+ Aggregation myagg = (Aggregation) agg;
+ result.set (myagg.count);
return result;
- }
}
@Override
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java?rev=1626482&r1=1626481&r2=1626482&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java Sat Sep 20 17:34:39 2014
@@ -305,6 +305,28 @@ public class AcidUtils {
}
/**
+ * Is the given directory in ACID format?
+ * @param directory the partition directory to check
+ * @param conf the query configuration
+ * @return true, if it is an ACID directory
+ * @throws IOException
+ */
+ public static boolean isAcid(Path directory,
+ Configuration conf) throws IOException {
+ FileSystem fs = directory.getFileSystem(conf);
+ for(FileStatus file: fs.listStatus(directory)) {
+ String filename = file.getPath().getName();
+ if (filename.startsWith(BASE_PREFIX) ||
+ filename.startsWith(DELTA_PREFIX)) {
+ if (file.isDir()) {
+ return true;
+ }
+ }
+ }
+ return false;
+ }
+
+ /**
* Get the ACID state of the given directory. It finds the minimal set of
* base and diff directories. Note that because major compactions don't
* preserve the history, we can't use a base directory that includes a
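The new AcidUtils.isAcid() above treats a partition directory as ACID if it contains at least one subdirectory whose name starts with the base or delta prefix. A rough stand-alone sketch of the same check against a local directory; the literal prefix values and class name here are assumptions for illustration (Hive reads them from AcidUtils.BASE_PREFIX / DELTA_PREFIX and works against a Hadoop FileSystem):

    import java.io.File;

    public class AcidLayoutCheck {
        // Assumed prefix values, matching the conventional base_/delta_ directory names.
        private static final String BASE_PREFIX = "base_";
        private static final String DELTA_PREFIX = "delta_";

        /** Returns true if the directory contains a base_* or delta_* subdirectory. */
        static boolean looksLikeAcid(File partitionDir) {
            File[] children = partitionDir.listFiles();
            if (children == null) {
                return false;                       // not a directory or not readable
            }
            for (File child : children) {
                String name = child.getName();
                if ((name.startsWith(BASE_PREFIX) || name.startsWith(DELTA_PREFIX))
                    && child.isDirectory()) {
                    return true;
                }
            }
            return false;
        }

        public static void main(String[] args) {
            System.out.println(looksLikeAcid(new File(args.length > 0 ? args[0] : ".")));
        }
    }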
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/BucketizedHiveInputFormat.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/BucketizedHiveInputFormat.java?rev=1626482&r1=1626481&r2=1626482&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/BucketizedHiveInputFormat.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/BucketizedHiveInputFormat.java Sat Sep 20 17:34:39 2014
@@ -122,24 +122,7 @@ public class BucketizedHiveInputFormat<K
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
init(job);
- Path[] dirs = FileInputFormat.getInputPaths(job);
- if (dirs.length == 0) {
- // on tez we're avoiding to duplicate the file info in FileInputFormat.
- if (HiveConf.getVar(job, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")) {
- try {
- List<Path> paths = Utilities.getInputPathsTez(job, mrwork);
- dirs = paths.toArray(new Path[paths.size()]);
- if (dirs.length == 0) {
- // if we still don't have any files it's time to fail.
- throw new IOException("No input paths specified in job");
- }
- } catch (Exception e) {
- throw new IOException("Could not create input paths", e);
- }
- } else {
- throw new IOException("No input paths specified in job");
- }
- }
+ Path[] dirs = getInputPaths(job);
JobConf newjob = new JobConf(job);
ArrayList<InputSplit> result = new ArrayList<InputSplit>();
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java?rev=1626482&r1=1626481&r2=1626482&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java Sat Sep 20 17:34:39 2014
@@ -33,6 +33,7 @@ import java.util.Set;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@@ -264,8 +265,8 @@ public class CombineHiveInputFormat<K ex
/**
* Create Hive splits based on CombineFileSplit.
*/
- @Override
- public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
+ private InputSplit[] getCombineSplits(JobConf job,
+ int numSplits) throws IOException {
PerfLogger perfLogger = PerfLogger.getPerfLogger();
perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.GET_SPLITS);
init(job);
@@ -274,17 +275,6 @@ public class CombineHiveInputFormat<K ex
mrwork.getAliasToWork();
CombineFileInputFormatShim combine = ShimLoader.getHadoopShims()
.getCombineFileInputFormat();
-
- // on tez we're avoiding duplicating path info since the info will go over
- // rpc
- if (HiveConf.getVar(job, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")) {
- try {
- List<Path> dirs = Utilities.getInputPathsTez(job, mrwork);
- Utilities.setInputPaths(job, dirs);
- } catch (Exception e) {
- throw new IOException("Could not create input paths", e);
- }
- }
InputSplit[] splits = null;
if (combine == null) {
@@ -327,13 +317,6 @@ public class CombineHiveInputFormat<K ex
// ignore
}
FileSystem inpFs = path.getFileSystem(job);
- if (inputFormatClass.isAssignableFrom(OrcInputFormat.class)) {
- if (inpFs.exists(new Path(path, OrcRecordUpdater.ACID_FORMAT))) {
- throw new IOException("CombineHiveInputFormat is incompatible " +
- " with ACID tables. Please set hive.input.format=" +
- "org.apache.hadoop.hive.ql.io.HiveInputFormat");
- }
- }
// Since there is no easy way of knowing whether MAPREDUCE-1597 is present in the tree or not,
// we use a configuration variable for the same
@@ -461,6 +444,84 @@ public class CombineHiveInputFormat<K ex
return result.toArray(new CombineHiveInputSplit[result.size()]);
}
+ /**
+ * Create Hive splits based on CombineFileSplit.
+ */
+ @Override
+ public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
+ init(job);
+ Map<String, ArrayList<String>> pathToAliases = mrwork.getPathToAliases();
+ Map<String, Operator<? extends OperatorDesc>> aliasToWork =
+ mrwork.getAliasToWork();
+
+ ArrayList<InputSplit> result = new ArrayList<InputSplit>();
+
+ Path[] paths = getInputPaths(job);
+
+ List<Path> nonCombinablePaths = new ArrayList<Path>(paths.length / 2);
+ List<Path> combinablePaths = new ArrayList<Path>(paths.length / 2);
+
+ for (Path path : paths) {
+
+ PartitionDesc part =
+ HiveFileFormatUtils.getPartitionDescFromPathRecursively(
+ pathToPartitionInfo, path,
+ IOPrepareCache.get().allocatePartitionDescMap());
+
+ // Use HiveInputFormat if any of the paths is not splittable
+ Class inputFormatClass = part.getInputFileFormatClass();
+ String inputFormatClassName = inputFormatClass.getName();
+ InputFormat inputFormat = getInputFormatFromCache(inputFormatClass, job);
+ if (inputFormat instanceof AvoidSplitCombination &&
+ ((AvoidSplitCombination) inputFormat).shouldSkipCombine(path, job)) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("The split [" + path +
+ "] is being parked for HiveInputFormat.getSplits");
+ }
+ nonCombinablePaths.add(path);
+ } else {
+ combinablePaths.add(path);
+ }
+ }
+
+ // Store the previous value for the path specification
+ String oldPaths = job.get(HiveConf.ConfVars.HADOOPMAPREDINPUTDIR.varname);
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("The received input paths are: [" + oldPaths +
+ "] against the property "
+ + HiveConf.ConfVars.HADOOPMAPREDINPUTDIR.varname);
+ }
+
+ // Process the normal splits
+ if (nonCombinablePaths.size() > 0) {
+ FileInputFormat.setInputPaths(job, nonCombinablePaths.toArray
+ (new Path[nonCombinablePaths.size()]));
+ InputSplit[] splits = super.getSplits(job, numSplits);
+ for (InputSplit split : splits) {
+ result.add(split);
+ }
+ }
+
+ // Process the combine splits
+ if (combinablePaths.size() > 0) {
+ FileInputFormat.setInputPaths(job, combinablePaths.toArray
+ (new Path[combinablePaths.size()]));
+ InputSplit[] splits = getCombineSplits(job, numSplits);
+ for (InputSplit split : splits) {
+ result.add(split);
+ }
+ }
+
+ // Restore the old path information back
+ // This is just to prevent incompatibilities with previous versions of Hive
+ // if some application depends on the original value being set.
+ if (oldPaths != null) {
+ job.set(HiveConf.ConfVars.HADOOPMAPREDINPUTDIR.varname, oldPaths);
+ }
+ LOG.info("Number of all splits " + result.size());
+ return result.toArray(new InputSplit[result.size()]);
+ }
+
private void processPaths(JobConf job, CombineFileInputFormatShim combine,
List<InputSplitShim> iss, Path... path) throws IOException {
JobConf currJob = new JobConf(job);
@@ -635,4 +696,12 @@ public class CombineHiveInputFormat<K ex
return s.toString();
}
}
+
+ /**
+ * This is a marker interface that is used to identify the formats where
+ * combine split generation is not applicable
+ */
+ public interface AvoidSplitCombination {
+ boolean shouldSkipCombine(Path path, Configuration conf) throws IOException;
+ }
}
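The reworked getSplits() above partitions the input paths into those a format asks to keep out of combination (via the new AvoidSplitCombination callback) and those that can still be combined, generates splits for each group separately, and then restores the original mapred.input.dir value. A simplified sketch of that partition-and-restore pattern, using plain strings and a map in place of Hadoop Path/JobConf (names and paths are illustrative, not the Hive API):

    import java.util.ArrayList;
    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;
    import java.util.function.Predicate;

    public class SplitPlanningSketch {
        public static void main(String[] args) {
            Map<String, String> conf = new HashMap<>();
            conf.put("mapred.input.dir", "/warehouse/t1,/warehouse/t2_acid");

            List<String> paths = List.of("/warehouse/t1", "/warehouse/t2_acid");
            Predicate<String> avoidCombine = p -> p.endsWith("_acid"); // stand-in for shouldSkipCombine()

            List<String> nonCombinable = new ArrayList<>();
            List<String> combinable = new ArrayList<>();
            for (String p : paths) {
                (avoidCombine.test(p) ? nonCombinable : combinable).add(p);
            }

            String oldPaths = conf.get("mapred.input.dir");   // remember the original value
            List<String> splits = new ArrayList<>();
            if (!nonCombinable.isEmpty()) {
                conf.put("mapred.input.dir", String.join(",", nonCombinable));
                splits.add("plain splits for " + conf.get("mapred.input.dir"));
            }
            if (!combinable.isEmpty()) {
                conf.put("mapred.input.dir", String.join(",", combinable));
                splits.add("combined splits for " + conf.get("mapred.input.dir"));
            }
            if (oldPaths != null) {
                conf.put("mapred.input.dir", oldPaths);       // restore, as the commit does
            }
            System.out.println(splits);
        }
    }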
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java?rev=1626482&r1=1626481&r2=1626482&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java Sat Sep 20 17:34:39 2014
@@ -295,11 +295,7 @@ public class HiveInputFormat<K extends W
}
}
- public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
- PerfLogger perfLogger = PerfLogger.getPerfLogger();
- perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.GET_SPLITS);
- init(job);
-
+ Path[] getInputPaths(JobConf job) throws IOException {
Path[] dirs = FileInputFormat.getInputPaths(job);
if (dirs.length == 0) {
// on tez we're avoiding to duplicate the file info in FileInputFormat.
@@ -314,6 +310,14 @@ public class HiveInputFormat<K extends W
throw new IOException("No input paths specified in job");
}
}
+ return dirs;
+ }
+
+ public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
+ PerfLogger perfLogger = PerfLogger.getPerfLogger();
+ perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.GET_SPLITS);
+ init(job);
+ Path[] dirs = getInputPaths(job);
JobConf newjob = new JobConf(job);
List<InputSplit> result = new ArrayList<InputSplit>();
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/orc/CompressionCodec.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/orc/CompressionCodec.java?rev=1626482&r1=1626481&r2=1626482&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/orc/CompressionCodec.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/orc/CompressionCodec.java Sat Sep 20 17:34:39 2014
@@ -21,6 +21,8 @@ import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.EnumSet;
+import javax.annotation.Nullable;
+
interface CompressionCodec {
public enum Modifier {
@@ -62,6 +64,6 @@ interface CompressionCodec {
* @param modifiers compression modifiers
* @return codec for use after optional modification
*/
- CompressionCodec modify(EnumSet<Modifier> modifiers);
+ CompressionCodec modify(@Nullable EnumSet<Modifier> modifiers);
}
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java?rev=1626482&r1=1626481&r2=1626482&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java Sat Sep 20 17:34:39 2014
@@ -24,6 +24,8 @@ import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
+import java.util.TreeMap;
+import java.util.NavigableMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
@@ -46,6 +48,7 @@ import org.apache.hadoop.hive.ql.exec.ve
import org.apache.hadoop.hive.ql.io.AcidInputFormat;
import org.apache.hadoop.hive.ql.io.AcidOutputFormat;
import org.apache.hadoop.hive.ql.io.AcidUtils;
+import org.apache.hadoop.hive.ql.io.CombineHiveInputFormat;
import org.apache.hadoop.hive.ql.io.InputFormatChecker;
import org.apache.hadoop.hive.ql.io.RecordIdentifier;
import org.apache.hadoop.hive.ql.io.StatsProvidingRecordReader;
@@ -99,7 +102,8 @@ import com.google.common.util.concurrent
*/
public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
InputFormatChecker, VectorizedInputFormatInterface,
- AcidInputFormat<NullWritable, OrcStruct> {
+ AcidInputFormat<NullWritable, OrcStruct>,
+ CombineHiveInputFormat.AvoidSplitCombination {
private static final Log LOG = LogFactory.getLog(OrcInputFormat.class);
static final HadoopShims SHIMS = ShimLoader.getHadoopShims();
@@ -125,6 +129,12 @@ public class OrcInputFormat implements
*/
private static final double MIN_INCLUDED_LOCATION = 0.80;
+ @Override
+ public boolean shouldSkipCombine(Path path,
+ Configuration conf) throws IOException {
+ return AcidUtils.isAcid(path, conf);
+ }
+
private static class OrcRecordReader
implements org.apache.hadoop.mapred.RecordReader<NullWritable, OrcStruct>,
StatsProvidingRecordReader {
@@ -610,7 +620,7 @@ public class OrcInputFormat implements
private final FileSystem fs;
private final FileStatus file;
private final long blockSize;
- private final BlockLocation[] locations;
+ private final TreeMap<Long, BlockLocation> locations;
private final FileInfo fileInfo;
private List<StripeInformation> stripes;
private ReaderImpl.FileMetaInfo fileMetaInfo;
@@ -630,7 +640,7 @@ public class OrcInputFormat implements
this.file = file;
this.blockSize = file.getBlockSize();
this.fileInfo = fileInfo;
- locations = SHIMS.getLocations(fs, file);
+ locations = SHIMS.getLocationsWithOffset(fs, file);
this.isOriginal = isOriginal;
this.deltas = deltas;
this.hasBase = hasBase;
@@ -641,8 +651,8 @@ public class OrcInputFormat implements
}
void schedule() throws IOException {
- if(locations.length == 1 && file.getLen() < context.maxSize) {
- String[] hosts = locations[0].getHosts();
+ if(locations.size() == 1 && file.getLen() < context.maxSize) {
+ String[] hosts = locations.firstEntry().getValue().getHosts();
synchronized (context.splits) {
context.splits.add(new OrcSplit(file.getPath(), 0, file.getLen(),
hosts, fileMetaInfo, isOriginal, hasBase, deltas));
@@ -690,15 +700,22 @@ public class OrcInputFormat implements
void createSplit(long offset, long length,
ReaderImpl.FileMetaInfo fileMetaInfo) throws IOException {
String[] hosts;
- if ((offset % blockSize) + length <= blockSize) {
+ Map.Entry<Long, BlockLocation> startEntry = locations.floorEntry(offset);
+ BlockLocation start = startEntry.getValue();
+ if (offset + length <= start.getOffset() + start.getLength()) {
// handle the single block case
- hosts = locations[(int) (offset / blockSize)].getHosts();
+ hosts = start.getHosts();
} else {
+ Map.Entry<Long, BlockLocation> endEntry = locations.floorEntry(offset + length);
+ BlockLocation end = endEntry.getValue();
+ //get the submap
+ NavigableMap<Long, BlockLocation> navigableMap = locations.subMap(startEntry.getKey(),
+ true, endEntry.getKey(), true);
// Calculate the number of bytes in the split that are local to each
// host.
Map<String, LongWritable> sizes = new HashMap<String, LongWritable>();
long maxSize = 0;
- for(BlockLocation block: locations) {
+ for (BlockLocation block : navigableMap.values()) {
long overlap = getOverlap(offset, length, block.getOffset(),
block.getLength());
if (overlap > 0) {
@@ -711,6 +728,9 @@ public class OrcInputFormat implements
val.set(val.get() + overlap);
maxSize = Math.max(maxSize, val.get());
}
+ } else {
+ throw new IOException("File " + file.getPath().toString() +
+ " should have had overlap on block starting at " + block.getOffset());
}
}
// filter the list of locations to those that have at least 80% of the
@@ -718,7 +738,7 @@ public class OrcInputFormat implements
long threshold = (long) (maxSize * MIN_INCLUDED_LOCATION);
List<String> hostList = new ArrayList<String>();
// build the locations in a predictable order to simplify testing
- for(BlockLocation block: locations) {
+ for(BlockLocation block: navigableMap.values()) {
for(String host: block.getHosts()) {
if (sizes.containsKey(host)) {
if (sizes.get(host).get() >= threshold) {
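Switching the OrcInputFormat split generator above from a BlockLocation[] indexed by offset/blockSize to a TreeMap keyed by block start offset lets it find the blocks covering an arbitrary [offset, offset+length) range with floorEntry()/subMap(), which also stays correct when blocks are not uniformly sized. A self-contained sketch of the lookup and the per-host overlap accounting; the block layout and host names are made up:

    import java.util.HashMap;
    import java.util.Map;
    import java.util.NavigableMap;
    import java.util.TreeMap;

    public class BlockOverlapSketch {
        static long overlap(long off, long len, long blkOff, long blkLen) {
            long end = off + len, blkEnd = blkOff + blkLen;
            return Math.min(end, blkEnd) - Math.max(off, blkOff);   // <= 0 means no overlap
        }

        public static void main(String[] args) {
            // offset -> {offset, length, host}; stand-in for TreeMap<Long, BlockLocation>
            TreeMap<Long, Object[]> locations = new TreeMap<>();
            locations.put(0L,   new Object[]{0L,   128L, "hostA"});
            locations.put(128L, new Object[]{128L, 128L, "hostB"});
            locations.put(256L, new Object[]{256L, 128L, "hostA"});

            long offset = 100L, length = 200L;                    // the split being placed
            Long startKey = locations.floorKey(offset);           // block containing the split start
            Long endKey = locations.floorKey(offset + length);    // block containing the split end
            NavigableMap<Long, Object[]> covered = locations.subMap(startKey, true, endKey, true);

            Map<String, Long> bytesPerHost = new HashMap<>();
            for (Object[] blk : covered.values()) {
                long ov = overlap(offset, length, (Long) blk[0], (Long) blk[1]);
                if (ov > 0) {
                    bytesPerHost.merge((String) blk[2], ov, Long::sum);
                }
            }
            System.out.println(bytesPerHost);   // {hostA=72, hostB=128}
        }
    }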
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java?rev=1626482&r1=1626481&r2=1626482&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java Sat Sep 20 17:34:39 2014
@@ -77,6 +77,7 @@ import com.google.common.collect.Compari
class RecordReaderImpl implements RecordReader {
private static final Log LOG = LogFactory.getLog(RecordReaderImpl.class);
+ private static final boolean isLogTraceEnabled = LOG.isTraceEnabled();
private final FSDataInputStream file;
private final long firstRow;
@@ -3133,9 +3134,9 @@ class RecordReaderImpl implements Record
// find the next row
rowInStripe += 1;
advanceToNextRow(rowInStripe + rowBaseInStripe);
- if (LOG.isDebugEnabled()) {
- LOG.debug("row from " + reader.path);
- LOG.debug("orc row = " + result);
+ if (isLogTraceEnabled) {
+ LOG.trace("row from " + reader.path);
+ LOG.trace("orc row = " + result);
}
return result;
}
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java?rev=1626482&r1=1626481&r2=1626482&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java Sat Sep 20 17:34:39 2014
@@ -485,6 +485,7 @@ class WriterImpl implements Writer, Memo
modifiers = EnumSet.of(Modifier.FASTEST, Modifier.BINARY);
break;
default:
+ LOG.warn("Missing ORC compression modifiers for " + kind);
modifiers = null;
break;
}
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ZlibCodec.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ZlibCodec.java?rev=1626482&r1=1626481&r2=1626482&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ZlibCodec.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ZlibCodec.java Sat Sep 20 17:34:39 2014
@@ -24,6 +24,8 @@ import java.util.zip.DataFormatException
import java.util.zip.Deflater;
import java.util.zip.Inflater;
+import javax.annotation.Nullable;
+
import org.apache.hadoop.hive.shims.HadoopShims;
import org.apache.hadoop.hive.shims.HadoopShims.DirectCompressionType;
import org.apache.hadoop.hive.shims.HadoopShims.DirectDecompressorShim;
@@ -130,7 +132,12 @@ class ZlibCodec implements CompressionCo
}
@Override
- public CompressionCodec modify(EnumSet<Modifier> modifiers) {
+ public CompressionCodec modify(@Nullable EnumSet<Modifier> modifiers) {
+
+ if (modifiers == null) {
+ return this;
+ }
+
int l = this.level;
int s = this.strategy;
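With WriterImpl above now passing null modifiers (and logging a warning) when no modifier set is defined for a compression kind, the codec's modify() has to tolerate null; ZlibCodec does so by returning itself unchanged. A small stand-alone sketch of the same null-tolerant pattern; the interface and enum here are simplified stand-ins, not the ORC classes:

    import java.util.EnumSet;

    public class NullTolerantModify {
        enum Modifier { FASTEST, FAST, DEFAULT, TEXT, BINARY }

        interface Codec {
            Codec modify(EnumSet<Modifier> modifiers);   // modifiers may be null
        }

        static class ZlibLikeCodec implements Codec {
            private final int level;
            ZlibLikeCodec(int level) { this.level = level; }

            @Override
            public Codec modify(EnumSet<Modifier> modifiers) {
                if (modifiers == null) {
                    return this;                          // nothing requested: keep current settings
                }
                int l = level;
                if (modifiers.contains(Modifier.FASTEST)) {
                    l = 1;                                // trade compression ratio for speed
                }
                return new ZlibLikeCodec(l);
            }

            @Override
            public String toString() { return "zlib(level=" + level + ")"; }
        }

        public static void main(String[] args) {
            Codec base = new ZlibLikeCodec(6);
            System.out.println(base.modify(null));                           // zlib(level=6)
            System.out.println(base.modify(EnumSet.of(Modifier.FASTEST)));   // zlib(level=1)
        }
    }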
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/zookeeper/ZooKeeperHiveLockManager.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/zookeeper/ZooKeeperHiveLockManager.java?rev=1626482&r1=1626481&r2=1626482&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/zookeeper/ZooKeeperHiveLockManager.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/zookeeper/ZooKeeperHiveLockManager.java Sat Sep 20 17:34:39 2014
@@ -27,6 +27,7 @@ import org.apache.hadoop.hive.ql.lockmgr
import org.apache.hadoop.hive.ql.lockmgr.HiveLockObject.HiveLockObjectData;
import org.apache.hadoop.hive.ql.metadata.*;
import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
+import org.apache.hadoop.hive.ql.util.ZooKeeperHiveHelper;
import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.data.Stat;
import org.apache.zookeeper.KeeperException;
@@ -73,31 +74,6 @@ public class ZooKeeperHiveLockManager im
}
/**
- * @param conf The hive configuration
- * Get the quorum server address from the configuration. The format is:
- * host1:port, host2:port..
- **/
- @VisibleForTesting
- static String getQuorumServers(HiveConf conf) {
- String[] hosts = conf.getVar(HiveConf.ConfVars.HIVE_ZOOKEEPER_QUORUM).split(",");
- String port = conf.getVar(HiveConf.ConfVars.HIVE_ZOOKEEPER_CLIENT_PORT);
- StringBuilder quorum = new StringBuilder();
- for(int i=0; i<hosts.length; i++) {
- quorum.append(hosts[i].trim());
- if (!hosts[i].contains(":")) {
- // if the hostname doesn't contain a port, add the configured port to hostname
- quorum.append(":");
- quorum.append(port);
- }
-
- if (i != hosts.length-1)
- quorum.append(",");
- }
-
- return quorum.toString();
- }
-
- /**
* @param ctx The lock manager context (containing the Hive configuration file)
* Start the ZooKeeper client based on the zookeeper cluster specified in the conf.
**/
@@ -105,7 +81,7 @@ public class ZooKeeperHiveLockManager im
this.ctx = ctx;
HiveConf conf = ctx.getConf();
sessionTimeout = conf.getIntVar(HiveConf.ConfVars.HIVE_ZOOKEEPER_SESSION_TIMEOUT);
- quorumServers = ZooKeeperHiveLockManager.getQuorumServers(conf);
+ quorumServers = ZooKeeperHiveHelper.getQuorumServers(conf);
sleepTime = conf.getTimeVar(
HiveConf.ConfVars.HIVE_LOCK_SLEEP_BETWEEN_RETRIES, TimeUnit.MILLISECONDS);
@@ -146,7 +122,7 @@ public class ZooKeeperHiveLockManager im
return;
}
- zooKeeper = new ZooKeeper(quorumServers, sessionTimeout, new DummyWatcher());
+ zooKeeper = new ZooKeeper(quorumServers, sessionTimeout, new ZooKeeperHiveHelper.DummyWatcher());
}
/**
@@ -517,8 +493,8 @@ public class ZooKeeperHiveLockManager im
ZooKeeper zkpClient = null;
try {
int sessionTimeout = conf.getIntVar(HiveConf.ConfVars.HIVE_ZOOKEEPER_SESSION_TIMEOUT);
- String quorumServers = getQuorumServers(conf);
- Watcher dummyWatcher = new DummyWatcher();
+ String quorumServers = ZooKeeperHiveHelper.getQuorumServers(conf);
+ Watcher dummyWatcher = new ZooKeeperHiveHelper.DummyWatcher();
zkpClient = new ZooKeeper(quorumServers, sessionTimeout, dummyWatcher);
String parent = conf.getVar(HiveConf.ConfVars.HIVE_ZOOKEEPER_NAMESPACE);
List<HiveLock> locks = getLocks(conf, zkpClient, null, parent, false, false);
@@ -629,7 +605,8 @@ public class ZooKeeperHiveLockManager im
if (fetchData) {
try {
- data = new HiveLockObjectData(new String(zkpClient.getData(curChild, new DummyWatcher(), null)));
+ data = new HiveLockObjectData(new String(zkpClient.getData(curChild,
+ new ZooKeeperHiveHelper.DummyWatcher(), null)));
data.setClientIp(clientIp);
} catch (Exception e) {
LOG.error("Error in getting data for " + curChild, e);
@@ -789,11 +766,6 @@ public class ZooKeeperHiveLockManager im
return null;
}
- public static class DummyWatcher implements Watcher {
- public void process(org.apache.zookeeper.WatchedEvent event) {
- }
- }
-
@Override
public void prepareRetry() throws LockException {
try {
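The quorum-string construction and DummyWatcher removed above move into a shared ZooKeeperHiveHelper so other components can reuse them. The moved logic itself is simple: join the configured hosts with commas, appending the default client port to any host that does not already carry one. A stand-alone sketch of that behaviour (class and method names here are illustrative):

    public class QuorumStringSketch {
        /** Builds "host1:port,host2:port..." adding the default port where missing. */
        static String quorumServers(String hostList, String defaultPort) {
            String[] hosts = hostList.split(",");
            StringBuilder quorum = new StringBuilder();
            for (int i = 0; i < hosts.length; i++) {
                String host = hosts[i].trim();
                quorum.append(host);
                if (!host.contains(":")) {
                    quorum.append(':').append(defaultPort);   // no explicit port configured
                }
                if (i != hosts.length - 1) {
                    quorum.append(',');
                }
            }
            return quorum.toString();
        }

        public static void main(String[] args) {
            // zk1 gets the default port, zk2 keeps its explicit one
            System.out.println(quorumServers("zk1, zk2:2282", "2181"));   // zk1:2181,zk2:2282
        }
    }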
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java?rev=1626482&r1=1626481&r2=1626482&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java Sat Sep 20 17:34:39 2014
@@ -193,11 +193,12 @@ public class StatsOptimizer implements T
}
SelectOperator selOp = (SelectOperator)tsOp.getChildren().get(0);
for(ExprNodeDesc desc : selOp.getConf().getColList()) {
- if (!(desc instanceof ExprNodeColumnDesc)) {
+ if (!((desc instanceof ExprNodeColumnDesc) || (desc instanceof ExprNodeConstantDesc))) {
// Probably an expression, cant handle that
return null;
}
}
+ Map<String, ExprNodeDesc> exprMap = selOp.getColumnExprMap();
// Since we have done an exact match on TS-SEL-GBY-RS-GBY-SEL-FS
// we need not to do any instanceof checks for following.
GroupByOperator gbyOp = (GroupByOperator)selOp.getChildren().get(0);
@@ -215,6 +216,12 @@ public class StatsOptimizer implements T
return null;
}
+ for(ExprNodeDesc desc : selOp.getConf().getColList()) {
+ if (!(desc instanceof ExprNodeColumnDesc)) {
+ // Probably an expression, cant handle that
+ return null;
+ }
+ }
FileSinkOperator fsOp = (FileSinkOperator)(selOp.getChildren().get(0));
if (fsOp.getChildOperators() != null && fsOp.getChildOperators().size() > 0) {
// looks like a subq plan.
@@ -236,22 +243,28 @@ public class StatsOptimizer implements T
GenericUDAFResolver udaf =
FunctionRegistry.getGenericUDAFResolver(aggr.getGenericUDAFName());
if (udaf instanceof GenericUDAFSum) {
- if(!(aggr.getParameters().get(0) instanceof ExprNodeConstantDesc)){
+ ExprNodeDesc desc = aggr.getParameters().get(0);
+ String constant;
+ if (desc instanceof ExprNodeConstantDesc) {
+ constant = ((ExprNodeConstantDesc) desc).getValue().toString();
+ } else if (desc instanceof ExprNodeColumnDesc && exprMap.get(((ExprNodeColumnDesc)desc).getColumn()) instanceof ExprNodeConstantDesc) {
+ constant = ((ExprNodeConstantDesc)exprMap.get(((ExprNodeColumnDesc)desc).getColumn())).getValue().toString();
+ } else {
return null;
}
Long rowCnt = getRowCnt(pctx, tsOp, tbl);
if(rowCnt == null) {
return null;
}
- oneRow.add(HiveDecimal.create(((ExprNodeConstantDesc) aggr.getParameters().get(0))
- .getValue().toString()).multiply(HiveDecimal.create(rowCnt)));
+ oneRow.add(HiveDecimal.create(constant).multiply(HiveDecimal.create(rowCnt)));
ois.add(PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(
PrimitiveCategory.DECIMAL));
}
else if (udaf instanceof GenericUDAFCount) {
Long rowCnt = 0L;
- if ((aggr.getParameters().isEmpty() || aggr.getParameters().get(0) instanceof
- ExprNodeConstantDesc)) {
+ if (aggr.getParameters().isEmpty() || aggr.getParameters().get(0) instanceof
+ ExprNodeConstantDesc || ((aggr.getParameters().get(0) instanceof ExprNodeColumnDesc) &&
+ exprMap.get(((ExprNodeColumnDesc)aggr.getParameters().get(0)).getColumn()) instanceof ExprNodeConstantDesc)) {
// Its either count (*) or count(1) case
rowCnt = getRowCnt(pctx, tsOp, tbl);
if(rowCnt == null) {
@@ -259,12 +272,7 @@ public class StatsOptimizer implements T
}
} else {
// Its count(col) case
- if (!(aggr.getParameters().get(0) instanceof ExprNodeColumnDesc)) {
- // this is weird, we got expr or something in there, bail out
- Log.debug("Unexpected expression : " + aggr.getParameters().get(0));
- return null;
- }
- ExprNodeColumnDesc desc = (ExprNodeColumnDesc)aggr.getParameters().get(0);
+ ExprNodeColumnDesc desc = (ExprNodeColumnDesc)exprMap.get(((ExprNodeColumnDesc)aggr.getParameters().get(0)).getColumn());
String colName = desc.getColumn();
StatType type = getType(desc.getTypeString());
if(!tbl.isPartitioned()) {
@@ -330,7 +338,7 @@ public class StatsOptimizer implements T
ois.add(PrimitiveObjectInspectorFactory.
getPrimitiveJavaObjectInspector(PrimitiveCategory.LONG));
} else if (udaf instanceof GenericUDAFMax) {
- ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc)aggr.getParameters().get(0);
+ ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc)exprMap.get(((ExprNodeColumnDesc)aggr.getParameters().get(0)).getColumn());
String colName = colDesc.getColumn();
StatType type = getType(colDesc.getTypeString());
if(!tbl.isPartitioned()) {
@@ -419,7 +427,7 @@ public class StatsOptimizer implements T
}
}
} else if (udaf instanceof GenericUDAFMin) {
- ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc)aggr.getParameters().get(0);
+ ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc)exprMap.get(((ExprNodeColumnDesc)aggr.getParameters().get(0)).getColumn());
String colName = colDesc.getColumn();
StatType type = getType(colDesc.getTypeString());
if (!tbl.isPartitioned()) {
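The StatsOptimizer changes above let the metadata-only rewrite survive queries where the aggregate parameter is a constant reached through a projection, e.g. count(1) or sum(5) written via an intermediate SELECT: instead of insisting on a literal ExprNodeConstantDesc, the optimizer now also resolves a column reference through the select operator's column-to-expression map and accepts it when the underlying expression is a constant. A small sketch of that resolution step with plain maps standing in for the expression descriptors (names are illustrative):

    import java.util.HashMap;
    import java.util.Map;

    public class ConstantResolutionSketch {
        /**
         * Returns the constant behind an aggregate parameter, or null if it is a real column.
         * A String parameter plays the role of an ExprNodeColumnDesc that must be resolved
         * through the select operator's column expression map.
         */
        static Object resolveConstant(Object param, Map<String, Object> exprMap) {
            if (param instanceof Number) {
                return param;                                   // already a literal, e.g. count(1)
            }
            if (param instanceof String) {
                Object resolved = exprMap.get(param);
                if (resolved instanceof Number) {
                    return resolved;                            // column alias bound to a constant
                }
            }
            return null;                                        // genuine column reference
        }

        public static void main(String[] args) {
            Map<String, Object> exprMap = new HashMap<>();
            exprMap.put("_col0", 1);            // SELECT 1 AS _col0 ...
            exprMap.put("_col1", "key");        // SELECT key AS _col1 ...

            System.out.println(resolveConstant(1, exprMap));        // 1: count(1) stays rewritable
            System.out.println(resolveConstant("_col0", exprMap));  // 1: aliased constant, rewritable
            System.out.println(resolveConstant("_col1", exprMap));  // null: real column, fall back
        }
    }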
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java?rev=1626482&r1=1626481&r2=1626482&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java Sat Sep 20 17:34:39 2014
@@ -115,6 +115,10 @@ public abstract class BaseSemanticAnalyz
protected LineageInfo linfo;
protected TableAccessInfo tableAccessInfo;
protected ColumnAccessInfo columnAccessInfo;
+ /**
+ * Columns accessed by updates
+ */
+ protected ColumnAccessInfo updateColumnAccessInfo;
public boolean skipAuthorization() {
@@ -866,6 +870,14 @@ public abstract class BaseSemanticAnalyz
this.columnAccessInfo = columnAccessInfo;
}
+ public ColumnAccessInfo getUpdateColumnAccessInfo() {
+ return updateColumnAccessInfo;
+ }
+
+ public void setUpdateColumnAccessInfo(ColumnAccessInfo updateColumnAccessInfo) {
+ this.updateColumnAccessInfo = updateColumnAccessInfo;
+ }
+
protected LinkedHashMap<String, String> extractPartitionSpecs(Tree partspec)
throws SemanticException {
LinkedHashMap<String, String> partSpec = new LinkedHashMap<String, String>();
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnAccessInfo.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnAccessInfo.java?rev=1626482&r1=1626481&r2=1626482&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnAccessInfo.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnAccessInfo.java Sat Sep 20 17:34:39 2014
@@ -18,6 +18,8 @@
package org.apache.hadoop.hive.ql.parse;
+import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
+
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
@@ -54,4 +56,21 @@ public class ColumnAccessInfo {
}
return mapping;
}
+
+ /**
+ * Strip a virtual column out of the set of columns. This is useful in cases where we do not
+ * want to be checking against the user reading virtual columns, namely update and delete.
+ * @param vc
+ */
+ public void stripVirtualColumn(VirtualColumn vc) {
+ for (Map.Entry<String, Set<String>> e : tableToColumnAccessMap.entrySet()) {
+ for (String columnName : e.getValue()) {
+ if (vc.getName().equalsIgnoreCase(columnName)) {
+ e.getValue().remove(columnName);
+ break;
+ }
+ }
+ }
+
+ }
}
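stripVirtualColumn() above walks the table-to-columns map and drops the given virtual column (case-insensitively) from each table's set; UpdateDeleteSemanticAnalyzer uses it so users are not asked for read authorization on ROW__ID, a column they never typed. A compact stand-alone sketch of the same removal; the map layout and names are illustrative:

    import java.util.HashMap;
    import java.util.HashSet;
    import java.util.Map;
    import java.util.Set;

    public class StripColumnSketch {
        /** Removes columnName (ignoring case) from every table's accessed-column set. */
        static void strip(Map<String, Set<String>> tableToColumns, String columnName) {
            for (Set<String> columns : tableToColumns.values()) {
                columns.removeIf(c -> c.equalsIgnoreCase(columnName));
            }
        }

        public static void main(String[] args) {
            Map<String, Set<String>> access = new HashMap<>();
            access.put("default@t", new HashSet<>(Set.of("row__id", "a", "b")));

            strip(access, "ROW__ID");
            System.out.println(access);   // ROW__ID gone, e.g. {default@t=[a, b]}
        }
    }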
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java?rev=1626482&r1=1626481&r2=1626482&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java Sat Sep 20 17:34:39 2014
@@ -1145,7 +1145,10 @@ public class DDLSemanticAnalyzer extends
}
}
- inputs.add(new ReadEntity(getTable(tableName)));
+ Table tbl = getTable(tableName, false);
+ if (tbl != null) {
+ inputs.add(new ReadEntity(getTable(tableName)));
+ }
DropIndexDesc dropIdxDesc = new DropIndexDesc(indexName, tableName);
rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(),
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g?rev=1626482&r1=1626481&r2=1626482&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g Sat Sep 20 17:34:39 2014
@@ -1530,8 +1530,8 @@ principalSpecification
principalName
@init {pushMsg("user|group|role name", state);}
@after {popMsg(state);}
- : KW_USER identifier -> ^(TOK_USER identifier)
- | KW_GROUP identifier -> ^(TOK_GROUP identifier)
+ : KW_USER principalIdentifier -> ^(TOK_USER principalIdentifier)
+ | KW_GROUP principalIdentifier -> ^(TOK_GROUP principalIdentifier)
| KW_ROLE identifier -> ^(TOK_ROLE identifier)
;
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g?rev=1626482&r1=1626481&r2=1626482&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g Sat Sep 20 17:34:39 2014
@@ -536,6 +536,13 @@ functionIdentifier
identifier
;
+principalIdentifier
+@init { gParent.pushMsg("identifier for principal spec", state); }
+@after { gParent.popMsg(state); }
+ : identifier
+ | QuotedIdentifier
+ ;
+
nonReserved
:
KW_TRUE | KW_FALSE | KW_LIKE | KW_EXISTS | KW_ASC | KW_DESC | KW_ORDER | KW_GROUP | KW_BY | KW_AS | KW_INSERT | KW_OVERWRITE | KW_OUTER | KW_LEFT | KW_RIGHT | KW_FULL | KW_PARTITION | KW_PARTITIONS | KW_TABLE | KW_TABLES | KW_COLUMNS | KW_INDEX | KW_INDEXES | KW_REBUILD | KW_FUNCTIONS | KW_SHOW | KW_MSCK | KW_REPAIR | KW_DIRECTORY | KW_LOCAL | KW_USING | KW_CLUSTER | KW_DISTRIBUTE | KW_SORT | KW_UNION | KW_LOAD | KW_EXPORT | KW_IMPORT | KW_DATA | KW_INPATH | KW_IS | KW_NULL | KW_CREATE | KW_EXTERNAL | KW_ALTER | KW_CHANGE | KW_FIRST | KW_AFTER | KW_DESCRIBE | KW_DROP | KW_RENAME | KW_IGNORE | KW_PROTECTION | KW_TO | KW_COMMENT | KW_BOOLEAN | KW_TINYINT | KW_SMALLINT | KW_INT | KW_BIGINT | KW_FLOAT | KW_DOUBLE | KW_DATE | KW_DATETIME | KW_TIMESTAMP | KW_DECIMAL | KW_STRING | KW_ARRAY | KW_STRUCT | KW_UNIONTYPE | KW_PARTITIONED | KW_CLUSTERED | KW_SORTED | KW_INTO | KW_BUCKETS | KW_ROW | KW_ROWS | KW_FORMAT | KW_DELIMITED | KW_FIELDS | KW_TERMINATED | KW_ESCAPED | KW_COLLECTION |
KW_ITEMS | KW_KEYS | KW_KEY_TYPE | KW_LINES | KW_STORED | KW_FILEFORMAT | KW_INPUTFORMAT | KW_OUTPUTFORMAT | KW_INPUTDRIVER | KW_OUTPUTDRIVER | KW_OFFLINE | KW_ENABLE | KW_DISABLE | KW_READONLY | KW_NO_DROP | KW_LOCATION | KW_BUCKET | KW_OUT | KW_OF | KW_PERCENT | KW_ADD | KW_REPLACE | KW_RLIKE | KW_REGEXP | KW_TEMPORARY | KW_EXPLAIN | KW_FORMATTED | KW_PRETTY | KW_DEPENDENCY | KW_LOGICAL | KW_SERDE | KW_WITH | KW_DEFERRED | KW_SERDEPROPERTIES | KW_DBPROPERTIES | KW_LIMIT | KW_SET | KW_UNSET | KW_TBLPROPERTIES | KW_IDXPROPERTIES | KW_VALUE_TYPE | KW_ELEM_TYPE | KW_MAPJOIN | KW_STREAMTABLE | KW_HOLD_DDLTIME | KW_CLUSTERSTATUS | KW_UTC | KW_UTCTIMESTAMP | KW_LONG | KW_DELETE | KW_PLUS | KW_MINUS | KW_FETCH | KW_INTERSECT | KW_VIEW | KW_IN | KW_DATABASES | KW_MATERIALIZED | KW_SCHEMA | KW_SCHEMAS | KW_GRANT | KW_REVOKE | KW_SSL | KW_UNDO | KW_LOCK | KW_LOCKS | KW_UNLOCK | KW_SHARED | KW_EXCLUSIVE | KW_PROCEDURE | KW_UNSIGNED | KW_WHILE | KW_READ | KW_READS | KW_PURGE | KW_RANGE | KW_AN
ALYZE | KW_BEFORE | KW_BETWEEN | KW_BOTH | KW_BINARY | KW_CONTINUE | KW_CURSOR | KW_TRIGGER | KW_RECORDREADER | KW_RECORDWRITER | KW_SEMI | KW_LATERAL | KW_TOUCH | KW_ARCHIVE | KW_UNARCHIVE | KW_COMPUTE | KW_STATISTICS | KW_USE | KW_OPTION | KW_CONCATENATE | KW_SHOW_DATABASE | KW_UPDATE | KW_RESTRICT | KW_CASCADE | KW_SKEWED | KW_ROLLUP | KW_CUBE | KW_DIRECTORIES | KW_FOR | KW_GROUPING | KW_SETS | KW_TRUNCATE | KW_NOSCAN | KW_USER | KW_ROLE | KW_ROLES | KW_INNER | KW_DEFINED | KW_ADMIN | KW_JAR | KW_FILE | KW_OWNER | KW_PRINCIPALS | KW_ALL | KW_DEFAULT | KW_NONE | KW_COMPACT | KW_COMPACTIONS | KW_TRANSACTIONS | KW_REWRITE | KW_AUTHORIZATION | KW_VALUES
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=1626482&r1=1626481&r2=1626482&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Sat Sep 20 17:34:39 2014
@@ -47,7 +47,6 @@ import org.apache.hadoop.fs.FSDataOutput
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.FileUtils;
-import org.apache.hadoop.hive.common.JavaUtils;
import org.apache.hadoop.hive.common.ObjectPair;
import org.apache.hadoop.hive.common.StatsSetupConst;
import org.apache.hadoop.hive.common.StatsSetupConst.StatDB;
@@ -805,7 +804,8 @@ public class SemanticAnalyzer extends Ba
return PlanUtils.stripQuotes(expr.getText());
case HiveParser.KW_FALSE:
- return "FALSE";
+ // UDFToBoolean casts any non-empty string to true, so set this to false
+ return "";
case HiveParser.KW_TRUE:
return "TRUE";
@@ -813,6 +813,10 @@ public class SemanticAnalyzer extends Ba
case HiveParser.MINUS:
return "-" + unparseExprForValuesClause((ASTNode)expr.getChildren().get(0));
+ case HiveParser.TOK_NULL:
+ // Hive's text input will translate this as a null
+ return "\\N";
+
default:
throw new SemanticException("Expression of type " + expr.getText() +
" not supported in insert/values");
@@ -5866,7 +5870,7 @@ public class SemanticAnalyzer extends Ba
if (!isNonNativeTable) {
AcidUtils.Operation acidOp = getAcidType(table_desc.getOutputFileFormatClass());
if (acidOp != AcidUtils.Operation.NOT_ACID) {
- checkIfAcidAndOverwriting(qb, table_desc);
+ checkAcidConstraints(qb, table_desc);
}
ltd = new LoadTableDesc(queryTmpdir,table_desc, dpCtx, acidOp);
ltd.setReplace(!qb.getParseInfo().isInsertIntoTable(dest_tab.getDbName(),
@@ -5973,7 +5977,7 @@ public class SemanticAnalyzer extends Ba
dest_part.isStoredAsSubDirectories(), conf);
AcidUtils.Operation acidOp = getAcidType(table_desc.getOutputFileFormatClass());
if (acidOp != AcidUtils.Operation.NOT_ACID) {
- checkIfAcidAndOverwriting(qb, table_desc);
+ checkAcidConstraints(qb, table_desc);
}
ltd = new LoadTableDesc(queryTmpdir, table_desc, dest_part.getSpec(), acidOp);
ltd.setReplace(!qb.getParseInfo().isInsertIntoTable(dest_tab.getDbName(),
@@ -6233,15 +6237,25 @@ public class SemanticAnalyzer extends Ba
return output;
}
- // Check if we are overwriting any tables. If so, throw an exception as that is not allowed
- // when using an Acid compliant txn manager and operating on an acid table.
- private void checkIfAcidAndOverwriting(QB qb, TableDesc tableDesc) throws SemanticException {
+ // Check constraints on acid tables. This includes
+ // * no insert overwrites
+ // * no use of vectorization
+ // * turns off reduce deduplication optimization, as that sometimes breaks acid
+ // This method assumes you have already decided that this is an Acid write. Don't call it if
+ // that isn't true.
+ private void checkAcidConstraints(QB qb, TableDesc tableDesc) throws SemanticException {
String tableName = tableDesc.getTableName();
if (!qb.getParseInfo().isInsertIntoTable(tableName)) {
LOG.debug("Couldn't find table " + tableName + " in insertIntoTable");
throw new SemanticException(ErrorMsg.NO_INSERT_OVERWRITE_WITH_ACID.getMsg());
}
-
+ if (conf.getBoolVar(ConfVars.HIVE_VECTORIZATION_ENABLED)) {
+ LOG.info("Turning off vectorization for acid write operation");
+ conf.setBoolVar(ConfVars.HIVE_VECTORIZATION_ENABLED, false);
+ }
+ LOG.info("Modifying config values for ACID write");
+ conf.setBoolVar(ConfVars.HIVEOPTREDUCEDEDUPLICATION, false);
+ conf.setBoolVar(ConfVars.HIVE_HADOOP_SUPPORTS_SUBDIRECTORIES, true);
}
/**
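Two things change in SemanticAnalyzer above. First, the INSERT ... VALUES rewrite serializes literals as text that will be read back through the default text SerDe, so FALSE must become an empty string (UDFToBoolean treats any non-empty string as true) and NULL becomes the "\N" marker the text input recognizes. Second, checkAcidConstraints() now also disables vectorization and reduce deduplication for ACID writes. A small sketch of the literal mapping only; the token names are stand-ins for the parser constants:

    public class ValuesLiteralSketch {
        enum Token { STRING, TRUE, FALSE, NULL }

        /** Renders a VALUES-clause literal the way Hive's text reader will interpret it back. */
        static String render(Token token, String text) {
            switch (token) {
                case STRING: return text;
                case TRUE:   return "TRUE";
                case FALSE:  return "";     // any non-empty string would read back as true
                case NULL:   return "\\N";  // the text input format's null marker
                default:     throw new IllegalArgumentException("unsupported literal: " + token);
            }
        }

        public static void main(String[] args) {
            System.out.println("[" + render(Token.FALSE, null) + "]");  // []
            System.out.println("[" + render(Token.NULL, null) + "]");   // [\N]
        }
    }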
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java?rev=1626482&r1=1626481&r2=1626482&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java Sat Sep 20 17:34:39 2014
@@ -751,12 +751,10 @@ public final class TypeCheckProcFactory
if (myt.getCategory() == Category.LIST) {
// Only allow integer index for now
- if (!(children.get(1) instanceof ExprNodeConstantDesc)
- || !(((ExprNodeConstantDesc) children.get(1)).getTypeInfo()
- .equals(TypeInfoFactory.intTypeInfo))) {
+ if (!FunctionRegistry.implicitConvertible(children.get(1).getTypeInfo(),
+ TypeInfoFactory.intTypeInfo)) {
throw new SemanticException(SemanticAnalyzer.generateErrorMessage(
- expr,
- ErrorMsg.INVALID_ARRAYINDEX_CONSTANT.getMsg()));
+ expr, ErrorMsg.INVALID_ARRAYINDEX_TYPE.getMsg()));
}
// Calculate TypeInfo
@@ -764,14 +762,8 @@ public final class TypeCheckProcFactory
desc = new ExprNodeGenericFuncDesc(t, FunctionRegistry
.getGenericUDFForIndex(), children);
} else if (myt.getCategory() == Category.MAP) {
- // Only allow constant map key for now
- if (!(children.get(1) instanceof ExprNodeConstantDesc)) {
- throw new SemanticException(SemanticAnalyzer.generateErrorMessage(
- expr,
- ErrorMsg.INVALID_MAPINDEX_CONSTANT.getMsg()));
- }
- if (!(((ExprNodeConstantDesc) children.get(1)).getTypeInfo()
- .equals(((MapTypeInfo) myt).getMapKeyTypeInfo()))) {
+ if (!FunctionRegistry.implicitConvertible(children.get(1).getTypeInfo(),
+ ((MapTypeInfo) myt).getMapKeyTypeInfo())) {
throw new SemanticException(ErrorMsg.INVALID_MAPINDEX_TYPE
.getMsg(expr));
}
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java?rev=1626482&r1=1626481&r2=1626482&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java Sat Sep 20 17:34:39 2014
@@ -28,11 +28,13 @@ import org.apache.hadoop.hive.ql.io.Acid
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
import org.apache.hadoop.hive.ql.session.SessionState;
import java.io.IOException;
import java.util.HashMap;
+import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
@@ -127,7 +129,9 @@ public class UpdateDeleteSemanticAnalyze
try {
mTable = db.getTable(tableName[0], tableName[1]);
} catch (HiveException e) {
- throw new SemanticException(ErrorMsg.UPDATEDELETE_PARSE_ERROR.getMsg(), e);
+ LOG.error("Failed to find table " + getDotName(tableName) + " got exception " +
+ e.getMessage());
+ throw new SemanticException(ErrorMsg.INVALID_TABLE, getDotName(tableName));
}
List<FieldSchema> partCols = mTable.getPartCols();
@@ -148,6 +152,8 @@ public class UpdateDeleteSemanticAnalyze
rewrittenQueryStr.append(" select ROW__ID");
Map<Integer, ASTNode> setColExprs = null;
+ Map<String, ASTNode> setCols = null;
+ Set<String> setRCols = new HashSet<String>();
if (updating()) {
// An update needs to select all of the columns, as we rewrite the entire row. Also,
// we need to figure out which columns we are going to replace. We won't write the set
@@ -160,7 +166,7 @@ public class UpdateDeleteSemanticAnalyze
// Get the children of the set clause, each of which should be a column assignment
List<? extends Node> assignments = setClause.getChildren();
- Map<String, ASTNode> setCols = new HashMap<String, ASTNode>(assignments.size());
+ setCols = new HashMap<String, ASTNode>(assignments.size());
setColExprs = new HashMap<Integer, ASTNode>(assignments.size());
for (Node a : assignments) {
ASTNode assignment = (ASTNode)a;
@@ -173,6 +179,8 @@ public class UpdateDeleteSemanticAnalyze
assert colName.getToken().getType() == HiveParser.Identifier :
"Expected column name";
+ addSetRCols((ASTNode) assignment.getChildren().get(1), setRCols);
+
String columnName = colName.getText();
// Make sure this isn't one of the partitioning columns, that's not supported.
@@ -323,6 +331,28 @@ public class UpdateDeleteSemanticAnalyze
WriteEntity.WriteType.UPDATE);
}
}
+
+ // For updates, we need to set the column access info so that it contains information on
+ // the columns we are updating.
+ if (updating()) {
+ ColumnAccessInfo cai = new ColumnAccessInfo();
+ for (String colName : setCols.keySet()) {
+ cai.add(Table.getCompleteName(mTable.getDbName(), mTable.getTableName()), colName);
+ }
+ setUpdateColumnAccessInfo(cai);
+
+ // Add the setRCols to the input list
+ for (String colName : setRCols) {
+ columnAccessInfo.add(Table.getCompleteName(mTable.getDbName(), mTable.getTableName()),
+ colName);
+ }
+ }
+
+ // We need to weed ROW__ID out of the input column info, as it doesn't make any sense to
+ // require the user to have authorization on that column.
+ if (columnAccessInfo != null) {
+ columnAccessInfo.stripVirtualColumn(VirtualColumn.ROWID);
+ }
}
private String operation() {
@@ -342,4 +372,22 @@ public class UpdateDeleteSemanticAnalyze
}
return false;
}
+
+ // This method finds any columns on the right side of a set statement (thus rcols) and puts them
+ // in a set so we can add them to the list of input cols to check.
+ private void addSetRCols(ASTNode node, Set<String> setRCols) {
+
+ // See if this node is a TOK_TABLE_OR_COL. If so, find the value and put it in the list. If
+ // not, recurse on any children
+ if (node.getToken().getType() == HiveParser.TOK_TABLE_OR_COL) {
+ ASTNode colName = (ASTNode)node.getChildren().get(0);
+ assert colName.getToken().getType() == HiveParser.Identifier :
+ "Expected column name";
+ setRCols.add(colName.getText());
+ } else if (node.getChildren() != null) {
+ for (Node n : node.getChildren()) {
+ addSetRCols((ASTNode) n, setRCols);
+ }
+ }
+ }
}
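addSetRCols() above walks the right-hand side of each SET assignment and collects every column it references, so those columns can be added to the read-authorization inputs of the rewritten UPDATE. A stand-alone sketch of the same collection over a tiny tree type; the node class is invented for illustration (Hive uses ASTNode and the HiveParser token types):

    import java.util.ArrayList;
    import java.util.HashSet;
    import java.util.List;
    import java.util.Set;

    public class CollectColumnsSketch {
        static class Node {
            final String type;          // e.g. "TOK_TABLE_OR_COL", "+", "Identifier"
            final String text;
            final List<Node> children = new ArrayList<>();
            Node(String type, String text) { this.type = type; this.text = text; }
            Node add(Node child) { children.add(child); return this; }
        }

        /** Collects the column names referenced anywhere under node. */
        static void collectColumns(Node node, Set<String> out) {
            if ("TOK_TABLE_OR_COL".equals(node.type)) {
                out.add(node.children.get(0).text);        // the identifier child is the column name
            } else {
                for (Node child : node.children) {
                    collectColumns(child, out);            // recurse on the child, not the parent
                }
            }
        }

        public static void main(String[] args) {
            // SET a = b + 1  ->  the right-hand side is (+ (TOK_TABLE_OR_COL b) 1)
            Node rhs = new Node("+", "+")
                .add(new Node("TOK_TABLE_OR_COL", null).add(new Node("Identifier", "b")))
                .add(new Node("Number", "1"));

            Set<String> rcols = new HashSet<>();
            collectColumns(rhs, rcols);
            System.out.println(rcols);   // [b]
        }
    }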
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/processors/CommandUtil.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/processors/CommandUtil.java?rev=1626482&r1=1626481&r2=1626482&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/processors/CommandUtil.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/processors/CommandUtil.java Sat Sep 20 17:34:39 2014
@@ -21,6 +21,9 @@ package org.apache.hadoop.hive.ql.proces
import java.util.Arrays;
import java.util.List;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAccessControlException;
import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzContext;
import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzPluginException;
@@ -31,6 +34,7 @@ import org.apache.hadoop.hive.ql.session
import com.google.common.base.Joiner;
class CommandUtil {
+ public static final Log LOG = LogFactory.getLog(CommandUtil.class);
/**
* Authorize command of given type and arguments
@@ -47,14 +51,19 @@ class CommandUtil {
// ss can be null in unit tests
return null;
}
- if (ss.isAuthorizationModeV2()) {
+
+ if (ss.isAuthorizationModeV2() &&
+ HiveConf.getBoolVar(ss.getConf(), HiveConf.ConfVars.HIVE_AUTHORIZATION_ENABLED)) {
+ String errMsg = "Error authorizing command " + command;
try {
authorizeCommandThrowEx(ss, type, command);
// authorized to perform action
return null;
} catch (HiveAuthzPluginException e) {
+ LOG.error(errMsg, e);
return CommandProcessorResponse.create(e);
} catch (HiveAccessControlException e) {
+ LOG.error(errMsg, e);
return CommandProcessorResponse.create(e);
}
}
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/AuthorizationUtils.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/AuthorizationUtils.java?rev=1626482&r1=1626481&r2=1626482&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/AuthorizationUtils.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/AuthorizationUtils.java Sat Sep 20 17:34:39 2014
@@ -310,9 +310,12 @@ public class AuthorizationUtils {
return HivePrivObjectActionType.INSERT;
case INSERT_OVERWRITE:
return HivePrivObjectActionType.INSERT_OVERWRITE;
+ case UPDATE:
+ return HivePrivObjectActionType.UPDATE;
+ case DELETE:
+ return HivePrivObjectActionType.DELETE;
default:
- // Ignore other types for purposes of authorization, we are interested only
- // in INSERT vs INSERT_OVERWRITE as of now
+ // Ignore other types for purposes of authorization
break;
}
}
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HivePrivilegeObject.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HivePrivilegeObject.java?rev=1626482&r1=1626481&r2=1626482&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HivePrivilegeObject.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HivePrivilegeObject.java Sat Sep 20 17:34:39 2014
@@ -81,7 +81,7 @@ public class HivePrivilegeObject impleme
GLOBAL, DATABASE, TABLE_OR_VIEW, PARTITION, COLUMN, LOCAL_URI, DFS_URI, COMMAND_PARAMS, FUNCTION
} ;
public enum HivePrivObjectActionType {
- OTHER, INSERT, INSERT_OVERWRITE
+ OTHER, INSERT, INSERT_OVERWRITE, UPDATE, DELETE
};
private final HivePrivilegeObjectType type;
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/Operation2Privilege.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/Operation2Privilege.java?rev=1626482&r1=1626481&r2=1626482&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/Operation2Privilege.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/Operation2Privilege.java Sat Sep 20 17:34:39 2014
@@ -118,6 +118,7 @@ public class Operation2Privilege {
private static SQLPrivTypeGrant[] ADMIN_PRIV_AR = arr(SQLPrivTypeGrant.ADMIN_PRIV);
private static SQLPrivTypeGrant[] INS_NOGRANT_AR = arr(SQLPrivTypeGrant.INSERT_NOGRANT);
private static SQLPrivTypeGrant[] DEL_NOGRANT_AR = arr(SQLPrivTypeGrant.DELETE_NOGRANT);
+ private static SQLPrivTypeGrant[] UPD_NOGRANT_AR = arr(SQLPrivTypeGrant.UPDATE_NOGRANT);
private static SQLPrivTypeGrant[] OWNER_INS_SEL_DEL_NOGRANT_AR =
arr(SQLPrivTypeGrant.OWNER_PRIV,
SQLPrivTypeGrant.INSERT_NOGRANT,
@@ -287,8 +288,14 @@ public class Operation2Privilege {
op2Priv.put(HiveOperationType.QUERY,
arr(
new PrivRequirement(SEL_NOGRANT_AR, IOType.INPUT),
- new PrivRequirement(INS_NOGRANT_AR, IOType.OUTPUT, null),
- new PrivRequirement(DEL_NOGRANT_AR, IOType.OUTPUT, HivePrivObjectActionType.INSERT_OVERWRITE)
+ new PrivRequirement(INS_NOGRANT_AR, IOType.OUTPUT, HivePrivObjectActionType.INSERT),
+ new PrivRequirement(
+ arr(SQLPrivTypeGrant.INSERT_NOGRANT, SQLPrivTypeGrant.DELETE_NOGRANT),
+ IOType.OUTPUT,
+ HivePrivObjectActionType.INSERT_OVERWRITE),
+ new PrivRequirement(DEL_NOGRANT_AR, IOType.OUTPUT, HivePrivObjectActionType.DELETE),
+ new PrivRequirement(UPD_NOGRANT_AR, IOType.OUTPUT, HivePrivObjectActionType.UPDATE),
+ new PrivRequirement(INS_NOGRANT_AR, IOType.OUTPUT, HivePrivObjectActionType.OTHER)
)
);
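
The QUERY mapping above now keys the output requirement on the action type. A hypothetical
helper summarizing it, reusing the arr() helper visible above (the method itself is illustrative
and not part of Operation2Privilege):

    static SQLPrivTypeGrant[] requiredForQueryOutput(HivePrivObjectActionType action) {
      switch (action) {
        case INSERT_OVERWRITE:   // overwrite needs both INSERT and DELETE
          return arr(SQLPrivTypeGrant.INSERT_NOGRANT, SQLPrivTypeGrant.DELETE_NOGRANT);
        case DELETE:
          return arr(SQLPrivTypeGrant.DELETE_NOGRANT);
        case UPDATE:
          return arr(SQLPrivTypeGrant.UPDATE_NOGRANT);
        default:                 // INSERT and OTHER fall back to INSERT
          return arr(SQLPrivTypeGrant.INSERT_NOGRANT);
      }
    }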
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLAuthorizationUtils.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLAuthorizationUtils.java?rev=1626482&r1=1626481&r2=1626482&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLAuthorizationUtils.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLAuthorizationUtils.java Sat Sep 20 17:34:39 2014
@@ -37,6 +37,7 @@ import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.hive.common.FileUtils;
import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.metastore.IMetaStoreClient;
import org.apache.hadoop.hive.metastore.MetaStoreUtils;
import org.apache.hadoop.hive.metastore.api.Database;
@@ -53,6 +54,8 @@ import org.apache.hadoop.hive.ql.metadat
import org.apache.hadoop.hive.ql.security.authorization.AuthorizationUtils;
import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAccessControlException;
import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzPluginException;
+import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzSessionContext;
+import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzSessionContext.CLIENT_TYPE;
import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveOperationType;
import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrincipal;
import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilege;
@@ -455,4 +458,23 @@ public class SQLAuthorizationUtils {
return hivePrincipals;
}
+ /**
+ * Change the session context based on configuration to aid in testing of SQL
+ * standard authorization.
+ *
+ * @param ctx current session context
+ * @param conf Hive configuration
+ * @return a context rebuilt with client type HIVESERVER2 when the HS2 test mode is
+ *         enabled and the client is the CLI; otherwise ctx unchanged
+ */
+ static HiveAuthzSessionContext applyTestSettings(HiveAuthzSessionContext ctx, HiveConf conf) {
+ if (conf.getBoolVar(ConfVars.HIVE_TEST_AUTHORIZATION_SQLSTD_HS2_MODE)
+ && ctx.getClientType() == CLIENT_TYPE.HIVECLI) {
+ // create new session ctx object with HS2 as client type
+ HiveAuthzSessionContext.Builder ctxBuilder = new HiveAuthzSessionContext.Builder(ctx);
+ ctxBuilder.setClientType(CLIENT_TYPE.HIVESERVER2);
+ return ctxBuilder.build();
+ }
+ return ctx;
+ }
+
}
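
A hedged usage sketch of the relocated applyTestSettings; the config flag is the one referenced
above, and the CLI-flavored context "cliCtx" is assumed to exist:

    HiveConf conf = new HiveConf();
    conf.setBoolVar(ConfVars.HIVE_TEST_AUTHORIZATION_SQLSTD_HS2_MODE, true);
    // a HIVECLI session context is rebuilt with client type HIVESERVER2 for the test run
    HiveAuthzSessionContext hs2Ctx = SQLAuthorizationUtils.applyTestSettings(cliCtx, conf);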
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdHiveAccessController.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdHiveAccessController.java?rev=1626482&r1=1626481&r2=1626482&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdHiveAccessController.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdHiveAccessController.java Sat Sep 20 17:34:39 2014
@@ -90,42 +90,11 @@ public class SQLStdHiveAccessController
HiveAuthenticationProvider authenticator, HiveAuthzSessionContext ctx) throws HiveAuthzPluginException {
this.metastoreClientFactory = metastoreClientFactory;
this.authenticator = authenticator;
- this.sessionCtx = applyTestSettings(ctx, conf);
-
- assertHiveCliAuthDisabled(conf);
- initUserRoles();
+ this.sessionCtx = SQLAuthorizationUtils.applyTestSettings(ctx, conf);
LOG.info("Created SQLStdHiveAccessController for session context : " + sessionCtx);
}
/**
- * Change the session context based on configuration to aid in testing of sql std auth
- * @param ctx
- * @param conf
- * @return
- */
- private HiveAuthzSessionContext applyTestSettings(HiveAuthzSessionContext ctx, HiveConf conf) {
- if(conf.getBoolVar(ConfVars.HIVE_TEST_AUTHORIZATION_SQLSTD_HS2_MODE) &&
- ctx.getClientType() == CLIENT_TYPE.HIVECLI
- ){
- // create new session ctx object with HS2 as client type
- HiveAuthzSessionContext.Builder ctxBuilder = new HiveAuthzSessionContext.Builder(ctx);
- ctxBuilder.setClientType(CLIENT_TYPE.HIVESERVER2);
- return ctxBuilder.build();
- }
- return ctx;
- }
-
- private void assertHiveCliAuthDisabled(HiveConf conf) throws HiveAuthzPluginException {
- if (sessionCtx.getClientType() == CLIENT_TYPE.HIVECLI
- && conf.getBoolVar(ConfVars.HIVE_AUTHORIZATION_ENABLED)) {
- throw new HiveAuthzPluginException(
- "SQL standards based authorization should not be enabled from hive cli"
- + "Instead the use of storage based authorization in hive metastore is reccomended. Set "
- + ConfVars.HIVE_AUTHORIZATION_ENABLED.varname + "=false to disable authz within cli");
- }
- }
-
- /**
* (Re-)initialize currentRoleNames if necessary.
* @throws HiveAuthzPluginException
*/
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdHiveAuthorizationValidator.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdHiveAuthorizationValidator.java?rev=1626482&r1=1626481&r2=1626482&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdHiveAuthorizationValidator.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdHiveAuthorizationValidator.java Sat Sep 20 17:34:39 2014
@@ -25,12 +25,15 @@ import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.metastore.IMetaStoreClient;
import org.apache.hadoop.hive.ql.security.HiveAuthenticationProvider;
import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAccessControlException;
import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthorizationValidator;
import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzContext;
import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzPluginException;
+import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzSessionContext;
+import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzSessionContext.CLIENT_TYPE;
import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveMetastoreClientFactory;
import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveOperationType;
import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrincipal;
@@ -44,16 +47,30 @@ public class SQLStdHiveAuthorizationVali
private final HiveConf conf;
private final HiveAuthenticationProvider authenticator;
private final SQLStdHiveAccessControllerWrapper privController;
+ private final HiveAuthzSessionContext ctx;
public static final Log LOG = LogFactory.getLog(SQLStdHiveAuthorizationValidator.class);
public SQLStdHiveAuthorizationValidator(HiveMetastoreClientFactory metastoreClientFactory,
HiveConf conf, HiveAuthenticationProvider authenticator,
- SQLStdHiveAccessControllerWrapper privilegeManager) {
+ SQLStdHiveAccessControllerWrapper privilegeManager, HiveAuthzSessionContext ctx)
+ throws HiveAuthzPluginException {
this.metastoreClientFactory = metastoreClientFactory;
this.conf = conf;
this.authenticator = authenticator;
this.privController = privilegeManager;
+ this.ctx = SQLAuthorizationUtils.applyTestSettings(ctx, conf);
+ assertHiveCliAuthDisabled(conf);
+ }
+
+ private void assertHiveCliAuthDisabled(HiveConf conf) throws HiveAuthzPluginException {
+ if (ctx.getClientType() == CLIENT_TYPE.HIVECLI
+ && conf.getBoolVar(ConfVars.HIVE_AUTHORIZATION_ENABLED)) {
+ throw new HiveAuthzPluginException(
+ "SQL standards based authorization should not be enabled from hive cli"
+ + "Instead the use of storage based authorization in hive metastore is reccomended. Set "
+ + ConfVars.HIVE_AUTHORIZATION_ENABLED.varname + "=false to disable authz within cli");
+ }
}
@Override
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdHiveAuthorizerFactory.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdHiveAuthorizerFactory.java?rev=1626482&r1=1626481&r2=1626482&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdHiveAuthorizerFactory.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdHiveAuthorizerFactory.java Sat Sep 20 17:34:39 2014
@@ -37,7 +37,7 @@ public class SQLStdHiveAuthorizerFactory
return new HiveAuthorizerImpl(
privilegeManager,
new SQLStdHiveAuthorizationValidator(metastoreClientFactory, conf, authenticator,
- privilegeManager)
+ privilegeManager, ctx)
);
}
}
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java?rev=1626482&r1=1626481&r2=1626482&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java Sat Sep 20 17:34:39 2014
@@ -519,16 +519,17 @@ public class SessionState {
*/
private Path createRootHDFSDir(HiveConf conf) throws IOException {
Path rootHDFSDirPath = new Path(HiveConf.getVar(conf, HiveConf.ConfVars.SCRATCHDIR));
- FsPermission expectedHDFSDirPermission = new FsPermission("777");
+ FsPermission writableHDFSDirPermission = new FsPermission((short)00733);
FileSystem fs = rootHDFSDirPath.getFileSystem(conf);
if (!fs.exists(rootHDFSDirPath)) {
- Utilities.createDirsWithPermission(conf, rootHDFSDirPath, expectedHDFSDirPermission, true);
+ Utilities.createDirsWithPermission(conf, rootHDFSDirPath, writableHDFSDirPermission, true);
}
FsPermission currentHDFSDirPermission = fs.getFileStatus(rootHDFSDirPath).getPermission();
LOG.debug("HDFS root scratch dir: " + rootHDFSDirPath + ", permission: "
+ currentHDFSDirPermission);
- // If the root HDFS scratch dir already exists, make sure the permissions are 777.
- if (!expectedHDFSDirPermission.equals(fs.getFileStatus(rootHDFSDirPath).getPermission())) {
+ // If the root HDFS scratch dir already exists, make sure it is writeable.
+ if (!((currentHDFSDirPermission.toShort() & writableHDFSDirPermission
+ .toShort()) == writableHDFSDirPermission.toShort())) {
throw new RuntimeException("The root scratch dir: " + rootHDFSDirPath
+ " on HDFS should be writable. Current permissions are: " + currentHDFSDirPermission);
}
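
The scratch-dir check above no longer requires exactly 777; it only requires that the 733 bits
be present. A self-contained sketch of that bitmask test (class name and sample values are
illustrative):

    import org.apache.hadoop.fs.permission.FsPermission;

    public class ScratchDirPermCheck {
      public static void main(String[] args) {
        FsPermission required = new FsPermission((short) 00733);
        for (short p : new short[] {(short) 00777, (short) 00733, (short) 00700}) {
          FsPermission current = new FsPermission(p);
          // writable iff every bit required by 733 is set in the current permission
          boolean ok = (current.toShort() & required.toShort()) == required.toShort();
          System.out.println(String.format("%03o -> %s", p, ok)); // 777 -> true, 733 -> true, 700 -> false
        }
      }
    }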
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIndex.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIndex.java?rev=1626482&r1=1626481&r2=1626482&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIndex.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIndex.java Sat Sep 20 17:34:39 2014
@@ -26,9 +26,11 @@ import org.apache.hadoop.hive.ql.metadat
import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.io.IntWritable;
/**
* GenericUDFIndex.
@@ -36,11 +38,10 @@ import org.apache.hadoop.hive.serde2.obj
*/
@Description(name = "index", value = "_FUNC_(a, n) - Returns the n-th element of a ")
public class GenericUDFIndex extends GenericUDF {
+
private transient MapObjectInspector mapOI;
- private boolean mapKeyPreferWritable;
private transient ListObjectInspector listOI;
- private transient PrimitiveObjectInspector indexOI;
- private transient ObjectInspector returnOI;
+ private transient ObjectInspectorConverters.Converter converter;
@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
@@ -66,21 +67,22 @@ public class GenericUDFIndex extends Gen
}
// index has to be a primitive
- if (arguments[1] instanceof PrimitiveObjectInspector) {
- indexOI = (PrimitiveObjectInspector) arguments[1];
- } else {
+ if (!(arguments[1] instanceof PrimitiveObjectInspector)) {
throw new UDFArgumentTypeException(1, "Primitive Type is expected but \""
+ arguments[1].getTypeName() + "\" is found");
}
-
+ PrimitiveObjectInspector inputOI = (PrimitiveObjectInspector) arguments[1];
+ ObjectInspector returnOI;
+ ObjectInspector indexOI;
if (mapOI != null) {
+ indexOI = ObjectInspectorConverters.getConvertedOI(
+ inputOI, mapOI.getMapKeyObjectInspector());
returnOI = mapOI.getMapValueObjectInspector();
- ObjectInspector keyOI = mapOI.getMapKeyObjectInspector();
- mapKeyPreferWritable = ((PrimitiveObjectInspector) keyOI)
- .preferWritable();
} else {
+ indexOI = PrimitiveObjectInspectorFactory.writableIntObjectInspector;
returnOI = listOI.getListElementObjectInspector();
}
+ converter = ObjectInspectorConverters.getConverter(inputOI, indexOI);
return returnOI;
}
@@ -88,35 +90,16 @@ public class GenericUDFIndex extends Gen
@Override
public Object evaluate(DeferredObject[] arguments) throws HiveException {
assert (arguments.length == 2);
- Object main = arguments[0].get();
Object index = arguments[1].get();
+ Object indexObject = converter.convert(index);
+ if (indexObject == null) {
+ return null;
+ }
if (mapOI != null) {
-
- Object indexObject;
- if (mapKeyPreferWritable) {
- indexObject = indexOI.getPrimitiveWritableObject(index);
- } else {
- indexObject = indexOI.getPrimitiveJavaObject(index);
- }
- return mapOI.getMapValueElement(main, indexObject);
-
- } else {
-
- assert (listOI != null);
- int intIndex = 0;
- try {
- intIndex = PrimitiveObjectInspectorUtils.getInt(index, indexOI);
- } catch (NullPointerException e) {
- // If index is null, we should return null.
- return null;
- } catch (NumberFormatException e) {
- // If index is not a number, we should return null.
- return null;
- }
- return listOI.getListElement(main, intIndex);
-
+ return mapOI.getMapValueElement(arguments[0].get(), indexObject);
}
+ return listOI.getListElement(arguments[0].get(), ((IntWritable)indexObject).get());
}
@Override
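
A hedged sketch of the converter-based index handling above, assuming a string-typed index on a
list column; the converter is expected to yield an IntWritable, or null when the value does not
parse, which is why evaluate() can simply return null in that case:

    PrimitiveObjectInspector inputOI =
        PrimitiveObjectInspectorFactory.javaStringObjectInspector;
    ObjectInspector indexOI =
        PrimitiveObjectInspectorFactory.writableIntObjectInspector;
    ObjectInspectorConverters.Converter converter =
        ObjectInspectorConverters.getConverter(inputOI, indexOI);

    Object three = converter.convert("3");        // IntWritable holding 3
    Object notANumber = converter.convert("abc"); // expected to be null -> UDF returns null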
Modified: hive/branches/spark/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java?rev=1626482&r1=1626481&r2=1626482&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java (original)
+++ hive/branches/spark/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java Sat Sep 20 17:34:39 2014
@@ -20,7 +20,6 @@ package org.apache.hadoop.hive.ql.exec;
import java.lang.reflect.Method;
import java.util.ArrayList;
-import java.util.Arrays;
import java.util.LinkedList;
import java.util.List;
@@ -80,7 +79,7 @@ public class TestFunctionRegistry extend
}
private void implicit(TypeInfo a, TypeInfo b, boolean convertible) {
- assertEquals(convertible, FunctionRegistry.implicitConvertable(a,b));
+ assertEquals(convertible, FunctionRegistry.implicitConvertible(a, b));
}
public void testImplicitConversion() {
Modified: hive/branches/spark/ql/src/test/org/apache/hadoop/hive/ql/exec/TestOperators.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/org/apache/hadoop/hive/ql/exec/TestOperators.java?rev=1626482&r1=1626481&r2=1626482&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/org/apache/hadoop/hive/ql/exec/TestOperators.java (original)
+++ hive/branches/spark/ql/src/test/org/apache/hadoop/hive/ql/exec/TestOperators.java Sat Sep 20 17:34:39 2014
@@ -18,17 +18,24 @@
package org.apache.hadoop.hive.ql.exec;
+import java.io.File;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.LinkedHashMap;
+import java.util.List;
import java.util.Map;
import junit.framework.TestCase;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.Driver;
import org.apache.hadoop.hive.ql.io.IOContext;
import org.apache.hadoop.hive.ql.parse.TypeCheckProcFactory;
import org.apache.hadoop.hive.ql.plan.CollectDesc;
@@ -42,6 +49,10 @@ import org.apache.hadoop.hive.ql.plan.Pl
import org.apache.hadoop.hive.ql.plan.ScriptDesc;
import org.apache.hadoop.hive.ql.plan.SelectDesc;
import org.apache.hadoop.hive.ql.plan.TableDesc;
+import org.apache.hadoop.hive.ql.processors.CommandProcessor;
+import org.apache.hadoop.hive.ql.processors.CommandProcessorFactory;
+import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse;
+import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.serde2.objectinspector.InspectableObject;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
@@ -49,8 +60,14 @@ import org.apache.hadoop.hive.serde2.obj
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.RecordReader;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.mapred.TextInputFormat;
+import org.junit.Test;
/**
* TestOperators.
@@ -274,7 +291,7 @@ public class TestOperators extends TestC
cd, sop);
op.initialize(new JobConf(TestOperators.class),
- new ObjectInspector[] {r[0].oi});
+ new ObjectInspector[]{r[0].oi});
// evaluate on row
for (int i = 0; i < 5; i++) {
@@ -379,4 +396,82 @@ public class TestOperators extends TestC
throw (e);
}
}
+
+ @Test
+ public void testFetchOperatorContextQuoting() throws Exception {
+ JobConf conf = new JobConf();
+ ArrayList<Path> list = new ArrayList<Path>();
+ list.add(new Path("hdfs://nn.example.com/fi\tl\\e\t1"));
+ list.add(new Path("hdfs://nn.example.com/file\t2"));
+ list.add(new Path("file:/file3"));
+ FetchOperator.setFetchOperatorContext(conf, list);
+ String[] parts =
+ conf.get(FetchOperator.FETCH_OPERATOR_DIRECTORY_LIST).split("\t");
+ assertEquals(3, parts.length);
+ assertEquals("hdfs://nn.example.com/fi\\tl\\\\e\\t1", parts[0]);
+ assertEquals("hdfs://nn.example.com/file\\t2", parts[1]);
+ assertEquals("file:/file3", parts[2]);
+ }
+
+ /**
+ * A custom input format that checks to make sure that the fetch operator
+ * sets the required attributes.
+ */
+ public static class CustomInFmt extends TextInputFormat {
+
+ @Override
+ public InputSplit[] getSplits(JobConf job, int splits) throws IOException {
+
+ // ensure that the table properties were copied
+ assertEquals("val1", job.get("myprop1"));
+ assertEquals("val2", job.get("myprop2"));
+
+ // ensure that both of the partitions are in the complete list.
+ String[] dirs = job.get("hive.complete.dir.list").split("\t");
+ assertEquals(2, dirs.length);
+ assertEquals(true, dirs[0].endsWith("/state=CA"));
+ assertEquals(true, dirs[1].endsWith("/state=OR"));
+ return super.getSplits(job, splits);
+ }
+ }
+
+ @Test
+ public void testFetchOperatorContext() throws Exception {
+ HiveConf conf = new HiveConf();
+ conf.set("hive.support.concurrency", "false");
+ SessionState.start(conf);
+ String cmd = "create table fetchOp (id int, name string) " +
+ "partitioned by (state string) " +
+ "row format delimited fields terminated by '|' " +
+ "stored as " +
+ "inputformat 'org.apache.hadoop.hive.ql.exec.TestOperators$CustomInFmt' " +
+ "outputformat 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' " +
+ "tblproperties ('myprop1'='val1', 'myprop2' = 'val2')";
+ Driver driver = new Driver();
+ driver.init();
+ CommandProcessorResponse response = driver.run(cmd);
+ assertEquals(0, response.getResponseCode());
+ List<Object> result = new ArrayList<Object>();
+
+ cmd = "load data local inpath '../data/files/employee.dat' " +
+ "overwrite into table fetchOp partition (state='CA')";
+ driver.init();
+ response = driver.run(cmd);
+ assertEquals(0, response.getResponseCode());
+
+ cmd = "load data local inpath '../data/files/employee2.dat' " +
+ "overwrite into table fetchOp partition (state='OR')";
+ driver.init();
+ response = driver.run(cmd);
+ assertEquals(0, response.getResponseCode());
+
+ cmd = "select * from fetchOp";
+ driver.init();
+ driver.setMaxRows(500);
+ response = driver.run(cmd);
+ assertEquals(0, response.getResponseCode());
+ driver.getResults(result);
+ assertEquals(20, result.size());
+ driver.close();
+ }
}
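
The quoting test above expects paths in FETCH_OPERATOR_DIRECTORY_LIST to have backslashes doubled
and literal tab characters rewritten as "\t" before the paths are joined with an unescaped tab
separator. A hypothetical helper capturing that rule (name and placement are illustrative):

    static String escapeForDirList(String path) {
      // order matters: double existing backslashes first, then escape the tab characters
      return path.replace("\\", "\\\\").replace("\t", "\\t");
    }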