Posted to commits@hive.apache.org by th...@apache.org on 2014/01/25 03:45:37 UTC
svn commit: r1561248 - in /hive/trunk: common/src/java/org/apache/hadoop/hive/conf/ ql/src/java/org/apache/hadoop/hive/ql/io/orc/ ql/src/test/org/apache/hadoop/hive/ql/io/orc/ serde/src/java/org/apache/hadoop/hive/serde2/ shims/0.20/src/main/java/org/a...
Author: thejas
Date: Sat Jan 25 02:45:37 2014
New Revision: 1561248
URL: http://svn.apache.org/r1561248
Log:
Reverting HIVE-5728 patch
Removed:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcNewInputFormat.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcNewOutputFormat.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcNewSplit.java
hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestNewInputOutputFormat.java
Modified:
hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcStruct.java
hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/ColumnProjectionUtils.java
hive/trunk/shims/0.20/src/main/java/org/apache/hadoop/hive/shims/Hadoop20Shims.java
hive/trunk/shims/0.20S/src/main/java/org/apache/hadoop/hive/shims/Hadoop20SShims.java
hive/trunk/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
hive/trunk/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShims.java
Modified: hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
URL: http://svn.apache.org/viewvc/hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java?rev=1561248&r1=1561247&r2=1561248&view=diff
==============================================================================
--- hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (original)
+++ hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java Sat Jan 25 02:45:37 2014
@@ -517,19 +517,8 @@ public class HiveConf extends Configurat
// Define the default ORC stripe size
HIVE_ORC_DEFAULT_STRIPE_SIZE("hive.exec.orc.default.stripe.size",
256L * 1024 * 1024),
- // Define the default ORC index stripe
- HIVE_ORC_DEFAULT_ROW_INDEX_STRIDE("hive.exec.orc.default.row.index.stride"
- , null),
- // Define the default ORC buffer size
- HIVE_ORC_DEFAULT_BUFFER_SIZE("hive.exec.orc.default.buffer.size", null),
- // Define the default block padding
- HIVE_ORC_DEFAULT_BLOCK_PADDING("hive.exec.orc.default.block.padding",
- null),
- // Define the default orc compress
- HIVE_ORC_DEFAULT_COMPRESS("hive.exec.orc.default.compress", null),
- HIVE_ORC_DICTIONARY_KEY_SIZE_THRESHOLD(
- "hive.exec.orc.dictionary.key.size.threshold", 0.8f),
+ HIVE_ORC_DICTIONARY_KEY_SIZE_THRESHOLD("hive.exec.orc.dictionary.key.size.threshold", 0.8f),
HIVE_ORC_INCLUDE_FILE_FOOTER_IN_SPLITS("hive.orc.splits.include.file.footer", false),
HIVE_ORC_CACHE_STRIPE_DETAILS_SIZE("hive.orc.cache.stripe.details.size", 10000),
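[Note: this hunk drops the config-driven ORC defaults that HIVE-5728 had added (row index stride, buffer size, block padding, compression) and keeps only the stripe-size default. A minimal sketch of how the surviving key is still resolved, mirroring the HiveConf call that remains in OrcFile.java below:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hive.conf.HiveConf;

    Configuration conf = new Configuration();
    // Only hive.exec.orc.default.stripe.size survives the revert; the
    // other hive.exec.orc.default.* keys above no longer exist.
    long stripeSize =
        conf.getLong(HiveConf.ConfVars.HIVE_ORC_DEFAULT_STRIPE_SIZE.varname,
            256L * 1024 * 1024);
]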
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java?rev=1561248&r1=1561247&r2=1561248&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java Sat Jan 25 02:45:37 2014
@@ -153,19 +153,6 @@ public final class OrcFile {
stripeSizeValue =
conf.getLong(HiveConf.ConfVars.HIVE_ORC_DEFAULT_STRIPE_SIZE.varname,
DEFAULT_STRIPE_SIZE);
- rowIndexStrideValue =
- conf.getInt(HiveConf.ConfVars.HIVE_ORC_DEFAULT_ROW_INDEX_STRIDE
- .varname, DEFAULT_ROW_INDEX_STRIDE);
- bufferSizeValue =
- conf.getInt(HiveConf.ConfVars.HIVE_ORC_DEFAULT_BUFFER_SIZE.varname,
- DEFAULT_ROW_INDEX_STRIDE);
- blockPaddingValue =
- conf.getBoolean(HiveConf.ConfVars.HIVE_ORC_DEFAULT_BLOCK_PADDING
- .varname, DEFAULT_BLOCK_PADDING);
- compressValue =
- CompressionKind.valueOf(conf.get(HiveConf.ConfVars
- .HIVE_ORC_DEFAULT_COMPRESS.varname,
- DEFAULT_COMPRESSION_KIND.toString()));
String versionName =
conf.get(HiveConf.ConfVars.HIVE_ORC_WRITE_FORMAT.varname);
if (versionName == null) {
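[Note: with the conf-driven defaults gone from the WriterOptions constructor, callers that want non-default values set them on the builder instead. A minimal sketch, assuming the pre-HIVE-5728 WriterOptions builder methods; the output path and `inspector` are placeholders:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hive.ql.io.orc.CompressionKind;
    import org.apache.hadoop.hive.ql.io.orc.OrcFile;
    import org.apache.hadoop.hive.ql.io.orc.Writer;

    Configuration conf = new Configuration();
    Writer writer = OrcFile.createWriter(new Path("/tmp/example.orc"),
        OrcFile.writerOptions(conf)
            .inspector(inspector)           // hypothetical ObjectInspector
            .stripeSize(256L * 1024 * 1024)
            .rowIndexStride(10000)
            .bufferSize(256 * 1024)
            .blockPadding(true)
            .compress(CompressionKind.ZLIB));
    // rows are then appended with writer.addRow(...) and writer.close()
]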
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java?rev=1561248&r1=1561247&r2=1561248&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java Sat Jan 25 02:45:37 2014
@@ -44,8 +44,9 @@ import org.apache.hadoop.hive.ql.exec.ve
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.io.InputFormatChecker;
import org.apache.hadoop.hive.ql.io.orc.Metadata;
+import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat.FileGenerator;
+import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat.SplitGenerator;
import org.apache.hadoop.hive.ql.io.orc.Reader.FileMetaInfo;
-import org.apache.hadoop.hive.ql.io.orc.RecordReader;
import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
import org.apache.hadoop.hive.ql.log.PerfLogger;
@@ -62,6 +63,7 @@ import org.apache.hadoop.mapred.InputFor
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.InvalidInputException;
import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.util.StringUtils;
@@ -97,8 +99,8 @@ public class OrcInputFormat implements
private static final double MIN_INCLUDED_LOCATION = 0.80;
private static class OrcRecordReader
- implements org.apache.hadoop.mapred.RecordReader<NullWritable, OrcStruct> {
- private final RecordReader reader;
+ implements RecordReader<NullWritable, OrcStruct> {
+ private final org.apache.hadoop.hive.ql.io.orc.RecordReader reader;
private final long offset;
private final long length;
private final int numColumns;
@@ -109,7 +111,10 @@ public class OrcInputFormat implements
long offset, long length) throws IOException {
List<OrcProto.Type> types = file.getTypes();
numColumns = (types.size() == 0) ? 0 : types.get(0).getSubtypesCount();
- this.reader = createReaderFromFile(file, conf, offset, length);
+ boolean[] includedColumns = findIncludedColumns(types, conf);
+ String[] columnNames = getIncludedColumnNames(types, includedColumns, conf);
+ SearchArgument sarg = createSarg(types, conf);
+ this.reader = file.rows(offset, length, includedColumns, sarg, columnNames);
this.offset = offset;
this.length = length;
}
@@ -150,19 +155,6 @@ public class OrcInputFormat implements
return progress;
}
}
-
- static RecordReader createReaderFromFile(
- Reader file, Configuration conf, long offset, long length)
- throws IOException {
- List<OrcProto.Type> types = file.getTypes();
- boolean[] includedColumns = findIncludedColumns(types, conf);
- String[] columnNames = getIncludedColumnNames(types, includedColumns,
- conf);
- SearchArgument sarg = createSarg(types, conf);
- RecordReader reader =
- file.rows(offset, length, includedColumns, sarg, columnNames);
- return reader;
- }
private static final PathFilter hiddenFileFilter = new PathFilter(){
public boolean accept(Path p){
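[Note: the revert folds createReaderFromFile back into the OrcRecordReader constructor and re-imports the mapred RecordReader under its short name. Downstream, the reader is consumed through the old-API iteration contract; a minimal sketch, with `recordReader` standing in for the value returned by getRecordReader:

    NullWritable key = recordReader.createKey();
    OrcStruct value = recordReader.createValue();
    // (OrcStruct is package-private after this revert, so code like this
    // lives inside org.apache.hadoop.hive.ql.io.orc)
    while (recordReader.next(key, value)) {
      // each call fills `value` with the next ORC row
    }
    recordReader.close();
]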
@@ -252,15 +244,14 @@ public class OrcInputFormat implements
}
}
- @SuppressWarnings("unchecked")
@Override
- public org.apache.hadoop.mapred.RecordReader<NullWritable, OrcStruct>
+ public RecordReader<NullWritable, OrcStruct>
getRecordReader(InputSplit inputSplit, JobConf conf,
Reporter reporter) throws IOException {
if (isVectorMode(conf)) {
- org.apache.hadoop.mapred.RecordReader<NullWritable, VectorizedRowBatch> vorr = voif.getRecordReader(inputSplit, conf,
+ RecordReader<NullWritable, VectorizedRowBatch> vorr = voif.getRecordReader(inputSplit, conf,
reporter);
- return (org.apache.hadoop.mapred.RecordReader) vorr;
+ return (RecordReader) vorr;
}
FileSplit fSplit = (FileSplit)inputSplit;
reporter.setStatus(fSplit.toString());
@@ -317,7 +308,7 @@ public class OrcInputFormat implements
* @param conf The configuration of the job
* @return the list of input {@link Path}s for the map-reduce job.
*/
- static Path[] getInputPaths(Configuration conf) throws IOException {
+ static Path[] getInputPaths(JobConf conf) throws IOException {
String dirs = conf.get("mapred.input.dir");
if (dirs == null) {
throw new IOException("Configuration mapred.input.dir is not defined.");
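[Note: getInputPaths now takes the JobConf directly but still reads the old-API "mapred.input.dir" property. A minimal sketch of how that property is normally populated; the table paths are placeholders:

    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.mapred.FileInputFormat;
    import org.apache.hadoop.mapred.JobConf;

    JobConf job = new JobConf();
    // setInputPaths writes the comma-separated list that
    // getInputPaths(job) reads back from "mapred.input.dir".
    FileInputFormat.setInputPaths(job,
        new Path("/warehouse/t1"), new Path("/warehouse/t2"));
]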
@@ -335,41 +326,10 @@ public class OrcInputFormat implements
* the different worker threads.
*/
static class Context {
- static class FileSplitInfo {
- FileSplitInfo(Path file, long start, long length, String[] hosts,
- FileMetaInfo fileMetaInfo) {
- this.file = file;
- this.start = start;
- this.length = length;
- this.hosts = hosts;
- this.fileMetaInfo = fileMetaInfo;
- }
- Path getPath() {
- return file;
- }
- long getStart() {
- return start;
- }
- long getLength() {
- return length;
- }
- String[] getLocations() {
- return hosts;
- }
- FileMetaInfo getFileMetaInfo() {
- return fileMetaInfo;
- }
- private Path file;
- private long start;
- private long length;
- private String[] hosts;
- FileMetaInfo fileMetaInfo;
- }
private final Configuration conf;
private static Cache<Path, FileInfo> footerCache;
private final ExecutorService threadPool;
- private final List<FileSplitInfo> splits =
- new ArrayList<FileSplitInfo>(10000);
+ private final List<OrcSplit> splits = new ArrayList<OrcSplit>(10000);
private final List<Throwable> errors = new ArrayList<Throwable>();
private final HadoopShims shims = ShimLoader.getHadoopShims();
private final long maxSize;
@@ -418,7 +378,7 @@ public class OrcInputFormat implements
* the back.
* @result the Nth file split
*/
- FileSplitInfo getResult(int index) {
+ OrcSplit getResult(int index) {
if (index >= 0) {
return splits.get(index);
} else {
@@ -596,8 +556,8 @@ public class OrcInputFormat implements
if(locations.length == 1 && file.getLen() < context.maxSize) {
String[] hosts = locations[0].getHosts();
synchronized (context.splits) {
- context.splits.add(new Context.FileSplitInfo(file.getPath(), 0,
- file.getLen(), hosts, fileMetaInfo));
+ context.splits.add(new OrcSplit(file.getPath(), 0, file.getLen(),
+ hosts, fileMetaInfo));
}
} else {
// if it requires a compute task
@@ -683,8 +643,8 @@ public class OrcInputFormat implements
hostList.toArray(hosts);
}
synchronized (context.splits) {
- context.splits.add(new Context.FileSplitInfo(file.getPath(), offset,
- length, hosts, fileMetaInfo));
+ context.splits.add(new OrcSplit(file.getPath(), offset, length,
+ hosts, fileMetaInfo));
}
}
@@ -891,45 +851,35 @@ public class OrcInputFormat implements
}
}
- static List<Context.FileSplitInfo> generateSplitsInfo(Configuration conf)
- throws IOException {
- // use threads to resolve directories into splits
- Context context = new Context(conf);
- for(Path dir: getInputPaths(conf)) {
- FileSystem fs = dir.getFileSystem(conf);
- context.schedule(new FileGenerator(context, fs, dir));
- }
- context.waitForTasks();
- // deal with exceptions
- if (!context.errors.isEmpty()) {
- List<IOException> errors =
- new ArrayList<IOException>(context.errors.size());
- for(Throwable th: context.errors) {
- if (th instanceof IOException) {
- errors.add((IOException) th);
- } else {
- throw new RuntimeException("serious problem", th);
- }
- }
- throw new InvalidInputException(errors);
- }
- if (context.cacheStripeDetails) {
- LOG.info("FooterCacheHitRatio: " + context.cacheHitCounter.get() + "/"
- + context.numFilesCounter.get());
- }
- return context.splits;
- }
@Override
public InputSplit[] getSplits(JobConf job,
int numSplits) throws IOException {
+ // use threads to resolve directories into splits
perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.ORC_GET_SPLITS);
- List<OrcInputFormat.Context.FileSplitInfo> splits =
- OrcInputFormat.generateSplitsInfo(job);
- InputSplit[] result = new InputSplit[splits.size()];
- for (int i=0;i<splits.size();i++) {
- OrcInputFormat.Context.FileSplitInfo split = splits.get(i);
- result[i] = new OrcSplit(split.getPath(), split.getStart(),
- split.getLength(), split.getLocations(), split.getFileMetaInfo());
+ Context context = new Context(job);
+ for(Path dir: getInputPaths(job)) {
+ FileSystem fs = dir.getFileSystem(job);
+ context.schedule(new FileGenerator(context, fs, dir));
+ }
+ context.waitForTasks();
+ // deal with exceptions
+ if (!context.errors.isEmpty()) {
+ List<IOException> errors =
+ new ArrayList<IOException>(context.errors.size());
+ for(Throwable th: context.errors) {
+ if (th instanceof IOException) {
+ errors.add((IOException) th);
+ } else {
+ throw new RuntimeException("serious problem", th);
+ }
+ }
+ throw new InvalidInputException(errors);
+ }
+ InputSplit[] result = new InputSplit[context.splits.size()];
+ context.splits.toArray(result);
+ if (context.cacheStripeDetails) {
+ LOG.info("FooterCacheHitRatio: " + context.cacheHitCounter.get() + "/"
+ + context.numFilesCounter.get());
}
perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.ORC_GET_SPLITS);
return result;
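[Note: with generateSplitsInfo removed, getSplits builds the Context itself, schedules a FileGenerator per input directory, and returns the accumulated OrcSplits directly. A minimal sketch of driving it end to end; the table path is a placeholder:

    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.mapred.FileInputFormat;
    import org.apache.hadoop.mapred.InputSplit;
    import org.apache.hadoop.mapred.JobConf;

    JobConf job = new JobConf();
    FileInputFormat.setInputPaths(job, new Path("/warehouse/orc_table"));
    OrcInputFormat format = new OrcInputFormat();
    // numSplits is only a hint in the old mapred API; the actual count
    // comes from the FileGenerator/SplitGenerator tasks above.
    InputSplit[] splits = format.getSplits(job, 1);
]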
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcStruct.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcStruct.java?rev=1561248&r1=1561247&r2=1561248&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcStruct.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcStruct.java Sat Jan 25 02:45:37 2014
@@ -43,7 +43,7 @@ import org.apache.hadoop.hive.serde2.typ
import org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo;
import org.apache.hadoop.io.Writable;
-final public class OrcStruct implements Writable {
+final class OrcStruct implements Writable {
private Object[] fields;
@@ -461,7 +461,7 @@ final public class OrcStruct implements
}
}
- static public ObjectInspector createObjectInspector(TypeInfo info) {
+ static ObjectInspector createObjectInspector(TypeInfo info) {
switch (info.getCategory()) {
case PRIMITIVE:
switch (((PrimitiveTypeInfo) info).getPrimitiveCategory()) {
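[Note: OrcStruct and createObjectInspector drop back to package-private, so code outside the package reads rows through the ObjectInspector machinery rather than the class itself. A minimal sketch of field access, where the inspector and row object are placeholders for what the reading side hands out:

    import java.util.List;
    import org.apache.hadoop.hive.serde2.objectinspector.StructField;
    import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;

    static Object firstColumn(StructObjectInspector inspector, Object row) {
      List<? extends StructField> fields = inspector.getAllStructFieldRefs();
      return inspector.getStructFieldData(row, fields.get(0));
    }
]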
Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java?rev=1561248&r1=1561247&r2=1561248&view=diff
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java (original)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java Sat Jan 25 02:45:37 2014
@@ -428,7 +428,7 @@ public class TestInputOutputFormat {
new OrcInputFormat.SplitGenerator(context, fs,
fs.getFileStatus(new Path("/a/file")), null);
splitter.createSplit(0, 200, null);
- OrcInputFormat.Context.FileSplitInfo result = context.getResult(-1);
+ FileSplit result = context.getResult(-1);
assertEquals(0, result.getStart());
assertEquals(200, result.getLength());
assertEquals("/a/file", result.getPath().toString());
@@ -477,7 +477,7 @@ public class TestInputOutputFormat {
}
throw new IOException("Errors during splitting");
}
- OrcInputFormat.Context.FileSplitInfo result = context.getResult(0);
+ FileSplit result = context.getResult(0);
assertEquals(3, result.getStart());
assertEquals(497, result.getLength());
result = context.getResult(1);
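[Note: the test now types getResult's return as FileSplit, which works because the OrcSplit instances added to context.splits are constructed with the usual path/start/length/hosts arguments of a FileSplit subclass. A minimal sketch of the widened access, with values taken from the first hunk:

    // OrcSplit is assignable to FileSplit, so no cast is needed.
    FileSplit result = context.getResult(-1);
    assertEquals(0, result.getStart());
    assertEquals("/a/file", result.getPath().toString());
]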
Modified: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/ColumnProjectionUtils.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/ColumnProjectionUtils.java?rev=1561248&r1=1561247&r2=1561248&view=diff
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/ColumnProjectionUtils.java (original)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/ColumnProjectionUtils.java Sat Jan 25 02:45:37 2014
@@ -31,9 +31,9 @@ import org.apache.hadoop.util.StringUtil
public final class ColumnProjectionUtils {
public static final String READ_COLUMN_IDS_CONF_STR = "hive.io.file.readcolumn.ids";
- public static final String READ_ALL_COLUMNS = "hive.io.file.read.all.columns";
public static final String READ_COLUMN_NAMES_CONF_STR = "hive.io.file.readcolumn.names";
private static final String READ_COLUMN_IDS_CONF_STR_DEFAULT = "";
+ private static final String READ_ALL_COLUMNS = "hive.io.file.read.all.columns";
private static final boolean READ_ALL_COLUMNS_DEFAULT = true;
/**
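[Note: READ_ALL_COLUMNS goes back to private, so column selection from outside this class stays on the two public readcolumn properties. A minimal sketch; the ids and names are placeholders:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;

    Configuration conf = new Configuration();
    conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0,2");
    conf.set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, "id,name");
]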
Modified: hive/trunk/shims/0.20/src/main/java/org/apache/hadoop/hive/shims/Hadoop20Shims.java
URL: http://svn.apache.org/viewvc/hive/trunk/shims/0.20/src/main/java/org/apache/hadoop/hive/shims/Hadoop20Shims.java?rev=1561248&r1=1561247&r2=1561248&view=diff
==============================================================================
--- hive/trunk/shims/0.20/src/main/java/org/apache/hadoop/hive/shims/Hadoop20Shims.java (original)
+++ hive/trunk/shims/0.20/src/main/java/org/apache/hadoop/hive/shims/Hadoop20Shims.java Sat Jan 25 02:45:37 2014
@@ -773,8 +773,4 @@ public class Hadoop20Shims implements Ha
ret.put("MAPREDTASKCLEANUPNEEDED", "mapreduce.job.committer.task.cleanup.needed");
return ret;
}
- @Override
- public Configuration getConfiguration(org.apache.hadoop.mapreduce.JobContext context) {
- return context.getConfiguration();
- }
}
Modified: hive/trunk/shims/0.20S/src/main/java/org/apache/hadoop/hive/shims/Hadoop20SShims.java
URL: http://svn.apache.org/viewvc/hive/trunk/shims/0.20S/src/main/java/org/apache/hadoop/hive/shims/Hadoop20SShims.java?rev=1561248&r1=1561247&r2=1561248&view=diff
==============================================================================
--- hive/trunk/shims/0.20S/src/main/java/org/apache/hadoop/hive/shims/Hadoop20SShims.java (original)
+++ hive/trunk/shims/0.20S/src/main/java/org/apache/hadoop/hive/shims/Hadoop20SShims.java Sat Jan 25 02:45:37 2014
@@ -410,9 +410,4 @@ public class Hadoop20SShims extends Hado
ret.put("MAPREDTASKCLEANUPNEEDED", "mapreduce.job.committer.task.cleanup.needed");
return ret;
}
-
- @Override
- public Configuration getConfiguration(org.apache.hadoop.mapreduce.JobContext context) {
- return context.getConfiguration();
- }
}
Modified: hive/trunk/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
URL: http://svn.apache.org/viewvc/hive/trunk/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java?rev=1561248&r1=1561247&r2=1561248&view=diff
==============================================================================
--- hive/trunk/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java (original)
+++ hive/trunk/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java Sat Jan 25 02:45:37 2014
@@ -557,9 +557,4 @@ public class Hadoop23Shims extends Hadoo
ret.put("MAPREDTASKCLEANUPNEEDED", "mapreduce.job.committer.task.cleanup.needed");
return ret;
}
-
- @Override
- public Configuration getConfiguration(org.apache.hadoop.mapreduce.JobContext context) {
- return context.getConfiguration();
- }
}
Modified: hive/trunk/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShims.java
URL: http://svn.apache.org/viewvc/hive/trunk/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShims.java?rev=1561248&r1=1561247&r2=1561248&view=diff
==============================================================================
--- hive/trunk/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShims.java (original)
+++ hive/trunk/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShims.java Sat Jan 25 02:45:37 2014
@@ -520,10 +520,4 @@ public interface HadoopShims {
public FileSystem createProxyFileSystem(FileSystem fs, URI uri);
public Map<String, String> getHadoopConfNames();
-
-
- /**
- * Get configuration from JobContext
- */
- public Configuration getConfiguration(JobContext context);
}
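[Note: the getConfiguration shim appears to have existed only for the mapreduce-API ORC classes removed above, so the interface method and its three implementations go with them. Code that still needs this can call the mapreduce API directly; a minimal sketch:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.mapreduce.JobContext;

    static Configuration confOf(JobContext context) {
      // JobContext.getConfiguration() exists on both the 0.20 and 0.23
      // lines, though the class-vs-interface change between them means
      // this must be compiled against the matching Hadoop version.
      return context.getConfiguration();
    }
]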