You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by pr...@apache.org on 2016/05/31 18:49:46 UTC
hive git commit: HIVE-13840: Orc split generation is reading file
footers twice (Prasanth Jayachandran reviewed by Owen O'Malley)
Repository: hive
Updated Branches:
refs/heads/master e1626ef3a -> b7166d7d5
HIVE-13840: Orc split generation is reading file footers twice (Prasanth Jayachandran reviewed by Owen O'Malley)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/b7166d7d
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/b7166d7d
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/b7166d7d
Branch: refs/heads/master
Commit: b7166d7d5d5fc28c88cc245ef74b48ded20da24a
Parents: e1626ef
Author: Prasanth Jayachandran <pr...@apache.org>
Authored: Tue May 31 11:48:16 2016 -0700
Committer: Prasanth Jayachandran <pr...@apache.org>
Committed: Tue May 31 11:49:27 2016 -0700
----------------------------------------------------------------------
.../java/org/apache/orc/impl/ReaderImpl.java | 1 +
.../hadoop/hive/ql/io/orc/OrcInputFormat.java | 4 +-
.../hadoop/hive/ql/io/orc/ReaderImpl.java | 9 +-
.../hive/ql/io/orc/TestInputOutputFormat.java | 159 +++++++++++++++++++
4 files changed, 165 insertions(+), 8 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/b7166d7d/orc/src/java/org/apache/orc/impl/ReaderImpl.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/impl/ReaderImpl.java b/orc/src/java/org/apache/orc/impl/ReaderImpl.java
index 2da590e..1dd5e43 100644
--- a/orc/src/java/org/apache/orc/impl/ReaderImpl.java
+++ b/orc/src/java/org/apache/orc/impl/ReaderImpl.java
@@ -345,6 +345,7 @@ public class ReaderImpl implements Reader {
options.getMaxLength());
this.footerMetaAndPsBuffer = footerMetaData.footerMetaAndPsBuffer;
}
+ options.fileMetaInfo(footerMetaData);
MetaInfoObjExtractor rInfo =
new MetaInfoObjExtractor(footerMetaData.compressionType,
footerMetaData.bufferSize,
http://git-wip-us.apache.org/repos/asf/hive/blob/b7166d7d/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
index 087207b..185852c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
@@ -467,7 +467,7 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
}
try {
OrcFile.createReader(file.getPath(),
- OrcFile.readerOptions(conf).filesystem(fs));
+ OrcFile.readerOptions(conf).filesystem(fs).maxLength(file.getLen()));
} catch (IOException e) {
return false;
}
@@ -1391,7 +1391,7 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
private Reader createOrcReader() throws IOException {
return OrcFile.createReader(file.getPath(),
- OrcFile.readerOptions(context.conf).filesystem(fs));
+ OrcFile.readerOptions(context.conf).filesystem(fs).maxLength(file.getLen()));
}
private long computeProjectionSize(List<OrcProto.Type> types,
http://git-wip-us.apache.org/repos/asf/hive/blob/b7166d7d/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
index 3a2e7d8..0b40fef 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
@@ -56,10 +56,10 @@ public class ReaderImpl extends org.apache.orc.impl.ReaderImpl
//serialized footer - Keeping this around for use by getFileMetaInfo()
// will help avoid cpu cycles spend in deserializing at cost of increased
// memory footprint.
- private final ByteBuffer footerByteBuffer;
+ private ByteBuffer footerByteBuffer;
// Same for metastore cache - maintains the same background buffer, but includes postscript.
// This will only be set if the file footer/metadata was read from disk.
- private final ByteBuffer footerMetaAndPsBuffer;
+ private ByteBuffer footerMetaAndPsBuffer;
@Override
public ObjectInspector getObjectInspector() {
@@ -89,18 +89,15 @@ public class ReaderImpl extends org.apache.orc.impl.ReaderImpl
FileMetadata fileMetadata = options.getFileMetadata();
if (fileMetadata != null) {
this.inspector = OrcStruct.createObjectInspector(0, fileMetadata.getTypes());
- this.footerByteBuffer = null; // not cached and not needed here
- this.footerMetaAndPsBuffer = null;
} else {
FileMetaInfo footerMetaData;
if (options.getFileMetaInfo() != null) {
footerMetaData = options.getFileMetaInfo();
- this.footerMetaAndPsBuffer = null;
} else {
footerMetaData = extractMetaInfoFromFooter(fileSystem, path,
options.getMaxLength());
- this.footerMetaAndPsBuffer = footerMetaData.footerMetaAndPsBuffer;
}
+ this.footerMetaAndPsBuffer = footerMetaData.footerMetaAndPsBuffer;
MetaInfoObjExtractor rInfo =
new MetaInfoObjExtractor(footerMetaData.compressionType,
footerMetaData.bufferSize,
http://git-wip-us.apache.org/repos/asf/hive/blob/b7166d7d/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
index 52098ae..edaecb3 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
@@ -32,6 +32,7 @@ import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
+import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Properties;
@@ -47,7 +48,9 @@ import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FSInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.conf.HiveConf;
@@ -600,6 +603,61 @@ public class TestInputOutputFormat {
}
@Test
+ public void testSplitGenReadOps() throws Exception {
+ MockFileSystem fs = new MockFileSystem(conf);
+ conf.set("mapred.input.dir", "mock:///mocktable");
+ conf.set("fs.defaultFS", "mock:///");
+ conf.set("fs.mock.impl", MockFileSystem.class.getName());
+ MockPath mockPath = new MockPath(fs, "mock:///mocktable");
+ StructObjectInspector inspector;
+ synchronized (TestOrcFile.class) {
+ inspector = (StructObjectInspector)
+ ObjectInspectorFactory.getReflectionObjectInspector(MyRow.class,
+ ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
+ }
+ Writer writer =
+ OrcFile.createWriter(new Path(mockPath + "/0_0"),
+ OrcFile.writerOptions(conf).blockPadding(false)
+ .bufferSize(1024).inspector(inspector));
+ for(int i=0; i < 10; ++i) {
+ writer.addRow(new MyRow(i, 2*i));
+ }
+ writer.close();
+
+ writer = OrcFile.createWriter(new Path(mockPath + "/0_1"),
+ OrcFile.writerOptions(conf).blockPadding(false)
+ .bufferSize(1024).inspector(inspector));
+ for(int i=0; i < 10; ++i) {
+ writer.addRow(new MyRow(i, 2*i));
+ }
+ writer.close();
+
+ int readOpsBefore = -1;
+ for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
+ if (statistics.getScheme().equalsIgnoreCase("mock")) {
+ readOpsBefore = statistics.getReadOps();
+ }
+ }
+ assertTrue("MockFS has stats. Read ops not expected to be -1", readOpsBefore != -1);
+ OrcInputFormat orcInputFormat = new OrcInputFormat();
+ InputSplit[] splits = orcInputFormat.getSplits(conf, 2);
+ int readOpsDelta = -1;
+ for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
+ if (statistics.getScheme().equalsIgnoreCase("mock")) {
+ readOpsDelta = statistics.getReadOps() - readOpsBefore;
+ }
+ }
+ // call-1: listLocatedStatus - mock:/mocktable
+ // call-2: open - mock:/mocktable/0_0
+ // call-3: open - mock:/mocktable/0_0
+ assertEquals(3, readOpsDelta);
+
+ assertEquals(2, splits.length);
+ // revert back to local fs
+ conf.set("fs.defaultFS", "file:///");
+ }
+
+ @Test
public void testBIStrategySplitBlockBoundary() throws Exception {
conf.set(HiveConf.ConfVars.HIVE_ORC_SPLIT_STRATEGY.varname, "BI");
OrcInputFormat.Context context = new OrcInputFormat.Context(conf);
@@ -788,6 +846,14 @@ public class TestInputOutputFormat {
this.hosts = hosts;
}
+ public void setOffset(int offset) {
+ this.offset = offset;
+ }
+
+ public void setLength(int length) {
+ this.length = length;
+ }
+
@Override
public String toString() {
StringBuilder buffer = new StringBuilder();
@@ -926,6 +992,9 @@ public class TestInputOutputFormat {
DataOutputBuffer buf = (DataOutputBuffer) getWrappedStream();
file.length = buf.getLength();
file.content = new byte[file.length];
+ MockBlock block = new MockBlock("host1");
+ block.setLength(file.length);
+ setBlocks(block);
System.arraycopy(buf.getData(), 0, file.content, 0, file.length);
}
@@ -941,6 +1010,7 @@ public class TestInputOutputFormat {
// statics for when the mock fs is created via FileSystem.get
private static String blockedUgi = null;
private final static List<MockFile> globalFiles = new ArrayList<MockFile>();
+ protected Statistics statistics;
public MockFileSystem() {
// empty
@@ -949,11 +1019,13 @@ public class TestInputOutputFormat {
@Override
public void initialize(URI uri, Configuration conf) {
setConf(conf);
+ statistics = getStatistics("mock", getClass());
}
public MockFileSystem(Configuration conf, MockFile... files) {
setConf(conf);
this.files.addAll(Arrays.asList(files));
+ statistics = getStatistics("mock", getClass());
}
public static void setBlockedUgi(String s) {
@@ -979,6 +1051,7 @@ public class TestInputOutputFormat {
@Override
public FSDataInputStream open(Path path, int i) throws IOException {
+ statistics.incrementReadOps(1);
checkAccess();
MockFile file = findFile(path);
if (file != null) return new FSDataInputStream(new MockInputStream(file));
@@ -1011,6 +1084,7 @@ public class TestInputOutputFormat {
short replication, long blockSize,
Progressable progressable
) throws IOException {
+ statistics.incrementWriteOps(1);
checkAccess();
MockFile file = findFile(path);
if (file == null) {
@@ -1024,6 +1098,7 @@ public class TestInputOutputFormat {
public FSDataOutputStream append(Path path, int bufferSize,
Progressable progressable
) throws IOException {
+ statistics.incrementWriteOps(1);
checkAccess();
return create(path, FsPermission.getDefault(), true, bufferSize,
(short) 3, 256 * 1024, progressable);
@@ -1031,24 +1106,68 @@ public class TestInputOutputFormat {
@Override
public boolean rename(Path path, Path path2) throws IOException {
+ statistics.incrementWriteOps(1);
checkAccess();
return false;
}
@Override
public boolean delete(Path path) throws IOException {
+ statistics.incrementWriteOps(1);
checkAccess();
return false;
}
@Override
public boolean delete(Path path, boolean b) throws IOException {
+ statistics.incrementWriteOps(1);
checkAccess();
return false;
}
@Override
+ public RemoteIterator<LocatedFileStatus> listLocatedStatus(final Path f)
+ throws IOException {
+ return new RemoteIterator<LocatedFileStatus>() {
+ private Iterator<LocatedFileStatus> iterator = listLocatedFileStatuses(f).iterator();
+
+ @Override
+ public boolean hasNext() throws IOException {
+ return iterator.hasNext();
+ }
+
+ @Override
+ public LocatedFileStatus next() throws IOException {
+ return iterator.next();
+ }
+ };
+ }
+
+ private List<LocatedFileStatus> listLocatedFileStatuses(Path path) throws IOException {
+ statistics.incrementReadOps(1);
+ checkAccess();
+ path = path.makeQualified(this);
+ List<LocatedFileStatus> result = new ArrayList<>();
+ String pathname = path.toString();
+ String pathnameAsDir = pathname + "/";
+ Set<String> dirs = new TreeSet<String>();
+ MockFile file = findFile(path);
+ if (file != null) {
+ result.add(createLocatedStatus(file));
+ return result;
+ }
+ findMatchingLocatedFiles(files, pathnameAsDir, dirs, result);
+ findMatchingLocatedFiles(globalFiles, pathnameAsDir, dirs, result);
+ // for each directory add it once
+ for(String dir: dirs) {
+ result.add(createLocatedDirectory(new MockPath(this, pathnameAsDir + dir)));
+ }
+ return result;
+ }
+
+ @Override
public FileStatus[] listStatus(Path path) throws IOException {
+ statistics.incrementReadOps(1);
checkAccess();
path = path.makeQualified(this);
List<FileStatus> result = new ArrayList<FileStatus>();
@@ -1084,6 +1203,23 @@ public class TestInputOutputFormat {
}
}
+ private void findMatchingLocatedFiles(
+ List<MockFile> files, String pathnameAsDir, Set<String> dirs, List<LocatedFileStatus> result)
+ throws IOException {
+ for (MockFile file: files) {
+ String filename = file.path.toString();
+ if (filename.startsWith(pathnameAsDir)) {
+ String tail = filename.substring(pathnameAsDir.length());
+ int nextSlash = tail.indexOf('/');
+ if (nextSlash > 0) {
+ dirs.add(tail.substring(0, nextSlash));
+ } else {
+ result.add(createLocatedStatus(file));
+ }
+ }
+ }
+ }
+
@Override
public void setWorkingDirectory(Path path) {
workingDir = path;
@@ -1096,6 +1232,7 @@ public class TestInputOutputFormat {
@Override
public boolean mkdirs(Path path, FsPermission fsPermission) {
+ statistics.incrementWriteOps(1);
return false;
}
@@ -1110,8 +1247,21 @@ public class TestInputOutputFormat {
FsPermission.createImmutable((short) 755), "owen", "group", dir);
}
+ private LocatedFileStatus createLocatedStatus(MockFile file) throws IOException {
+ FileStatus fileStatus = createStatus(file);
+ return new LocatedFileStatus(fileStatus,
+ getFileBlockLocationsImpl(fileStatus, 0, fileStatus.getLen(), false));
+ }
+
+ private LocatedFileStatus createLocatedDirectory(Path dir) throws IOException {
+ FileStatus fileStatus = createDirectory(dir);
+ return new LocatedFileStatus(fileStatus,
+ getFileBlockLocationsImpl(fileStatus, 0, fileStatus.getLen(), false));
+ }
+
@Override
public FileStatus getFileStatus(Path path) throws IOException {
+ statistics.incrementReadOps(1);
checkAccess();
path = path.makeQualified(this);
String pathnameAsDir = path.toString() + "/";
@@ -1133,6 +1283,15 @@ public class TestInputOutputFormat {
@Override
public BlockLocation[] getFileBlockLocations(FileStatus stat,
long start, long len) throws IOException {
+ return getFileBlockLocationsImpl(stat, start, len, true);
+ }
+
+ private BlockLocation[] getFileBlockLocationsImpl(final FileStatus stat, final long start,
+ final long len,
+ final boolean updateStats) throws IOException {
+ if (updateStats) {
+ statistics.incrementReadOps(1);
+ }
checkAccess();
List<BlockLocation> result = new ArrayList<BlockLocation>();
MockFile file = findFile(stat.getPath());