You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by pr...@apache.org on 2015/01/10 02:19:32 UTC
svn commit: r1650707 - in /hive/branches/llap:
llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/
llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/
llap-server/src/java/org/apache/hadoop/hive/llap/io/orc/
ql/src/java/org/apache/hadoop/hive/ql/io/orc/
Author: prasanthj
Date: Sat Jan 10 01:19:32 2015
New Revision: 1650707
URL: http://svn.apache.org/r1650707
Log:
experimental version of orc metadata cache
Added:
hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/
hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/CompressionBuffer.java
hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcMetadata.java
hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcMetadataCache.java
hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcMetadataLoader.java
hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/orc/
hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/orc/LLAPReaderImpl.java
hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/orc/LLAPRecordReaderImpl.java
hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/orc/OrcFile.java
hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/orc/Reader.java
hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/orc/RecordReader.java
Removed:
hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcMetadataCache.java
Modified:
hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataProducer.java
hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/CompressionCodec.java
hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java
hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
Modified: hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataProducer.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataProducer.java?rev=1650707&r1=1650706&r2=1650707&view=diff
==============================================================================
--- hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataProducer.java (original)
+++ hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataProducer.java Sat Jan 10 01:19:32 2015
@@ -20,10 +20,7 @@ package org.apache.hadoop.hive.llap.io.e
import java.io.IOException;
import java.util.ArrayList;
-import java.util.Arrays;
import java.util.Collections;
-import java.util.HashSet;
-import java.util.Iterator;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
@@ -38,19 +35,21 @@ import org.apache.hadoop.hive.llap.io.ap
import org.apache.hadoop.hive.llap.io.api.impl.LlapIoImpl;
import org.apache.hadoop.hive.llap.io.api.orc.OrcBatchKey;
import org.apache.hadoop.hive.llap.io.api.orc.OrcCacheKey;
+import org.apache.hadoop.hive.llap.io.metadata.OrcMetadataCache;
import org.apache.hadoop.hive.ql.io.orc.OrcFile;
import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat;
+import org.apache.hadoop.hive.ql.io.orc.OrcProto.Type;
import org.apache.hadoop.hive.ql.io.orc.Reader;
import org.apache.hadoop.hive.ql.io.orc.RecordReader;
import org.apache.hadoop.hive.ql.io.orc.StripeInformation;
-import org.apache.hadoop.hive.ql.io.orc.OrcProto.Type;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.InputSplit;
public class OrcEncodedDataProducer implements EncodedDataProducer<OrcBatchKey> {
private FileSystem cachedFs = null;
- private final OrcMetadataCache metadataCache = new OrcMetadataCache();
+ private Configuration conf;
+ private OrcMetadataCache metadataCache;
private final Allocator allocator;
private final Cache<OrcCacheKey> cache;
@@ -101,14 +100,8 @@ public class OrcEncodedDataProducer impl
orcReader = null;
if (stripes == null || types == null) {
orcReader = createOrcReader(split);
- if (stripes == null) {
- stripes = orcReader.getStripes();
- metadataCache.cacheStripes(internedFilePath, stripes);
- }
- if (types == null) {
- types = orcReader.getTypes();
- metadataCache.cacheTypes(internedFilePath, types);
- }
+ stripes = metadataCache.getStripes(internedFilePath);
+ types = metadataCache.getTypes(internedFilePath);
}
if (columnIds == null) {
@@ -298,10 +291,12 @@ public class OrcEncodedDataProducer impl
private Reader createOrcReader(FileSplit fileSplit) throws IOException {
FileSystem fs = cachedFs;
Path path = fileSplit.getPath();
- Configuration conf = new Configuration();
if ("pfile".equals(path.toUri().getScheme())) {
fs = path.getFileSystem(conf); // Cannot use cached FS due to hive tests' proxy FS.
}
+ if (metadataCache == null) {
+ metadataCache = new OrcMetadataCache(cachedFs, path, conf);
+ }
return OrcFile.createReader(path, OrcFile.readerOptions(conf).filesystem(fs));
}
@@ -315,6 +310,8 @@ public class OrcEncodedDataProducer impl
this.cachedFs = FileSystem.get(conf);
this.cache = cache;
this.allocator = allocator;
+ this.conf = conf;
+ this.metadataCache = null;
}
@Override
Added: hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/CompressionBuffer.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/CompressionBuffer.java?rev=1650707&view=auto
==============================================================================
--- hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/CompressionBuffer.java (added)
+++ hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/CompressionBuffer.java Sat Jan 10 01:19:32 2015
@@ -0,0 +1,119 @@
+/**
+ * Copyright 2014 Prasanth Jayachandran
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.llap.io.metadata;
+
+/**
+ * Describes where a row group lives inside an ORC compression buffer: the
+ * buffer's position and length within the file, plus how many values, bits
+ * and bytes must be consumed after decompression to reach the start of the
+ * row group. Instances are immutable once built via the Builder.
+ * NOTE(review): no accessors are defined yet for the stored fields.
+ */
+public class CompressionBuffer {
+ // stripe position within file
+ private int stripePos;
+
+ // row group position within stripe
+ private int rowGroupPos;
+
+ // start offset of compression buffer corresponding to above row group
+ private long startOffset;
+
+ // length of compression buffer (compressed or uncompressed length)
+ private long length;
+
+ // offset within compression buffer where the row group begins
+ private int uncompressedOffset;
+
+ // if uncompressedOffset is in a middle of integer encoding runs (RLE, Delta etc.), consume
+ // these many values to reach beginning of the row group
+ private int consume;
+
+ // For run length byte encoding, record the number of bits within current byte to consume to
+ // reach beginning of the row group. This is required for IS_PRESENT stream.
+ private int consumeBits;
+
+ // if last row group is set to true, it means the above row group spans compression buffer
+ // boundary. Length will span two compression buffers.
+ private boolean lastRowGroup;
+
+ // Private: instances are created only through the Builder below.
+ private CompressionBuffer(int sp, int rgp, long s, long len, int u, int c, int cb, boolean last) {
+ this.stripePos = sp;
+ this.rowGroupPos = rgp;
+ this.startOffset = s;
+ this.length = len;
+ this.uncompressedOffset = u;
+ this.consume = c;
+ this.consumeBits = cb;
+ this.lastRowGroup = last;
+ }
+
+ // NOTE(review): Builder is private, so code outside this class cannot name the
+ // type returned by the public builder() method; presumably it should be
+ // public static — TODO confirm intent.
+ private static class Builder {
+ private int stripePos;
+ private int rowGroupPos;
+ private long startOffset;
+ private long length;
+ private int offsetWithinBuffer;
+ private int consume;
+ private int consumeBits;
+ private boolean lastRowGroup;
+
+ public Builder setStripePosition(int stripePos) {
+ this.stripePos = stripePos;
+ return this;
+ }
+
+ public Builder setRowGroupPosition(int rowGroupPos) {
+ this.rowGroupPos = rowGroupPos;
+ return this;
+ }
+
+ public Builder setStartOffset(long startOffset) {
+ this.startOffset = startOffset;
+ return this;
+ }
+
+ public Builder setLength(long length) {
+ this.length = length;
+ return this;
+ }
+
+ public Builder setOffsetWithInBuffer(int offsetWithInBuffer) {
+ this.offsetWithinBuffer = offsetWithInBuffer;
+ return this;
+ }
+
+ public Builder consumeRuns(int consume) {
+ this.consume = consume;
+ return this;
+ }
+
+ public Builder consumeBits(int consumeBits) {
+ this.consumeBits = consumeBits;
+ return this;
+ }
+
+ public Builder setLastRowGroup(boolean lastRowGroup) {
+ this.lastRowGroup = lastRowGroup;
+ return this;
+ }
+
+ public CompressionBuffer build() {
+ return new CompressionBuffer(stripePos, rowGroupPos, startOffset, length, offsetWithinBuffer,
+ consume, consumeBits, lastRowGroup);
+ }
+ }
+
+ /** Entry point for constructing CompressionBuffer instances. */
+ public static Builder builder() {
+ return new Builder();
+ }
+}
Added: hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcMetadata.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcMetadata.java?rev=1650707&view=auto
==============================================================================
--- hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcMetadata.java (added)
+++ hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcMetadata.java Sat Jan 10 01:19:32 2015
@@ -0,0 +1,85 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.llap.io.metadata;
+
+import java.util.List;
+import java.util.Map;
+
+import org.apache.hadoop.hive.ql.io.orc.CompressionKind;
+import org.apache.hadoop.hive.ql.io.orc.OrcProto;
+import org.apache.hadoop.hive.ql.io.orc.StripeInformation;
+
+/**
+ * Mutable holder for the per-file ORC metadata stored in OrcMetadataCache:
+ * compression kind and buffer size, the type description list, stripe
+ * information, and per-stripe column encodings / row index entries keyed by
+ * stripe ordinal. Plain getters/setters; no validation is performed.
+ */
+public class OrcMetadata {
+ private CompressionKind compressionKind;
+ private int compressionBufferSize;
+ private List<OrcProto.Type> types;
+ private List<StripeInformation> stripes;
+ // keyed by stripe ordinal within the file
+ private Map<Integer, List<OrcProto.ColumnEncoding>> stripeToColEncodings;
+ // keyed by stripe ordinal within the file
+ private Map<Integer, OrcProto.RowIndex[]> stripeToRowIndexEntries;
+
+ public Map<Integer, List<OrcProto.ColumnEncoding>> getStripeToColEncodings() {
+ return stripeToColEncodings;
+ }
+
+ public void setStripeToColEncodings(
+ Map<Integer, List<OrcProto.ColumnEncoding>> stripeToColEncodings) {
+ this.stripeToColEncodings = stripeToColEncodings;
+ }
+
+ public Map<Integer, OrcProto.RowIndex[]> getStripeToRowIndexEntries() {
+ return stripeToRowIndexEntries;
+ }
+
+ public void setStripeToRowIndexEntries(
+ Map<Integer, OrcProto.RowIndex[]> stripeToRowIndexEntries) {
+ this.stripeToRowIndexEntries = stripeToRowIndexEntries;
+ }
+
+ public List<StripeInformation> getStripes() {
+ return stripes;
+ }
+
+ public void setStripes(List<StripeInformation> stripes) {
+ this.stripes = stripes;
+ }
+
+ public CompressionKind getCompressionKind() {
+ return compressionKind;
+ }
+
+ public void setCompressionKind(CompressionKind compressionKind) {
+ this.compressionKind = compressionKind;
+ }
+
+ public int getCompressionBufferSize() {
+ return compressionBufferSize;
+ }
+
+ public void setCompressionBufferSize(int compressionBufferSize) {
+ this.compressionBufferSize = compressionBufferSize;
+ }
+
+ public List<OrcProto.Type> getTypes() {
+ return types;
+ }
+
+ public void setTypes(List<OrcProto.Type> types) {
+ this.types = types;
+ }
+}
Added: hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcMetadataCache.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcMetadataCache.java?rev=1650707&view=auto
==============================================================================
--- hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcMetadataCache.java (added)
+++ hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcMetadataCache.java Sat Jan 10 01:19:32 2015
@@ -0,0 +1,97 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.llap.io.metadata;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.concurrent.ExecutionException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.io.orc.CompressionKind;
+import org.apache.hadoop.hive.ql.io.orc.OrcProto;
+import org.apache.hadoop.hive.ql.io.orc.StripeInformation;
+
+import com.google.common.cache.Cache;
+import com.google.common.cache.CacheBuilder;
+
+/**
+ * ORC-specific metadata cache. The underlying Guava cache is static and
+ * therefore shared by every instance of this class, bounded at
+ * DEFAULT_MAX_CACHE_ENTRIES files; each instance carries the loader used to
+ * populate an entry on a cache miss.
+ */
+public class OrcMetadataCache {
+ private static final int DEFAULT_CACHE_ACCESS_CONCURRENCY = 10;
+ private static final int DEFAULT_MAX_CACHE_ENTRIES = 100;
+ // NOTE(review): assigned exactly once (static initializer) — could be final.
+ private static Cache<String, OrcMetadata> METADATA;
+
+ static {
+ METADATA = CacheBuilder.newBuilder()
+ .concurrencyLevel(DEFAULT_CACHE_ACCESS_CONCURRENCY)
+ .maximumSize(DEFAULT_MAX_CACHE_ENTRIES)
+ .build();
+ }
+
+ // file this instance's loader reads; also used in error messages below
+ private Path path;
+ // computes the OrcMetadata for this.path on a cache miss
+ private OrcMetadataLoader loader;
+
+ public OrcMetadataCache(FileSystem fs, Path path, Configuration conf) {
+ this.path = path;
+ this.loader = new OrcMetadataLoader(fs, path, conf);
+ }
+
+ // NOTE(review): entries are keyed by the caller-supplied pathString, but the
+ // loader always reads this.path — passing a different file's path would cache
+ // mismatched metadata under that key. Verify callers only ever pass the path
+ // this instance was constructed with.
+ public CompressionKind getCompression(String pathString) throws IOException {
+ try {
+ return METADATA.get(pathString, loader).getCompressionKind();
+ } catch (ExecutionException e) {
+ throw new IOException("Unable to load orc metadata for " + path.toString(), e);
+ }
+ }
+
+ public int getCompressionBufferSize(String pathString) throws IOException {
+ try {
+ return METADATA.get(pathString, loader).getCompressionBufferSize();
+ } catch (ExecutionException e) {
+ throw new IOException("Unable to load orc metadata for " + path.toString(), e);
+ }
+ }
+
+ public List<OrcProto.Type> getTypes(String pathString) throws IOException {
+ try {
+ return METADATA.get(pathString, loader).getTypes();
+ } catch (ExecutionException e) {
+ throw new IOException("Unable to load orc metadata for " + path.toString(), e);
+ }
+ }
+
+ public List<StripeInformation> getStripes(String pathString) throws IOException {
+ try {
+ return METADATA.get(pathString, loader).getStripes();
+ } catch (ExecutionException e) {
+ throw new IOException("Unable to load orc metadata for " + path.toString(), e);
+ }
+ }
+
+ // public boolean[] getIncludedRowGroups(String pathString, SearchArgument sarg, int stripeIdx) throws IOException {
+ // try {
+ // return METADATA.get(pathString, loader).getStripeToRowIndexEntries();
+ // } catch (ExecutionException e) {
+ // throw new IOException("Unable to load orc metadata for " + path.toString(), e);
+ // }
+ // }
+}
Added: hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcMetadataLoader.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcMetadataLoader.java?rev=1650707&view=auto
==============================================================================
--- hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcMetadataLoader.java (added)
+++ hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcMetadataLoader.java Sat Jan 10 01:19:32 2015
@@ -0,0 +1,67 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.llap.io.metadata;
+
+import static org.apache.hadoop.hive.ql.io.orc.OrcFile.readerOptions;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.Callable;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.llap.io.orc.OrcFile;
+import org.apache.hadoop.hive.llap.io.orc.Reader;
+import org.apache.hadoop.hive.llap.io.orc.RecordReader;
+import org.apache.hadoop.hive.ql.io.orc.OrcProto;
+import org.apache.hadoop.hive.ql.io.orc.StripeInformation;
+
+/**
+ * Callable used by OrcMetadataCache to populate a cache entry on a miss:
+ * opens an LLAP ORC reader for the given path and collects compression
+ * settings, stripe list, and per-stripe column encodings and row index
+ * entries into a single OrcMetadata object.
+ */
+public class OrcMetadataLoader implements Callable<OrcMetadata> {
+ private FileSystem fs;
+ private Path path;
+ private Configuration conf;
+
+ public OrcMetadataLoader(FileSystem fs, Path path, Configuration conf) {
+ this.fs = fs;
+ this.path = path;
+ this.conf = conf;
+ }
+
+ // NOTE(review): neither the Reader nor the RecordReader created below is
+ // closed before returning — potential resource leak; TODO confirm whether
+ // these types hold open streams that require close().
+ @Override
+ public OrcMetadata call() throws Exception {
+ Reader reader = OrcFile.createLLAPReader(path, readerOptions(conf).filesystem(fs));
+ OrcMetadata orcMetadata = new OrcMetadata();
+ orcMetadata.setCompressionKind(reader.getCompression());
+ orcMetadata.setCompressionBufferSize(reader.getCompressionSize());
+ List<StripeInformation> stripes = reader.getStripes();
+ orcMetadata.setStripes(stripes);
+ Map<Integer, List<OrcProto.ColumnEncoding>> stripeColEnc = new HashMap<Integer, List<OrcProto.ColumnEncoding>>();
+ Map<Integer, OrcProto.RowIndex[]> stripeRowIndices = new HashMap<Integer, OrcProto.RowIndex[]>();
+ RecordReader rows = reader.rows();
+ // one entry per stripe, keyed by stripe ordinal
+ for (int i = 0; i < stripes.size(); i++) {
+ stripeColEnc.put(i, rows.getColumnEncodings(i));
+ stripeRowIndices.put(i, rows.getRowIndexEntries(i));
+ }
+ orcMetadata.setStripeToColEncodings(stripeColEnc);
+ orcMetadata.setStripeToRowIndexEntries(stripeRowIndices);
+ return orcMetadata;
+ }
+}
Added: hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/orc/LLAPReaderImpl.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/orc/LLAPReaderImpl.java?rev=1650707&view=auto
==============================================================================
--- hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/orc/LLAPReaderImpl.java (added)
+++ hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/orc/LLAPReaderImpl.java Sat Jan 10 01:19:32 2015
@@ -0,0 +1,57 @@
+/**
+ * Copyright 2014 Prasanth Jayachandran
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.llap.io.orc;
+
+import java.io.IOException;
+import java.util.Arrays;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.llap.io.metadata.CompressionBuffer;
+import org.apache.hadoop.hive.ql.io.orc.CompressionKind;
+import org.apache.hadoop.hive.ql.io.orc.OrcFile;
+import org.apache.hadoop.hive.ql.io.orc.OrcProto;
+import org.apache.hadoop.hive.ql.io.orc.ReaderImpl;
+
+/**
+ * LLAP variant of the ORC reader: extends the ql ReaderImpl and produces
+ * LLAPRecordReaderImpl instances that expose stripe-level metadata
+ * (row indexes, column encodings) in addition to the normal row API.
+ */
+public class LLAPReaderImpl extends ReaderImpl implements Reader {
+
+ public LLAPReaderImpl(Path path, OrcFile.ReaderOptions options) throws IOException {
+ super(path, options);
+ }
+
+ // NOTE(review): always builds a fresh Options here, so getInclude() is null
+ // and every column is included — callers cannot pass options through this
+ // overload; TODO confirm that is intended.
+ @Override
+ public RecordReader rows() throws IOException {
+ Reader.Options options = new Options();
+ boolean[] include = options.getInclude();
+ // if included columns is null, then include all columns
+ if (include == null) {
+ include = new boolean[footer.getTypesCount()];
+ Arrays.fill(include, true);
+ options.include(include);
+ }
+ return new LLAPRecordReaderImpl(this.getStripes(), fileSystem, path,
+ options, footer.getTypesList(), codec, bufferSize,
+ footer.getRowIndexStride(), conf);
+ }
+
+ // NOTE(review): unimplemented stub — returns null, so any caller will NPE.
+ @Override
+ public RecordReader rows(CompressionBuffer buffer, CompressionKind kind,
+ OrcProto.ColumnEncoding encoding) throws IOException {
+ return null;
+ }
+}
Added: hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/orc/LLAPRecordReaderImpl.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/orc/LLAPRecordReaderImpl.java?rev=1650707&view=auto
==============================================================================
--- hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/orc/LLAPRecordReaderImpl.java (added)
+++ hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/orc/LLAPRecordReaderImpl.java Sat Jan 10 01:19:32 2015
@@ -0,0 +1,105 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.llap.io.orc;
+
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.llap.Consumer;
+import org.apache.hadoop.hive.llap.io.api.EncodedColumn;
+import org.apache.hadoop.hive.llap.io.api.cache.Allocator;
+import org.apache.hadoop.hive.llap.io.api.orc.OrcBatchKey;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.io.orc.*;
+import org.apache.hadoop.hive.ql.io.orc.Reader;
+import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
+
+/**
+ * LLAP record reader used (in this revision) only for stripe metadata access:
+ * getRowIndexEntries, getColumnEncodings and getIncludedRowGroups delegate to
+ * the ql RecordReaderImpl internals, while the row-reading methods
+ * (hasNext/next/nextBatch/seekToRow/close/readEncodedColumns) are stubs that
+ * return defaults or do nothing.
+ */
+public class LLAPRecordReaderImpl extends RecordReaderImpl implements RecordReader {
+ LLAPRecordReaderImpl(List<StripeInformation> stripes,
+ FileSystem fileSystem, Path path,
+ Reader.Options options,
+ List<OrcProto.Type> types, CompressionCodec codec,
+ int bufferSize, long strideRate, Configuration conf) throws IOException {
+ super(stripes, fileSystem, path, options, types, codec, bufferSize, strideRate, conf);
+ }
+
+ @Override
+ public OrcProto.RowIndex[] getRowIndexEntries(int stripeIdx) throws IOException {
+ return readRowIndex(stripeIdx);
+ }
+
+ @Override
+ public List<OrcProto.ColumnEncoding> getColumnEncodings(int stripeIdx) throws IOException {
+ StripeInformation si = stripes.get(stripeIdx);
+ OrcProto.StripeFooter sf = readStripeFooter(si);
+ return sf.getColumnsList();
+ }
+
+ // NOTE(review): mutates inherited currentStripe as a side effect so that
+ // pickRowGroups() evaluates the requested stripe — verify this does not
+ // interfere with a concurrent/ongoing row scan on the same reader.
+ @Override
+ public boolean[] getIncludedRowGroups(int stripeIdx) throws IOException {
+ currentStripe = stripeIdx;
+ return pickRowGroups();
+ }
+
+ // stub: no rows are ever produced by this reader
+ @Override
+ public boolean hasNext() throws IOException {
+ return false;
+ }
+
+ // stub
+ @Override
+ public Object next(Object previous) throws IOException {
+ return null;
+ }
+
+ // stub
+ @Override
+ public VectorizedRowBatch nextBatch(VectorizedRowBatch previousBatch) throws IOException {
+ return null;
+ }
+
+ // stub
+ @Override
+ public long getRowNumber() {
+ return 0;
+ }
+
+ // stub
+ @Override
+ public float getProgress() {
+ return 0;
+ }
+
+ // stub: intentionally does not release superclass resources — TODO confirm
+ @Override
+ public void close() throws IOException {
+
+ }
+
+ // stub
+ @Override
+ public void seekToRow(long rowCount) throws IOException {
+
+ }
+
+ // stub: encoded-column reading not implemented in this revision
+ @Override
+ public void readEncodedColumns(long[][] colRgs, int rgCount, SearchArgument sarg,
+ Consumer<EncodedColumn<OrcBatchKey>> consumer, Allocator allocator) {
+
+ }
+}
Added: hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/orc/OrcFile.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/orc/OrcFile.java?rev=1650707&view=auto
==============================================================================
--- hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/orc/OrcFile.java (added)
+++ hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/orc/OrcFile.java Sat Jan 10 01:19:32 2015
@@ -0,0 +1,31 @@
+/**
+ * Copyright 2014 Prasanth Jayachandran
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.llap.io.orc;
+
+import java.io.IOException;
+
+import org.apache.hadoop.fs.Path;
+
+/**
+ * Factory for LLAP-specific ORC readers; mirrors the ql OrcFile entry point.
+ * NOTE(review): utility class — consider adding a private constructor.
+ */
+public class OrcFile {
+
+ /** Creates an LLAP reader for the given path using the standard ql reader options. */
+ public static Reader createLLAPReader(Path path,
+ org.apache.hadoop.hive.ql.io.orc.OrcFile.ReaderOptions options) throws IOException {
+ return new LLAPReaderImpl(path, options);
+ }
+}
Added: hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/orc/Reader.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/orc/Reader.java?rev=1650707&view=auto
==============================================================================
--- hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/orc/Reader.java (added)
+++ hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/orc/Reader.java Sat Jan 10 01:19:32 2015
@@ -0,0 +1,44 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.llap.io.orc;
+
+import java.io.IOException;
+
+import org.apache.hadoop.hive.llap.io.metadata.CompressionBuffer;
+import org.apache.hadoop.hive.ql.io.orc.CompressionKind;
+import org.apache.hadoop.hive.ql.io.orc.OrcProto;
+
+/**
+ * LLAP extension of the ql ORC Reader interface whose rows() methods return
+ * the LLAP RecordReader (which additionally exposes stripe metadata).
+ */
+public interface Reader extends org.apache.hadoop.hive.ql.io.orc.Reader {
+
+ /**
+ * Create a record reader over the whole file.
+ *
+ * @return - record reader exposing both rows and stripe metadata
+ * @throws IOException
+ */
+ public RecordReader rows() throws IOException;
+
+ /**
+ * Read rows out of given compression buffer.
+ *
+ * @param buffer - compression buffer
+ * @param kind - compression kind
+ * @param encoding - column encoding
+ * @return - record reader to read rows out of it
+ * @throws IOException
+ */
+ public RecordReader rows(CompressionBuffer buffer, CompressionKind kind,
+ OrcProto.ColumnEncoding encoding) throws IOException;
+}
Added: hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/orc/RecordReader.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/orc/RecordReader.java?rev=1650707&view=auto
==============================================================================
--- hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/orc/RecordReader.java (added)
+++ hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/orc/RecordReader.java Sat Jan 10 01:19:32 2015
@@ -0,0 +1,52 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.llap.io.orc;
+
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.hadoop.hive.ql.io.orc.OrcProto;
+
+/**
+ * LLAP extension of the ql ORC RecordReader that exposes per-stripe metadata
+ * (row index entries, column encodings, qualifying row groups) in addition to
+ * the inherited row-reading API.
+ */
+public interface RecordReader extends org.apache.hadoop.hive.ql.io.orc.RecordReader {
+ /**
+ * Return all row index entries for the specified stripe index.
+ *
+ * @param stripeIdx - stripe index within orc file
+ * @return - all row index entries
+ */
+ OrcProto.RowIndex[] getRowIndexEntries(int stripeIdx) throws IOException;
+
+ /**
+ * Return column encodings of all columns for the specified stripe index.
+ *
+ * @param stripeIdx - stripe index within orc file
+ * @return - column encodings of all columns
+ */
+ List<OrcProto.ColumnEncoding> getColumnEncodings(int stripeIdx) throws IOException;
+
+ /**
+ * Return the row groups that satisfy the SARG condition for the specified stripe index.
+ *
+ * @param stripeIdx - stripe index within orc file
+ * @return - row groups qualifying the SARG
+ */
+ boolean[] getIncludedRowGroups(int stripeIdx) throws IOException;
+}
Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/CompressionCodec.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/CompressionCodec.java?rev=1650707&r1=1650706&r2=1650707&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/CompressionCodec.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/CompressionCodec.java Sat Jan 10 01:19:32 2015
@@ -23,7 +23,7 @@ import java.util.EnumSet;
import javax.annotation.Nullable;
-interface CompressionCodec {
+public interface CompressionCodec {
public enum Modifier {
/* speed/compression tradeoffs */
Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java?rev=1650707&r1=1650706&r2=1650707&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java Sat Jan 10 01:19:32 2015
@@ -18,6 +18,14 @@
package org.apache.hadoop.hive.ql.io.orc;
+import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_ORC_DEFAULT_BLOCK_PADDING;
+import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_ORC_DEFAULT_BLOCK_SIZE;
+import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_ORC_DEFAULT_BUFFER_SIZE;
+import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_ORC_DEFAULT_COMPRESS;
+import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_ORC_DEFAULT_ROW_INDEX_STRIDE;
+import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_ORC_DEFAULT_STRIPE_SIZE;
+import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_ORC_WRITE_FORMAT;
+
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
@@ -26,8 +34,6 @@ import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.*;
-
/**
* Contains factory methods to read or write ORC files.
*/
@@ -184,7 +190,7 @@ public final class OrcFile {
private ReaderImpl.FileMetaInfo fileMetaInfo;
private long maxLength = Long.MAX_VALUE;
- ReaderOptions(Configuration conf) {
+ public ReaderOptions(Configuration conf) {
this.conf = conf;
}
ReaderOptions fileMetaInfo(ReaderImpl.FileMetaInfo info) {
Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java?rev=1650707&r1=1650706&r2=1650707&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java Sat Jan 10 01:19:32 2015
@@ -44,23 +44,23 @@ import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import com.google.protobuf.CodedInputStream;
-final class ReaderImpl implements Reader {
+public class ReaderImpl implements Reader {
private static final Log LOG = LogFactory.getLog(ReaderImpl.class);
private static final int DIRECTORY_SIZE_GUESS = 16 * 1024;
- private final FileSystem fileSystem;
- private final Path path;
- private final CompressionKind compressionKind;
- private final CompressionCodec codec;
- private final int bufferSize;
+ protected final FileSystem fileSystem;
+ protected final Path path;
+ protected final CompressionKind compressionKind;
+ protected final CompressionCodec codec;
+ protected final int bufferSize;
private OrcProto.Metadata metadata = null;
private final int metadataSize;
- private final OrcProto.Footer footer;
+ protected final OrcProto.Footer footer;
private final ObjectInspector inspector;
private long deserializedSize = -1;
- private final Configuration conf;
+ protected final Configuration conf;
private final List<Integer> versionList;
private final OrcFile.WriterVersion writerVersion;
@@ -295,7 +295,7 @@ final class ReaderImpl implements Reader
* @param options options for reading
* @throws IOException
*/
- ReaderImpl(Path path, OrcFile.ReaderOptions options) throws IOException {
+ public ReaderImpl(Path path, OrcFile.ReaderOptions options) throws IOException {
FileSystem fs = options.getFilesystem();
if (fs == null) {
fs = path.getFileSystem(options.getConfiguration());
Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java?rev=1650707&r1=1650706&r2=1650707&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java Sat Jan 10 01:19:32 2015
@@ -55,7 +55,6 @@ import org.apache.hadoop.hive.ql.exec.ve
import org.apache.hadoop.hive.ql.exec.vector.TimestampUtils;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr;
-import org.apache.hadoop.hive.ql.io.orc.LlapUtils.PresentStreamReadResult;
import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue;
@@ -81,14 +80,14 @@ import org.apache.hadoop.io.Text;
import com.google.common.collect.ComparisonChain;
-class RecordReaderImpl implements RecordReader {
+public class RecordReaderImpl implements RecordReader {
private static final Log LOG = LogFactory.getLog(RecordReaderImpl.class);
private static final boolean isLogTraceEnabled = LOG.isTraceEnabled();
private final FSDataInputStream file;
private final long firstRow;
- private final List<StripeInformation> stripes =
+ protected final List<StripeInformation> stripes =
new ArrayList<StripeInformation>();
private OrcProto.StripeFooter stripeFooter;
private final long totalRowCount;
@@ -98,7 +97,7 @@ class RecordReaderImpl implements Record
private final boolean[] included;
private final long rowIndexStride;
private long rowInStripe = 0;
- private int currentStripe = -1;
+ protected int currentStripe = -1;
private long rowBaseInStripe = 0;
private long rowCountInStripe = 0;
private final Map<StreamName, InStream> streams =
@@ -236,16 +235,16 @@ class RecordReaderImpl implements Record
return result;
}
- RecordReaderImpl(List<StripeInformation> stripes,
- FileSystem fileSystem,
- Path path,
- Reader.Options options,
- List<OrcProto.Type> types,
- CompressionCodec codec,
- int bufferSize,
- long strideRate,
- Configuration conf
- ) throws IOException {
+ protected RecordReaderImpl(List<StripeInformation> stripes,
+ FileSystem fileSystem,
+ Path path,
+ Reader.Options options,
+ List<OrcProto.Type> types,
+ CompressionCodec codec,
+ int bufferSize,
+ long strideRate,
+ Configuration conf
+ ) throws IOException {
this.file = fileSystem.open(path);
this.codec = codec;
this.types = types;
@@ -2271,7 +2270,7 @@ class RecordReaderImpl implements Record
}
}
- OrcProto.StripeFooter readStripeFooter(StripeInformation stripe
+ protected OrcProto.StripeFooter readStripeFooter(StripeInformation stripe
) throws IOException {
long offset = stripe.getOffset() + stripe.getIndexLength() +
stripe.getDataLength();
@@ -2568,7 +2567,7 @@ class RecordReaderImpl implements Record
* row groups must be read.
* @throws IOException
*/
- private boolean[] pickRowGroups() throws IOException {
+ protected boolean[] pickRowGroups() throws IOException {
// if we don't have a sarg or indexes, we read everything
if (sarg == null || rowIndexStride == 0) {
return null;
@@ -3236,7 +3235,7 @@ class RecordReaderImpl implements Record
throw new IllegalArgumentException("Seek after the end of reader range");
}
- OrcProto.RowIndex[] readRowIndex(int stripeIndex) throws IOException {
+ protected OrcProto.RowIndex[] readRowIndex(int stripeIndex) throws IOException {
long offset = stripes.get(stripeIndex).getOffset();
OrcProto.StripeFooter stripeFooter;
OrcProto.RowIndex[] indexes;