You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by do...@apache.org on 2022/10/04 20:55:50 UTC
[orc] branch main updated: ORC-1283: ENABLE_INDEXES does not take effect
This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/orc.git
The following commit(s) were added to refs/heads/main by this push:
new 733457d46 ORC-1283: ENABLE_INDEXES does not take effect
733457d46 is described below
commit 733457d46a60225fae12b0609e699e9edc9ef2f0
Author: deshanxiao <de...@microsoft.com>
AuthorDate: Tue Oct 4 13:55:43 2022 -0700
ORC-1283: ENABLE_INDEXES does not take effect
### What changes were proposed in this pull request?
This PR aims to fix the problem that ENABLE_INDEXES does not take effect.
### Why are the changes needed?
Currently, even if the ORC config `ENABLE_INDEXES` is set to `false`, ORC still writes indexes, because whether ORC writes an index depends only on the `ROW_INDEX_STRIDE` configuration.
### How was this patch tested?
Added UT
Closes #1267 from deshanxiao/deshan/fix-index-effect.
Authored-by: deshanxiao <de...@microsoft.com>
Signed-off-by: Dongjoon Hyun <do...@apache.org>
---
java/core/src/java/org/apache/orc/OrcFile.java | 6 +++++
.../src/java/org/apache/orc/impl/WriterImpl.java | 3 ++-
.../test/org/apache/orc/impl/TestWriterImpl.java | 27 ++++++++++++++++++++++
site/_docs/hive-config.md | 2 +-
site/_docs/spark-config.md | 2 +-
5 files changed, 37 insertions(+), 3 deletions(-)
diff --git a/java/core/src/java/org/apache/orc/OrcFile.java b/java/core/src/java/org/apache/orc/OrcFile.java
index fa9487b3d..406fda8cb 100644
--- a/java/core/src/java/org/apache/orc/OrcFile.java
+++ b/java/core/src/java/org/apache/orc/OrcFile.java
@@ -432,6 +432,7 @@ public class OrcFile {
private long stripeSizeValue;
private long stripeRowCountValue;
private long blockSizeValue;
+ private boolean buildIndex;
private int rowIndexStrideValue;
private int bufferSizeValue;
private boolean enforceBufferSize = false;
@@ -466,6 +467,7 @@ public class OrcFile {
stripeSizeValue = OrcConf.STRIPE_SIZE.getLong(tableProperties, conf);
stripeRowCountValue = OrcConf.STRIPE_ROW_COUNT.getLong(tableProperties, conf);
blockSizeValue = OrcConf.BLOCK_SIZE.getLong(tableProperties, conf);
+ buildIndex = OrcConf.ENABLE_INDEXES.getBoolean(tableProperties, conf);
rowIndexStrideValue =
(int) OrcConf.ROW_INDEX_STRIDE.getLong(tableProperties, conf);
bufferSizeValue = (int) OrcConf.BUFFER_SIZE.getLong(tableProperties,
@@ -905,6 +907,10 @@ public class OrcFile {
return rowIndexStrideValue;
}
+ public boolean isBuildIndex() {
+ return buildIndex;
+ }
+
public CompressionStrategy getCompressionStrategy() {
return compressionStrategy;
}
diff --git a/java/core/src/java/org/apache/orc/impl/WriterImpl.java b/java/core/src/java/org/apache/orc/impl/WriterImpl.java
index e7d71a142..734b94061 100644
--- a/java/core/src/java/org/apache/orc/impl/WriterImpl.java
+++ b/java/core/src/java/org/apache/orc/impl/WriterImpl.java
@@ -184,7 +184,8 @@ public class WriterImpl implements WriterInternal, MemoryManager.Callback {
this.compressionStrategy = opts.getCompressionStrategy();
this.rowIndexStride = opts.getRowIndexStride();
- buildIndex = rowIndexStride > 0;
+
+ this.buildIndex = opts.isBuildIndex() && (rowIndexStride > 0);
if (buildIndex && rowIndexStride < MIN_ROW_INDEX_STRIDE) {
throw new IllegalArgumentException("Row stride must be at least " +
MIN_ROW_INDEX_STRIDE);
diff --git a/java/core/src/test/org/apache/orc/impl/TestWriterImpl.java b/java/core/src/test/org/apache/orc/impl/TestWriterImpl.java
index 8db041d2c..84f4df808 100644
--- a/java/core/src/test/org/apache/orc/impl/TestWriterImpl.java
+++ b/java/core/src/test/org/apache/orc/impl/TestWriterImpl.java
@@ -29,6 +29,7 @@ import org.apache.orc.OrcFile;
import org.apache.orc.Reader;
import org.apache.orc.TypeDescription;
import org.apache.orc.Writer;
+import org.apache.orc.*;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
@@ -92,6 +93,32 @@ public class TestWriterImpl {
w.close();
}
+ @Test
+ public void testNoIndexIfEnableIndexIsFalse() throws Exception {
+ conf.set(OrcConf.OVERWRITE_OUTPUT_FILE.getAttribute(), "true");
+ conf.set(OrcConf.ROW_INDEX_STRIDE.getAttribute(), "1000");
+ conf.setBoolean(OrcConf.ENABLE_INDEXES.getAttribute(), false);
+ VectorizedRowBatch b = schema.createRowBatch();
+ LongColumnVector f1 = (LongColumnVector) b.cols[0];
+ LongColumnVector f2 = (LongColumnVector) b.cols[1];
+ Writer w = OrcFile.createWriter(testFilePath, OrcFile.writerOptions(conf).setSchema(schema));
+ long rowCount = 1000;
+ for (int i = 0; i < rowCount; i++) {
+ f1.vector[b.size] = 1 ;
+ f2.vector[b.size] = 2 ;
+ b.size += 1;
+ if (b.size == 10) {
+ w.addRowBatch(b);
+ b.reset();
+ }
+ }
+ w.close();
+
+ for (StripeInformation information: w.getStripes()) {
+ assertEquals(0, information.getIndexLength());
+ }
+ }
+
@Test
public void testStripes() throws Exception {
conf.set(OrcConf.OVERWRITE_OUTPUT_FILE.getAttribute(), "true");
diff --git a/site/_docs/hive-config.md b/site/_docs/hive-config.md
index 8bbc66418..99e4863ea 100644
--- a/site/_docs/hive-config.md
+++ b/site/_docs/hive-config.md
@@ -16,7 +16,7 @@ orc.compress | ZLIB | high level compression = {NONE, ZLIB, S
orc.compress.size | 262,144 | compression chunk size
orc.stripe.size | 67,108,864 | memory buffer in bytes for writing
orc.row.index.stride | 10,000 | number of rows between index entries
-orc.create.index | true | create indexes?
+orc.create.index | true | whether the ORC writer creates indexes as part of the file or not
orc.bloom.filter.columns | "" | comma separated list of column names
orc.bloom.filter.fpp | 0.05 | bloom filter false positive rate
diff --git a/site/_docs/spark-config.md b/site/_docs/spark-config.md
index ad3a90d58..dca4124c2 100644
--- a/site/_docs/spark-config.md
+++ b/site/_docs/spark-config.md
@@ -16,7 +16,7 @@ orc.compress | ZLIB | high level compression = {NONE, ZLIB, S
orc.compress.size | 262,144 | compression chunk size
orc.stripe.size | 67,108,864 | memory buffer in bytes for writing
orc.row.index.stride | 10,000 | number of rows between index entries
-orc.create.index | true | create indexes?
+orc.create.index | true | whether the ORC writer creates indexes as part of the file or not
orc.bloom.filter.columns | "" | comma separated list of column names
orc.bloom.filter.fpp | 0.05 | bloom filter false positive rate
orc.key.provider | "hadoop" | key provider