You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by do...@apache.org on 2021/08/25 16:42:59 UTC
[orc] branch branch-1.7 updated: ORC-970: Reordering statements,
improve readability in WriterImpl (#882)
This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch branch-1.7
in repository https://gitbox.apache.org/repos/asf/orc.git
The following commit(s) were added to refs/heads/branch-1.7 by this push:
new e1fdff2 ORC-970: Reordering statements, improve readability in WriterImpl (#882)
e1fdff2 is described below
commit e1fdff2b04aa0722d2b36d14716eb046f12628ac
Author: guiyanakaung <gu...@gmail.com>
AuthorDate: Thu Aug 26 00:41:57 2021 +0800
ORC-970: Reordering statements, improve readability in WriterImpl (#882)
### What changes were proposed in this pull request?
Reordered multiple statements. Make variable initialization and usage closer.
For example, before
```
193 buildIndex = rowIndexStride > 0;
216 if (buildIndex && rowIndexStride < MIN_ROW_INDEX_STRIDE) {
```
Put the heavy logic last and put the check logic first.
The create physicalWriter and treeWriter statements are placed at the end of constructor.
### Why are the changes needed?
Clearer logic and improved readability.
### How was this patch tested?
Pass the CIs.
(cherry picked from commit 12e2f7488a7b5a11f734522dedf3648bd29cd1cc)
Signed-off-by: Dongjoon Hyun <do...@apache.org>
---
.../src/java/org/apache/orc/impl/WriterImpl.java | 58 ++++++++++++----------
1 file changed, 31 insertions(+), 27 deletions(-)
diff --git a/java/core/src/java/org/apache/orc/impl/WriterImpl.java b/java/core/src/java/org/apache/orc/impl/WriterImpl.java
index 4d2fa4c..969b3e2 100644
--- a/java/core/src/java/org/apache/orc/impl/WriterImpl.java
+++ b/java/core/src/java/org/apache/orc/impl/WriterImpl.java
@@ -148,7 +148,6 @@ public class WriterImpl implements WriterInternal, MemoryManager.Callback {
this.conf = opts.getConfiguration();
// clone it so that we can annotate it with encryption
this.schema = opts.getSchema().clone();
- useProlepticGregorian = opts.getProlepticGregorian();
int numColumns = schema.getMaximumId() + 1;
if (!opts.isEnforceBufferSize()) {
opts.bufferSize(getEstimatedBufferSize(opts.getStripeSize(), numColumns,
@@ -163,34 +162,34 @@ public class WriterImpl implements WriterInternal, MemoryManager.Callback {
opts.getKeyOverrides());
needKeyFlush = encryption.length > 0;
- // Set up the physical writer
- this.physicalWriter = opts.getPhysicalWriter() == null ?
- new PhysicalFsWriter(fs, path, opts, encryption) :
- opts.getPhysicalWriter();
- unencryptedOptions = physicalWriter.getStreamOptions();
- OutStream.assertBufferSizeValid(unencryptedOptions.getBufferSize());
-
- this.callback = opts.getCallback();
- this.writerVersion = opts.getWriterVersion();
- bloomFilterVersion = opts.getBloomFilterVersion();
this.directEncodingColumns = OrcUtils.includeColumns(
opts.getDirectEncodingColumns(), opts.getSchema());
dictionaryKeySizeThreshold =
OrcConf.DICTIONARY_KEY_SIZE_THRESHOLD.getDouble(conf);
+
+ this.callback = opts.getCallback();
if (callback != null) {
callbackContext = () -> WriterImpl.this;
} else {
callbackContext = null;
}
+
+ this.useProlepticGregorian = opts.getProlepticGregorian();
this.writeTimeZone = hasTimestamp(schema);
this.useUTCTimeZone = opts.getUseUTCTimestamp();
- this.stripeSize = opts.getStripeSize();
- this.version = opts.getVersion();
+
this.encodingStrategy = opts.getEncodingStrategy();
this.compressionStrategy = opts.getCompressionStrategy();
+
this.rowIndexStride = opts.getRowIndexStride();
- this.memoryManager = opts.getMemoryManager();
buildIndex = rowIndexStride > 0;
+ if (buildIndex && rowIndexStride < MIN_ROW_INDEX_STRIDE) {
+ throw new IllegalArgumentException("Row stride must be at least " +
+ MIN_ROW_INDEX_STRIDE);
+ }
+
+ this.writerVersion = opts.getWriterVersion();
+ this.version = opts.getVersion();
if (version == OrcFile.Version.FUTURE) {
throw new IllegalArgumentException("Can not write in a unknown version.");
} else if (version == OrcFile.Version.UNSTABLE_PRE_2_0) {
@@ -198,29 +197,34 @@ public class WriterImpl implements WriterInternal, MemoryManager.Callback {
" readable by other versions of the software. It is only for" +
" developer testing.");
}
- boolean buildBloomFilter = buildIndex;
- if (version == OrcFile.Version.V_0_11) {
- /* do not write bloom filters for ORC v11 */
- buildBloomFilter = false;
- }
- if (!buildBloomFilter) {
+
+ this.bloomFilterVersion = opts.getBloomFilterVersion();
+ this.bloomFilterFpp = opts.getBloomFilterFpp();
+ /* do not write bloom filters for ORC v11 */
+ if (!buildIndex || version == OrcFile.Version.V_0_11) {
this.bloomFilterColumns = new boolean[schema.getMaximumId() + 1];
} else {
this.bloomFilterColumns =
OrcUtils.includeColumns(opts.getBloomFilterColumns(), schema);
}
- this.bloomFilterFpp = opts.getBloomFilterFpp();
- physicalWriter.writeHeader();
- treeWriter = TreeWriter.Factory.create(schema, null, new StreamFactory());
- if (buildIndex && rowIndexStride < MIN_ROW_INDEX_STRIDE) {
- throw new IllegalArgumentException("Row stride must be at least " +
- MIN_ROW_INDEX_STRIDE);
- }
// ensure that we are able to handle callbacks before we register ourselves
ROWS_PER_CHECK = OrcConf.ROWS_BETWEEN_CHECKS.getLong(conf);
+ this.stripeSize = opts.getStripeSize();
memoryLimit = stripeSize;
+ memoryManager = opts.getMemoryManager();
memoryManager.addWriter(path, stripeSize, this);
+
+ // Set up the physical writer
+ this.physicalWriter = opts.getPhysicalWriter() == null ?
+ new PhysicalFsWriter(fs, path, opts, encryption) :
+ opts.getPhysicalWriter();
+ physicalWriter.writeHeader();
+ unencryptedOptions = physicalWriter.getStreamOptions();
+ OutStream.assertBufferSizeValid(unencryptedOptions.getBufferSize());
+
+ treeWriter = TreeWriter.Factory.create(schema, null, new StreamFactory());
+
LOG.info("ORC writer created for path: {} with stripeSize: {} options: {}",
path, stripeSize, unencryptedOptions);
}